Merge branch 'master' into devel
[linux-2.6] / fs / btrfs / ctree.c
1 /*
2  * Copyright (C) 2007,2008 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "transaction.h"
23 #include "print-tree.h"
24 #include "locking.h"
25
26 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
27                       *root, struct btrfs_path *path, int level);
28 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
29                       *root, struct btrfs_key *ins_key,
30                       struct btrfs_path *path, int data_size, int extend);
31 static int push_node_left(struct btrfs_trans_handle *trans,
32                           struct btrfs_root *root, struct extent_buffer *dst,
33                           struct extent_buffer *src, int empty);
34 static int balance_node_right(struct btrfs_trans_handle *trans,
35                               struct btrfs_root *root,
36                               struct extent_buffer *dst_buf,
37                               struct extent_buffer *src_buf);
38 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
39                    struct btrfs_path *path, int level, int slot);
40
41 struct btrfs_path *btrfs_alloc_path(void)
42 {
43         struct btrfs_path *path;
44         path = kmem_cache_zalloc(btrfs_path_cachep, GFP_NOFS);
45         if (path)
46                 path->reada = 1;
47         return path;
48 }
49
50 /*
51  * set all locked nodes in the path to blocking locks.  This should
52  * be done before scheduling
53  */
54 noinline void btrfs_set_path_blocking(struct btrfs_path *p)
55 {
56         int i;
57         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
58                 if (p->nodes[i] && p->locks[i])
59                         btrfs_set_lock_blocking(p->nodes[i]);
60         }
61 }
62
63 /*
64  * reset all the locked nodes in the patch to spinning locks.
65  *
66  * held is used to keep lockdep happy, when lockdep is enabled
67  * we set held to a blocking lock before we go around and
68  * retake all the spinlocks in the path.  You can safely use NULL
69  * for held
70  */
71 noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
72                                         struct extent_buffer *held)
73 {
74         int i;
75
76 #ifdef CONFIG_DEBUG_LOCK_ALLOC
77         /* lockdep really cares that we take all of these spinlocks
78          * in the right order.  If any of the locks in the path are not
79          * currently blocking, it is going to complain.  So, make really
80          * really sure by forcing the path to blocking before we clear
81          * the path blocking.
82          */
83         if (held)
84                 btrfs_set_lock_blocking(held);
85         btrfs_set_path_blocking(p);
86 #endif
87
88         for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
89                 if (p->nodes[i] && p->locks[i])
90                         btrfs_clear_lock_blocking(p->nodes[i]);
91         }
92
93 #ifdef CONFIG_DEBUG_LOCK_ALLOC
94         if (held)
95                 btrfs_clear_lock_blocking(held);
96 #endif
97 }
98
99 /* this also releases the path */
100 void btrfs_free_path(struct btrfs_path *p)
101 {
102         btrfs_release_path(NULL, p);
103         kmem_cache_free(btrfs_path_cachep, p);
104 }
105
106 /*
107  * path release drops references on the extent buffers in the path
108  * and it drops any locks held by this path
109  *
110  * It is safe to call this on paths that no locks or extent buffers held.
111  */
112 noinline void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
113 {
114         int i;
115
116         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
117                 p->slots[i] = 0;
118                 if (!p->nodes[i])
119                         continue;
120                 if (p->locks[i]) {
121                         btrfs_tree_unlock(p->nodes[i]);
122                         p->locks[i] = 0;
123                 }
124                 free_extent_buffer(p->nodes[i]);
125                 p->nodes[i] = NULL;
126         }
127 }
128
129 /*
130  * safely gets a reference on the root node of a tree.  A lock
131  * is not taken, so a concurrent writer may put a different node
132  * at the root of the tree.  See btrfs_lock_root_node for the
133  * looping required.
134  *
135  * The extent buffer returned by this has a reference taken, so
136  * it won't disappear.  It may stop being the root of the tree
137  * at any time because there are no locks held.
138  */
139 struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
140 {
141         struct extent_buffer *eb;
142         spin_lock(&root->node_lock);
143         eb = root->node;
144         extent_buffer_get(eb);
145         spin_unlock(&root->node_lock);
146         return eb;
147 }
148
149 /* loop around taking references on and locking the root node of the
150  * tree until you end up with a lock on the root.  A locked buffer
151  * is returned, with a reference held.
152  */
153 struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
154 {
155         struct extent_buffer *eb;
156
157         while (1) {
158                 eb = btrfs_root_node(root);
159                 btrfs_tree_lock(eb);
160
161                 spin_lock(&root->node_lock);
162                 if (eb == root->node) {
163                         spin_unlock(&root->node_lock);
164                         break;
165                 }
166                 spin_unlock(&root->node_lock);
167
168                 btrfs_tree_unlock(eb);
169                 free_extent_buffer(eb);
170         }
171         return eb;
172 }
173
174 /* cowonly root (everything not a reference counted cow subvolume), just get
175  * put onto a simple dirty list.  transaction.c walks this to make sure they
176  * get properly updated on disk.
177  */
178 static void add_root_to_dirty_list(struct btrfs_root *root)
179 {
180         if (root->track_dirty && list_empty(&root->dirty_list)) {
181                 list_add(&root->dirty_list,
182                          &root->fs_info->dirty_cowonly_roots);
183         }
184 }
185
186 /*
187  * used by snapshot creation to make a copy of a root for a tree with
188  * a given objectid.  The buffer with the new root node is returned in
189  * cow_ret, and this func returns zero on success or a negative error code.
190  */
191 int btrfs_copy_root(struct btrfs_trans_handle *trans,
192                       struct btrfs_root *root,
193                       struct extent_buffer *buf,
194                       struct extent_buffer **cow_ret, u64 new_root_objectid)
195 {
196         struct extent_buffer *cow;
197         u32 nritems;
198         int ret = 0;
199         int level;
200         struct btrfs_root *new_root;
201
202         new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
203         if (!new_root)
204                 return -ENOMEM;
205
206         memcpy(new_root, root, sizeof(*new_root));
207         new_root->root_key.objectid = new_root_objectid;
208
209         WARN_ON(root->ref_cows && trans->transid !=
210                 root->fs_info->running_transaction->transid);
211         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
212
213         level = btrfs_header_level(buf);
214         nritems = btrfs_header_nritems(buf);
215
216         cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0,
217                                      new_root_objectid, trans->transid,
218                                      level, buf->start, 0);
219         if (IS_ERR(cow)) {
220                 kfree(new_root);
221                 return PTR_ERR(cow);
222         }
223
224         copy_extent_buffer(cow, buf, 0, 0, cow->len);
225         btrfs_set_header_bytenr(cow, cow->start);
226         btrfs_set_header_generation(cow, trans->transid);
227         btrfs_set_header_owner(cow, new_root_objectid);
228         btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
229
230         write_extent_buffer(cow, root->fs_info->fsid,
231                             (unsigned long)btrfs_header_fsid(cow),
232                             BTRFS_FSID_SIZE);
233
234         WARN_ON(btrfs_header_generation(buf) > trans->transid);
235         ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL);
236         kfree(new_root);
237
238         if (ret)
239                 return ret;
240
241         btrfs_mark_buffer_dirty(cow);
242         *cow_ret = cow;
243         return 0;
244 }
245
246 /*
247  * does the dirty work in cow of a single block.  The parent block (if
248  * supplied) is updated to point to the new cow copy.  The new buffer is marked
249  * dirty and returned locked.  If you modify the block it needs to be marked
250  * dirty again.
251  *
252  * search_start -- an allocation hint for the new block
253  *
254  * empty_size -- a hint that you plan on doing more cow.  This is the size in
255  * bytes the allocator should try to find free next to the block it returns.
256  * This is just a hint and may be ignored by the allocator.
257  *
258  * prealloc_dest -- if you have already reserved a destination for the cow,
259  * this uses that block instead of allocating a new one.
260  * btrfs_alloc_reserved_extent is used to finish the allocation.
261  */
262 static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
263                              struct btrfs_root *root,
264                              struct extent_buffer *buf,
265                              struct extent_buffer *parent, int parent_slot,
266                              struct extent_buffer **cow_ret,
267                              u64 search_start, u64 empty_size,
268                              u64 prealloc_dest)
269 {
270         u64 parent_start;
271         struct extent_buffer *cow;
272         u32 nritems;
273         int ret = 0;
274         int level;
275         int unlock_orig = 0;
276
277         if (*cow_ret == buf)
278                 unlock_orig = 1;
279
280         btrfs_assert_tree_locked(buf);
281
282         if (parent)
283                 parent_start = parent->start;
284         else
285                 parent_start = 0;
286
287         WARN_ON(root->ref_cows && trans->transid !=
288                 root->fs_info->running_transaction->transid);
289         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
290
291         level = btrfs_header_level(buf);
292         nritems = btrfs_header_nritems(buf);
293
294         if (prealloc_dest) {
295                 struct btrfs_key ins;
296
297                 ins.objectid = prealloc_dest;
298                 ins.offset = buf->len;
299                 ins.type = BTRFS_EXTENT_ITEM_KEY;
300
301                 ret = btrfs_alloc_reserved_extent(trans, root, parent_start,
302                                                   root->root_key.objectid,
303                                                   trans->transid, level, &ins);
304                 BUG_ON(ret);
305                 cow = btrfs_init_new_buffer(trans, root, prealloc_dest,
306                                             buf->len, level);
307         } else {
308                 cow = btrfs_alloc_free_block(trans, root, buf->len,
309                                              parent_start,
310                                              root->root_key.objectid,
311                                              trans->transid, level,
312                                              search_start, empty_size);
313         }
314         if (IS_ERR(cow))
315                 return PTR_ERR(cow);
316
317         /* cow is set to blocking by btrfs_init_new_buffer */
318
319         copy_extent_buffer(cow, buf, 0, 0, cow->len);
320         btrfs_set_header_bytenr(cow, cow->start);
321         btrfs_set_header_generation(cow, trans->transid);
322         btrfs_set_header_owner(cow, root->root_key.objectid);
323         btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
324
325         write_extent_buffer(cow, root->fs_info->fsid,
326                             (unsigned long)btrfs_header_fsid(cow),
327                             BTRFS_FSID_SIZE);
328
329         WARN_ON(btrfs_header_generation(buf) > trans->transid);
330         if (btrfs_header_generation(buf) != trans->transid) {
331                 u32 nr_extents;
332                 ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents);
333                 if (ret)
334                         return ret;
335
336                 ret = btrfs_cache_ref(trans, root, buf, nr_extents);
337                 WARN_ON(ret);
338         } else if (btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID) {
339                 /*
340                  * There are only two places that can drop reference to
341                  * tree blocks owned by living reloc trees, one is here,
342                  * the other place is btrfs_drop_subtree. In both places,
343                  * we check reference count while tree block is locked.
344                  * Furthermore, if reference count is one, it won't get
345                  * increased by someone else.
346                  */
347                 u32 refs;
348                 ret = btrfs_lookup_extent_ref(trans, root, buf->start,
349                                               buf->len, &refs);
350                 BUG_ON(ret);
351                 if (refs == 1) {
352                         ret = btrfs_update_ref(trans, root, buf, cow,
353                                                0, nritems);
354                         clean_tree_block(trans, root, buf);
355                 } else {
356                         ret = btrfs_inc_ref(trans, root, buf, cow, NULL);
357                 }
358                 BUG_ON(ret);
359         } else {
360                 ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems);
361                 if (ret)
362                         return ret;
363                 clean_tree_block(trans, root, buf);
364         }
365
366         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
367                 ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start);
368                 WARN_ON(ret);
369         }
370
371         if (buf == root->node) {
372                 WARN_ON(parent && parent != buf);
373
374                 spin_lock(&root->node_lock);
375                 root->node = cow;
376                 extent_buffer_get(cow);
377                 spin_unlock(&root->node_lock);
378
379                 if (buf != root->commit_root) {
380                         btrfs_free_extent(trans, root, buf->start,
381                                           buf->len, buf->start,
382                                           root->root_key.objectid,
383                                           btrfs_header_generation(buf),
384                                           level, 1);
385                 }
386                 free_extent_buffer(buf);
387                 add_root_to_dirty_list(root);
388         } else {
389                 btrfs_set_node_blockptr(parent, parent_slot,
390                                         cow->start);
391                 WARN_ON(trans->transid == 0);
392                 btrfs_set_node_ptr_generation(parent, parent_slot,
393                                               trans->transid);
394                 btrfs_mark_buffer_dirty(parent);
395                 WARN_ON(btrfs_header_generation(parent) != trans->transid);
396                 btrfs_free_extent(trans, root, buf->start, buf->len,
397                                   parent_start, btrfs_header_owner(parent),
398                                   btrfs_header_generation(parent), level, 1);
399         }
400         if (unlock_orig)
401                 btrfs_tree_unlock(buf);
402         free_extent_buffer(buf);
403         btrfs_mark_buffer_dirty(cow);
404         *cow_ret = cow;
405         return 0;
406 }
407
408 /*
409  * cows a single block, see __btrfs_cow_block for the real work.
410  * This version of it has extra checks so that a block isn't cow'd more than
411  * once per transaction, as long as it hasn't been written yet
412  */
413 noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
414                     struct btrfs_root *root, struct extent_buffer *buf,
415                     struct extent_buffer *parent, int parent_slot,
416                     struct extent_buffer **cow_ret, u64 prealloc_dest)
417 {
418         u64 search_start;
419         int ret;
420
421         if (trans->transaction != root->fs_info->running_transaction) {
422                 printk(KERN_CRIT "trans %llu running %llu\n",
423                        (unsigned long long)trans->transid,
424                        (unsigned long long)
425                        root->fs_info->running_transaction->transid);
426                 WARN_ON(1);
427         }
428         if (trans->transid != root->fs_info->generation) {
429                 printk(KERN_CRIT "trans %llu running %llu\n",
430                        (unsigned long long)trans->transid,
431                        (unsigned long long)root->fs_info->generation);
432                 WARN_ON(1);
433         }
434
435         if (btrfs_header_generation(buf) == trans->transid &&
436             btrfs_header_owner(buf) == root->root_key.objectid &&
437             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
438                 *cow_ret = buf;
439                 WARN_ON(prealloc_dest);
440                 return 0;
441         }
442
443         search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
444
445         if (parent)
446                 btrfs_set_lock_blocking(parent);
447         btrfs_set_lock_blocking(buf);
448
449         ret = __btrfs_cow_block(trans, root, buf, parent,
450                                  parent_slot, cow_ret, search_start, 0,
451                                  prealloc_dest);
452         return ret;
453 }
454
455 /*
456  * helper function for defrag to decide if two blocks pointed to by a
457  * node are actually close by
458  */
459 static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
460 {
461         if (blocknr < other && other - (blocknr + blocksize) < 32768)
462                 return 1;
463         if (blocknr > other && blocknr - (other + blocksize) < 32768)
464                 return 1;
465         return 0;
466 }
467
468 /*
469  * compare two keys in a memcmp fashion
470  */
471 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
472 {
473         struct btrfs_key k1;
474
475         btrfs_disk_key_to_cpu(&k1, disk);
476
477         if (k1.objectid > k2->objectid)
478                 return 1;
479         if (k1.objectid < k2->objectid)
480                 return -1;
481         if (k1.type > k2->type)
482                 return 1;
483         if (k1.type < k2->type)
484                 return -1;
485         if (k1.offset > k2->offset)
486                 return 1;
487         if (k1.offset < k2->offset)
488                 return -1;
489         return 0;
490 }
491
492 /*
493  * same as comp_keys only with two btrfs_key's
494  */
495 static int comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
496 {
497         if (k1->objectid > k2->objectid)
498                 return 1;
499         if (k1->objectid < k2->objectid)
500                 return -1;
501         if (k1->type > k2->type)
502                 return 1;
503         if (k1->type < k2->type)
504                 return -1;
505         if (k1->offset > k2->offset)
506                 return 1;
507         if (k1->offset < k2->offset)
508                 return -1;
509         return 0;
510 }
511
512 /*
513  * this is used by the defrag code to go through all the
514  * leaves pointed to by a node and reallocate them so that
515  * disk order is close to key order
516  */
517 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
518                        struct btrfs_root *root, struct extent_buffer *parent,
519                        int start_slot, int cache_only, u64 *last_ret,
520                        struct btrfs_key *progress)
521 {
522         struct extent_buffer *cur;
523         u64 blocknr;
524         u64 gen;
525         u64 search_start = *last_ret;
526         u64 last_block = 0;
527         u64 other;
528         u32 parent_nritems;
529         int end_slot;
530         int i;
531         int err = 0;
532         int parent_level;
533         int uptodate;
534         u32 blocksize;
535         int progress_passed = 0;
536         struct btrfs_disk_key disk_key;
537
538         parent_level = btrfs_header_level(parent);
539         if (cache_only && parent_level != 1)
540                 return 0;
541
542         if (trans->transaction != root->fs_info->running_transaction)
543                 WARN_ON(1);
544         if (trans->transid != root->fs_info->generation)
545                 WARN_ON(1);
546
547         parent_nritems = btrfs_header_nritems(parent);
548         blocksize = btrfs_level_size(root, parent_level - 1);
549         end_slot = parent_nritems;
550
551         if (parent_nritems == 1)
552                 return 0;
553
554         btrfs_set_lock_blocking(parent);
555
556         for (i = start_slot; i < end_slot; i++) {
557                 int close = 1;
558
559                 if (!parent->map_token) {
560                         map_extent_buffer(parent,
561                                         btrfs_node_key_ptr_offset(i),
562                                         sizeof(struct btrfs_key_ptr),
563                                         &parent->map_token, &parent->kaddr,
564                                         &parent->map_start, &parent->map_len,
565                                         KM_USER1);
566                 }
567                 btrfs_node_key(parent, &disk_key, i);
568                 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
569                         continue;
570
571                 progress_passed = 1;
572                 blocknr = btrfs_node_blockptr(parent, i);
573                 gen = btrfs_node_ptr_generation(parent, i);
574                 if (last_block == 0)
575                         last_block = blocknr;
576
577                 if (i > 0) {
578                         other = btrfs_node_blockptr(parent, i - 1);
579                         close = close_blocks(blocknr, other, blocksize);
580                 }
581                 if (!close && i < end_slot - 2) {
582                         other = btrfs_node_blockptr(parent, i + 1);
583                         close = close_blocks(blocknr, other, blocksize);
584                 }
585                 if (close) {
586                         last_block = blocknr;
587                         continue;
588                 }
589                 if (parent->map_token) {
590                         unmap_extent_buffer(parent, parent->map_token,
591                                             KM_USER1);
592                         parent->map_token = NULL;
593                 }
594
595                 cur = btrfs_find_tree_block(root, blocknr, blocksize);
596                 if (cur)
597                         uptodate = btrfs_buffer_uptodate(cur, gen);
598                 else
599                         uptodate = 0;
600                 if (!cur || !uptodate) {
601                         if (cache_only) {
602                                 free_extent_buffer(cur);
603                                 continue;
604                         }
605                         if (!cur) {
606                                 cur = read_tree_block(root, blocknr,
607                                                          blocksize, gen);
608                         } else if (!uptodate) {
609                                 btrfs_read_buffer(cur, gen);
610                         }
611                 }
612                 if (search_start == 0)
613                         search_start = last_block;
614
615                 btrfs_tree_lock(cur);
616                 btrfs_set_lock_blocking(cur);
617                 err = __btrfs_cow_block(trans, root, cur, parent, i,
618                                         &cur, search_start,
619                                         min(16 * blocksize,
620                                             (end_slot - i) * blocksize), 0);
621                 if (err) {
622                         btrfs_tree_unlock(cur);
623                         free_extent_buffer(cur);
624                         break;
625                 }
626                 search_start = cur->start;
627                 last_block = cur->start;
628                 *last_ret = search_start;
629                 btrfs_tree_unlock(cur);
630                 free_extent_buffer(cur);
631         }
632         if (parent->map_token) {
633                 unmap_extent_buffer(parent, parent->map_token,
634                                     KM_USER1);
635                 parent->map_token = NULL;
636         }
637         return err;
638 }
639
640 /*
641  * The leaf data grows from end-to-front in the node.
642  * this returns the address of the start of the last item,
643  * which is the stop of the leaf data stack
644  */
645 static inline unsigned int leaf_data_end(struct btrfs_root *root,
646                                          struct extent_buffer *leaf)
647 {
648         u32 nr = btrfs_header_nritems(leaf);
649         if (nr == 0)
650                 return BTRFS_LEAF_DATA_SIZE(root);
651         return btrfs_item_offset_nr(leaf, nr - 1);
652 }
653
654 /*
655  * extra debugging checks to make sure all the items in a key are
656  * well formed and in the proper order
657  */
658 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
659                       int level)
660 {
661         struct extent_buffer *parent = NULL;
662         struct extent_buffer *node = path->nodes[level];
663         struct btrfs_disk_key parent_key;
664         struct btrfs_disk_key node_key;
665         int parent_slot;
666         int slot;
667         struct btrfs_key cpukey;
668         u32 nritems = btrfs_header_nritems(node);
669
670         if (path->nodes[level + 1])
671                 parent = path->nodes[level + 1];
672
673         slot = path->slots[level];
674         BUG_ON(nritems == 0);
675         if (parent) {
676                 parent_slot = path->slots[level + 1];
677                 btrfs_node_key(parent, &parent_key, parent_slot);
678                 btrfs_node_key(node, &node_key, 0);
679                 BUG_ON(memcmp(&parent_key, &node_key,
680                               sizeof(struct btrfs_disk_key)));
681                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
682                        btrfs_header_bytenr(node));
683         }
684         BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
685         if (slot != 0) {
686                 btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
687                 btrfs_node_key(node, &node_key, slot);
688                 BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
689         }
690         if (slot < nritems - 1) {
691                 btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
692                 btrfs_node_key(node, &node_key, slot);
693                 BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
694         }
695         return 0;
696 }
697
698 /*
699  * extra checking to make sure all the items in a leaf are
700  * well formed and in the proper order
701  */
702 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
703                       int level)
704 {
705         struct extent_buffer *leaf = path->nodes[level];
706         struct extent_buffer *parent = NULL;
707         int parent_slot;
708         struct btrfs_key cpukey;
709         struct btrfs_disk_key parent_key;
710         struct btrfs_disk_key leaf_key;
711         int slot = path->slots[0];
712
713         u32 nritems = btrfs_header_nritems(leaf);
714
715         if (path->nodes[level + 1])
716                 parent = path->nodes[level + 1];
717
718         if (nritems == 0)
719                 return 0;
720
721         if (parent) {
722                 parent_slot = path->slots[level + 1];
723                 btrfs_node_key(parent, &parent_key, parent_slot);
724                 btrfs_item_key(leaf, &leaf_key, 0);
725
726                 BUG_ON(memcmp(&parent_key, &leaf_key,
727                        sizeof(struct btrfs_disk_key)));
728                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
729                        btrfs_header_bytenr(leaf));
730         }
731         if (slot != 0 && slot < nritems - 1) {
732                 btrfs_item_key(leaf, &leaf_key, slot);
733                 btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
734                 if (comp_keys(&leaf_key, &cpukey) <= 0) {
735                         btrfs_print_leaf(root, leaf);
736                         printk(KERN_CRIT "slot %d offset bad key\n", slot);
737                         BUG_ON(1);
738                 }
739                 if (btrfs_item_offset_nr(leaf, slot - 1) !=
740                        btrfs_item_end_nr(leaf, slot)) {
741                         btrfs_print_leaf(root, leaf);
742                         printk(KERN_CRIT "slot %d offset bad\n", slot);
743                         BUG_ON(1);
744                 }
745         }
746         if (slot < nritems - 1) {
747                 btrfs_item_key(leaf, &leaf_key, slot);
748                 btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
749                 BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
750                 if (btrfs_item_offset_nr(leaf, slot) !=
751                         btrfs_item_end_nr(leaf, slot + 1)) {
752                         btrfs_print_leaf(root, leaf);
753                         printk(KERN_CRIT "slot %d offset bad\n", slot);
754                         BUG_ON(1);
755                 }
756         }
757         BUG_ON(btrfs_item_offset_nr(leaf, 0) +
758                btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
759         return 0;
760 }
761
762 static noinline int check_block(struct btrfs_root *root,
763                                 struct btrfs_path *path, int level)
764 {
765         return 0;
766         if (level == 0)
767                 return check_leaf(root, path, level);
768         return check_node(root, path, level);
769 }
770
771 /*
772  * search for key in the extent_buffer.  The items start at offset p,
773  * and they are item_size apart.  There are 'max' items in p.
774  *
775  * the slot in the array is returned via slot, and it points to
776  * the place where you would insert key if it is not found in
777  * the array.
778  *
779  * slot may point to max if the key is bigger than all of the keys
780  */
781 static noinline int generic_bin_search(struct extent_buffer *eb,
782                                        unsigned long p,
783                                        int item_size, struct btrfs_key *key,
784                                        int max, int *slot)
785 {
786         int low = 0;
787         int high = max;
788         int mid;
789         int ret;
790         struct btrfs_disk_key *tmp = NULL;
791         struct btrfs_disk_key unaligned;
792         unsigned long offset;
793         char *map_token = NULL;
794         char *kaddr = NULL;
795         unsigned long map_start = 0;
796         unsigned long map_len = 0;
797         int err;
798
799         while (low < high) {
800                 mid = (low + high) / 2;
801                 offset = p + mid * item_size;
802
803                 if (!map_token || offset < map_start ||
804                     (offset + sizeof(struct btrfs_disk_key)) >
805                     map_start + map_len) {
806                         if (map_token) {
807                                 unmap_extent_buffer(eb, map_token, KM_USER0);
808                                 map_token = NULL;
809                         }
810
811                         err = map_private_extent_buffer(eb, offset,
812                                                 sizeof(struct btrfs_disk_key),
813                                                 &map_token, &kaddr,
814                                                 &map_start, &map_len, KM_USER0);
815
816                         if (!err) {
817                                 tmp = (struct btrfs_disk_key *)(kaddr + offset -
818                                                         map_start);
819                         } else {
820                                 read_extent_buffer(eb, &unaligned,
821                                                    offset, sizeof(unaligned));
822                                 tmp = &unaligned;
823                         }
824
825                 } else {
826                         tmp = (struct btrfs_disk_key *)(kaddr + offset -
827                                                         map_start);
828                 }
829                 ret = comp_keys(tmp, key);
830
831                 if (ret < 0)
832                         low = mid + 1;
833                 else if (ret > 0)
834                         high = mid;
835                 else {
836                         *slot = mid;
837                         if (map_token)
838                                 unmap_extent_buffer(eb, map_token, KM_USER0);
839                         return 0;
840                 }
841         }
842         *slot = low;
843         if (map_token)
844                 unmap_extent_buffer(eb, map_token, KM_USER0);
845         return 1;
846 }
847
848 /*
849  * simple bin_search frontend that does the right thing for
850  * leaves vs nodes
851  */
852 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
853                       int level, int *slot)
854 {
855         if (level == 0) {
856                 return generic_bin_search(eb,
857                                           offsetof(struct btrfs_leaf, items),
858                                           sizeof(struct btrfs_item),
859                                           key, btrfs_header_nritems(eb),
860                                           slot);
861         } else {
862                 return generic_bin_search(eb,
863                                           offsetof(struct btrfs_node, ptrs),
864                                           sizeof(struct btrfs_key_ptr),
865                                           key, btrfs_header_nritems(eb),
866                                           slot);
867         }
868         return -1;
869 }
870
871 /* given a node and slot number, this reads the blocks it points to.  The
872  * extent buffer is returned with a reference taken (but unlocked).
873  * NULL is returned on error.
874  */
875 static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
876                                    struct extent_buffer *parent, int slot)
877 {
878         int level = btrfs_header_level(parent);
879         if (slot < 0)
880                 return NULL;
881         if (slot >= btrfs_header_nritems(parent))
882                 return NULL;
883
884         BUG_ON(level == 0);
885
886         return read_tree_block(root, btrfs_node_blockptr(parent, slot),
887                        btrfs_level_size(root, level - 1),
888                        btrfs_node_ptr_generation(parent, slot));
889 }
890
891 /*
892  * node level balancing, used to make sure nodes are in proper order for
893  * item deletion.  We balance from the top down, so we have to make sure
894  * that a deletion won't leave an node completely empty later on.
895  */
896 static noinline int balance_level(struct btrfs_trans_handle *trans,
897                          struct btrfs_root *root,
898                          struct btrfs_path *path, int level)
899 {
900         struct extent_buffer *right = NULL;
901         struct extent_buffer *mid;
902         struct extent_buffer *left = NULL;
903         struct extent_buffer *parent = NULL;
904         int ret = 0;
905         int wret;
906         int pslot;
907         int orig_slot = path->slots[level];
908         int err_on_enospc = 0;
909         u64 orig_ptr;
910
911         if (level == 0)
912                 return 0;
913
914         mid = path->nodes[level];
915
916         WARN_ON(!path->locks[level]);
917         WARN_ON(btrfs_header_generation(mid) != trans->transid);
918
919         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
920
921         if (level < BTRFS_MAX_LEVEL - 1)
922                 parent = path->nodes[level + 1];
923         pslot = path->slots[level + 1];
924
925         /*
926          * deal with the case where there is only one pointer in the root
927          * by promoting the node below to a root
928          */
929         if (!parent) {
930                 struct extent_buffer *child;
931
932                 if (btrfs_header_nritems(mid) != 1)
933                         return 0;
934
935                 /* promote the child to a root */
936                 child = read_node_slot(root, mid, 0);
937                 BUG_ON(!child);
938                 btrfs_tree_lock(child);
939                 btrfs_set_lock_blocking(child);
940                 ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0);
941                 BUG_ON(ret);
942
943                 spin_lock(&root->node_lock);
944                 root->node = child;
945                 spin_unlock(&root->node_lock);
946
947                 ret = btrfs_update_extent_ref(trans, root, child->start,
948                                               mid->start, child->start,
949                                               root->root_key.objectid,
950                                               trans->transid, level - 1);
951                 BUG_ON(ret);
952
953                 add_root_to_dirty_list(root);
954                 btrfs_tree_unlock(child);
955
956                 path->locks[level] = 0;
957                 path->nodes[level] = NULL;
958                 clean_tree_block(trans, root, mid);
959                 btrfs_tree_unlock(mid);
960                 /* once for the path */
961                 free_extent_buffer(mid);
962                 ret = btrfs_free_extent(trans, root, mid->start, mid->len,
963                                         mid->start, root->root_key.objectid,
964                                         btrfs_header_generation(mid),
965                                         level, 1);
966                 /* once for the root ptr */
967                 free_extent_buffer(mid);
968                 return ret;
969         }
970         if (btrfs_header_nritems(mid) >
971             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
972                 return 0;
973
974         if (btrfs_header_nritems(mid) < 2)
975                 err_on_enospc = 1;
976
977         left = read_node_slot(root, parent, pslot - 1);
978         if (left) {
979                 btrfs_tree_lock(left);
980                 btrfs_set_lock_blocking(left);
981                 wret = btrfs_cow_block(trans, root, left,
982                                        parent, pslot - 1, &left, 0);
983                 if (wret) {
984                         ret = wret;
985                         goto enospc;
986                 }
987         }
988         right = read_node_slot(root, parent, pslot + 1);
989         if (right) {
990                 btrfs_tree_lock(right);
991                 btrfs_set_lock_blocking(right);
992                 wret = btrfs_cow_block(trans, root, right,
993                                        parent, pslot + 1, &right, 0);
994                 if (wret) {
995                         ret = wret;
996                         goto enospc;
997                 }
998         }
999
1000         /* first, try to make some room in the middle buffer */
1001         if (left) {
1002                 orig_slot += btrfs_header_nritems(left);
1003                 wret = push_node_left(trans, root, left, mid, 1);
1004                 if (wret < 0)
1005                         ret = wret;
1006                 if (btrfs_header_nritems(mid) < 2)
1007                         err_on_enospc = 1;
1008         }
1009
1010         /*
1011          * then try to empty the right most buffer into the middle
1012          */
1013         if (right) {
1014                 wret = push_node_left(trans, root, mid, right, 1);
1015                 if (wret < 0 && wret != -ENOSPC)
1016                         ret = wret;
1017                 if (btrfs_header_nritems(right) == 0) {
1018                         u64 bytenr = right->start;
1019                         u64 generation = btrfs_header_generation(parent);
1020                         u32 blocksize = right->len;
1021
1022                         clean_tree_block(trans, root, right);
1023                         btrfs_tree_unlock(right);
1024                         free_extent_buffer(right);
1025                         right = NULL;
1026                         wret = del_ptr(trans, root, path, level + 1, pslot +
1027                                        1);
1028                         if (wret)
1029                                 ret = wret;
1030                         wret = btrfs_free_extent(trans, root, bytenr,
1031                                                  blocksize, parent->start,
1032                                                  btrfs_header_owner(parent),
1033                                                  generation, level, 1);
1034                         if (wret)
1035                                 ret = wret;
1036                 } else {
1037                         struct btrfs_disk_key right_key;
1038                         btrfs_node_key(right, &right_key, 0);
1039                         btrfs_set_node_key(parent, &right_key, pslot + 1);
1040                         btrfs_mark_buffer_dirty(parent);
1041                 }
1042         }
1043         if (btrfs_header_nritems(mid) == 1) {
1044                 /*
1045                  * we're not allowed to leave a node with one item in the
1046                  * tree during a delete.  A deletion from lower in the tree
1047                  * could try to delete the only pointer in this node.
1048                  * So, pull some keys from the left.
1049                  * There has to be a left pointer at this point because
1050                  * otherwise we would have pulled some pointers from the
1051                  * right
1052                  */
1053                 BUG_ON(!left);
1054                 wret = balance_node_right(trans, root, mid, left);
1055                 if (wret < 0) {
1056                         ret = wret;
1057                         goto enospc;
1058                 }
1059                 if (wret == 1) {
1060                         wret = push_node_left(trans, root, left, mid, 1);
1061                         if (wret < 0)
1062                                 ret = wret;
1063                 }
1064                 BUG_ON(wret == 1);
1065         }
1066         if (btrfs_header_nritems(mid) == 0) {
1067                 /* we've managed to empty the middle node, drop it */
1068                 u64 root_gen = btrfs_header_generation(parent);
1069                 u64 bytenr = mid->start;
1070                 u32 blocksize = mid->len;
1071
1072                 clean_tree_block(trans, root, mid);
1073                 btrfs_tree_unlock(mid);
1074                 free_extent_buffer(mid);
1075                 mid = NULL;
1076                 wret = del_ptr(trans, root, path, level + 1, pslot);
1077                 if (wret)
1078                         ret = wret;
1079                 wret = btrfs_free_extent(trans, root, bytenr, blocksize,
1080                                          parent->start,
1081                                          btrfs_header_owner(parent),
1082                                          root_gen, level, 1);
1083                 if (wret)
1084                         ret = wret;
1085         } else {
1086                 /* update the parent key to reflect our changes */
1087                 struct btrfs_disk_key mid_key;
1088                 btrfs_node_key(mid, &mid_key, 0);
1089                 btrfs_set_node_key(parent, &mid_key, pslot);
1090                 btrfs_mark_buffer_dirty(parent);
1091         }
1092
1093         /* update the path */
1094         if (left) {
1095                 if (btrfs_header_nritems(left) > orig_slot) {
1096                         extent_buffer_get(left);
1097                         /* left was locked after cow */
1098                         path->nodes[level] = left;
1099                         path->slots[level + 1] -= 1;
1100                         path->slots[level] = orig_slot;
1101                         if (mid) {
1102                                 btrfs_tree_unlock(mid);
1103                                 free_extent_buffer(mid);
1104                         }
1105                 } else {
1106                         orig_slot -= btrfs_header_nritems(left);
1107                         path->slots[level] = orig_slot;
1108                 }
1109         }
1110         /* double check we haven't messed things up */
1111         check_block(root, path, level);
1112         if (orig_ptr !=
1113             btrfs_node_blockptr(path->nodes[level], path->slots[level]))
1114                 BUG();
1115 enospc:
1116         if (right) {
1117                 btrfs_tree_unlock(right);
1118                 free_extent_buffer(right);
1119         }
1120         if (left) {
1121                 if (path->nodes[level] != left)
1122                         btrfs_tree_unlock(left);
1123                 free_extent_buffer(left);
1124         }
1125         return ret;
1126 }
1127
1128 /* Node balancing for insertion.  Here we only split or push nodes around
1129  * when they are completely full.  This is also done top down, so we
1130  * have to be pessimistic.
1131  */
1132 static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
1133                                           struct btrfs_root *root,
1134                                           struct btrfs_path *path, int level)
1135 {
1136         struct extent_buffer *right = NULL;
1137         struct extent_buffer *mid;
1138         struct extent_buffer *left = NULL;
1139         struct extent_buffer *parent = NULL;
1140         int ret = 0;
1141         int wret;
1142         int pslot;
1143         int orig_slot = path->slots[level];
1144         u64 orig_ptr;
1145
1146         if (level == 0)
1147                 return 1;
1148
1149         mid = path->nodes[level];
1150         WARN_ON(btrfs_header_generation(mid) != trans->transid);
1151         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
1152
1153         if (level < BTRFS_MAX_LEVEL - 1)
1154                 parent = path->nodes[level + 1];
1155         pslot = path->slots[level + 1];
1156
1157         if (!parent)
1158                 return 1;
1159
1160         left = read_node_slot(root, parent, pslot - 1);
1161
1162         /* first, try to make some room in the middle buffer */
1163         if (left) {
1164                 u32 left_nr;
1165
1166                 btrfs_tree_lock(left);
1167                 btrfs_set_lock_blocking(left);
1168
1169                 left_nr = btrfs_header_nritems(left);
1170                 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1171                         wret = 1;
1172                 } else {
1173                         ret = btrfs_cow_block(trans, root, left, parent,
1174                                               pslot - 1, &left, 0);
1175                         if (ret)
1176                                 wret = 1;
1177                         else {
1178                                 wret = push_node_left(trans, root,
1179                                                       left, mid, 0);
1180                         }
1181                 }
1182                 if (wret < 0)
1183                         ret = wret;
1184                 if (wret == 0) {
1185                         struct btrfs_disk_key disk_key;
1186                         orig_slot += left_nr;
1187                         btrfs_node_key(mid, &disk_key, 0);
1188                         btrfs_set_node_key(parent, &disk_key, pslot);
1189                         btrfs_mark_buffer_dirty(parent);
1190                         if (btrfs_header_nritems(left) > orig_slot) {
1191                                 path->nodes[level] = left;
1192                                 path->slots[level + 1] -= 1;
1193                                 path->slots[level] = orig_slot;
1194                                 btrfs_tree_unlock(mid);
1195                                 free_extent_buffer(mid);
1196                         } else {
1197                                 orig_slot -=
1198                                         btrfs_header_nritems(left);
1199                                 path->slots[level] = orig_slot;
1200                                 btrfs_tree_unlock(left);
1201                                 free_extent_buffer(left);
1202                         }
1203                         return 0;
1204                 }
1205                 btrfs_tree_unlock(left);
1206                 free_extent_buffer(left);
1207         }
1208         right = read_node_slot(root, parent, pslot + 1);
1209
1210         /*
1211          * then try to empty the right most buffer into the middle
1212          */
1213         if (right) {
1214                 u32 right_nr;
1215
1216                 btrfs_tree_lock(right);
1217                 btrfs_set_lock_blocking(right);
1218
1219                 right_nr = btrfs_header_nritems(right);
1220                 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1221                         wret = 1;
1222                 } else {
1223                         ret = btrfs_cow_block(trans, root, right,
1224                                               parent, pslot + 1,
1225                                               &right, 0);
1226                         if (ret)
1227                                 wret = 1;
1228                         else {
1229                                 wret = balance_node_right(trans, root,
1230                                                           right, mid);
1231                         }
1232                 }
1233                 if (wret < 0)
1234                         ret = wret;
1235                 if (wret == 0) {
1236                         struct btrfs_disk_key disk_key;
1237
1238                         btrfs_node_key(right, &disk_key, 0);
1239                         btrfs_set_node_key(parent, &disk_key, pslot + 1);
1240                         btrfs_mark_buffer_dirty(parent);
1241
1242                         if (btrfs_header_nritems(mid) <= orig_slot) {
1243                                 path->nodes[level] = right;
1244                                 path->slots[level + 1] += 1;
1245                                 path->slots[level] = orig_slot -
1246                                         btrfs_header_nritems(mid);
1247                                 btrfs_tree_unlock(mid);
1248                                 free_extent_buffer(mid);
1249                         } else {
1250                                 btrfs_tree_unlock(right);
1251                                 free_extent_buffer(right);
1252                         }
1253                         return 0;
1254                 }
1255                 btrfs_tree_unlock(right);
1256                 free_extent_buffer(right);
1257         }
1258         return 1;
1259 }
1260
1261 /*
1262  * readahead one full node of leaves, finding things that are close
1263  * to the block in 'slot', and triggering ra on them.
1264  */
1265 static noinline void reada_for_search(struct btrfs_root *root,
1266                                       struct btrfs_path *path,
1267                                       int level, int slot, u64 objectid)
1268 {
1269         struct extent_buffer *node;
1270         struct btrfs_disk_key disk_key;
1271         u32 nritems;
1272         u64 search;
1273         u64 target;
1274         u64 nread = 0;
1275         int direction = path->reada;
1276         struct extent_buffer *eb;
1277         u32 nr;
1278         u32 blocksize;
1279         u32 nscan = 0;
1280
1281         if (level != 1)
1282                 return;
1283
1284         if (!path->nodes[level])
1285                 return;
1286
1287         node = path->nodes[level];
1288
1289         search = btrfs_node_blockptr(node, slot);
1290         blocksize = btrfs_level_size(root, level - 1);
1291         eb = btrfs_find_tree_block(root, search, blocksize);
1292         if (eb) {
1293                 free_extent_buffer(eb);
1294                 return;
1295         }
1296
1297         target = search;
1298
1299         nritems = btrfs_header_nritems(node);
1300         nr = slot;
1301         while (1) {
1302                 if (direction < 0) {
1303                         if (nr == 0)
1304                                 break;
1305                         nr--;
1306                 } else if (direction > 0) {
1307                         nr++;
1308                         if (nr >= nritems)
1309                                 break;
1310                 }
1311                 if (path->reada < 0 && objectid) {
1312                         btrfs_node_key(node, &disk_key, nr);
1313                         if (btrfs_disk_key_objectid(&disk_key) != objectid)
1314                                 break;
1315                 }
1316                 search = btrfs_node_blockptr(node, nr);
1317                 if ((search <= target && target - search <= 65536) ||
1318                     (search > target && search - target <= 65536)) {
1319                         readahead_tree_block(root, search, blocksize,
1320                                      btrfs_node_ptr_generation(node, nr));
1321                         nread += blocksize;
1322                 }
1323                 nscan++;
1324                 if ((nread > 65536 || nscan > 32))
1325                         break;
1326         }
1327 }
1328
1329 /*
1330  * returns -EAGAIN if it had to drop the path, or zero if everything was in
1331  * cache
1332  */
1333 static noinline int reada_for_balance(struct btrfs_root *root,
1334                                       struct btrfs_path *path, int level)
1335 {
1336         int slot;
1337         int nritems;
1338         struct extent_buffer *parent;
1339         struct extent_buffer *eb;
1340         u64 gen;
1341         u64 block1 = 0;
1342         u64 block2 = 0;
1343         int ret = 0;
1344         int blocksize;
1345
1346         parent = path->nodes[level - 1];
1347         if (!parent)
1348                 return 0;
1349
1350         nritems = btrfs_header_nritems(parent);
1351         slot = path->slots[level];
1352         blocksize = btrfs_level_size(root, level);
1353
1354         if (slot > 0) {
1355                 block1 = btrfs_node_blockptr(parent, slot - 1);
1356                 gen = btrfs_node_ptr_generation(parent, slot - 1);
1357                 eb = btrfs_find_tree_block(root, block1, blocksize);
1358                 if (eb && btrfs_buffer_uptodate(eb, gen))
1359                         block1 = 0;
1360                 free_extent_buffer(eb);
1361         }
1362         if (slot < nritems) {
1363                 block2 = btrfs_node_blockptr(parent, slot + 1);
1364                 gen = btrfs_node_ptr_generation(parent, slot + 1);
1365                 eb = btrfs_find_tree_block(root, block2, blocksize);
1366                 if (eb && btrfs_buffer_uptodate(eb, gen))
1367                         block2 = 0;
1368                 free_extent_buffer(eb);
1369         }
1370         if (block1 || block2) {
1371                 ret = -EAGAIN;
1372                 btrfs_release_path(root, path);
1373                 if (block1)
1374                         readahead_tree_block(root, block1, blocksize, 0);
1375                 if (block2)
1376                         readahead_tree_block(root, block2, blocksize, 0);
1377
1378                 if (block1) {
1379                         eb = read_tree_block(root, block1, blocksize, 0);
1380                         free_extent_buffer(eb);
1381                 }
1382                 if (block1) {
1383                         eb = read_tree_block(root, block2, blocksize, 0);
1384                         free_extent_buffer(eb);
1385                 }
1386         }
1387         return ret;
1388 }
1389
1390
1391 /*
1392  * when we walk down the tree, it is usually safe to unlock the higher layers
1393  * in the tree.  The exceptions are when our path goes through slot 0, because
1394  * operations on the tree might require changing key pointers higher up in the
1395  * tree.
1396  *
1397  * callers might also have set path->keep_locks, which tells this code to keep
1398  * the lock if the path points to the last slot in the block.  This is part of
1399  * walking through the tree, and selecting the next slot in the higher block.
1400  *
1401  * lowest_unlock sets the lowest level in the tree we're allowed to unlock.  so
1402  * if lowest_unlock is 1, level 0 won't be unlocked
1403  */
1404 static noinline void unlock_up(struct btrfs_path *path, int level,
1405                                int lowest_unlock)
1406 {
1407         int i;
1408         int skip_level = level;
1409         int no_skips = 0;
1410         struct extent_buffer *t;
1411
1412         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1413                 if (!path->nodes[i])
1414                         break;
1415                 if (!path->locks[i])
1416                         break;
1417                 if (!no_skips && path->slots[i] == 0) {
1418                         skip_level = i + 1;
1419                         continue;
1420                 }
1421                 if (!no_skips && path->keep_locks) {
1422                         u32 nritems;
1423                         t = path->nodes[i];
1424                         nritems = btrfs_header_nritems(t);
1425                         if (nritems < 1 || path->slots[i] >= nritems - 1) {
1426                                 skip_level = i + 1;
1427                                 continue;
1428                         }
1429                 }
1430                 if (skip_level < i && i >= lowest_unlock)
1431                         no_skips = 1;
1432
1433                 t = path->nodes[i];
1434                 if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
1435                         btrfs_tree_unlock(t);
1436                         path->locks[i] = 0;
1437                 }
1438         }
1439 }
1440
1441 /*
1442  * This releases any locks held in the path starting at level and
1443  * going all the way up to the root.
1444  *
1445  * btrfs_search_slot will keep the lock held on higher nodes in a few
1446  * corner cases, such as COW of the block at slot zero in the node.  This
1447  * ignores those rules, and it should only be called when there are no
1448  * more updates to be done higher up in the tree.
1449  */
1450 noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
1451 {
1452         int i;
1453
1454         if (path->keep_locks || path->lowest_level)
1455                 return;
1456
1457         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1458                 if (!path->nodes[i])
1459                         continue;
1460                 if (!path->locks[i])
1461                         continue;
1462                 btrfs_tree_unlock(path->nodes[i]);
1463                 path->locks[i] = 0;
1464         }
1465 }
1466
1467 /*
1468  * look for key in the tree.  path is filled in with nodes along the way
1469  * if key is found, we return zero and you can find the item in the leaf
1470  * level of the path (level 0)
1471  *
1472  * If the key isn't found, the path points to the slot where it should
1473  * be inserted, and 1 is returned.  If there are other errors during the
1474  * search a negative error number is returned.
1475  *
1476  * if ins_len > 0, nodes and leaves will be split as we walk down the
1477  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
1478  * possible)
1479  */
1480 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1481                       *root, struct btrfs_key *key, struct btrfs_path *p, int
1482                       ins_len, int cow)
1483 {
1484         struct extent_buffer *b;
1485         struct extent_buffer *tmp;
1486         int slot;
1487         int ret;
1488         int level;
1489         int should_reada = p->reada;
1490         int lowest_unlock = 1;
1491         int blocksize;
1492         u8 lowest_level = 0;
1493         u64 blocknr;
1494         u64 gen;
1495         struct btrfs_key prealloc_block;
1496
1497         lowest_level = p->lowest_level;
1498         WARN_ON(lowest_level && ins_len > 0);
1499         WARN_ON(p->nodes[0] != NULL);
1500
1501         if (ins_len < 0)
1502                 lowest_unlock = 2;
1503
1504         prealloc_block.objectid = 0;
1505
1506 again:
1507         if (p->skip_locking)
1508                 b = btrfs_root_node(root);
1509         else
1510                 b = btrfs_lock_root_node(root);
1511
1512         while (b) {
1513                 level = btrfs_header_level(b);
1514
1515                 /*
1516                  * setup the path here so we can release it under lock
1517                  * contention with the cow code
1518                  */
1519                 p->nodes[level] = b;
1520                 if (!p->skip_locking)
1521                         p->locks[level] = 1;
1522
1523                 if (cow) {
1524                         int wret;
1525
1526                         /* is a cow on this block not required */
1527                         if (btrfs_header_generation(b) == trans->transid &&
1528                             btrfs_header_owner(b) == root->root_key.objectid &&
1529                             !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
1530                                 goto cow_done;
1531                         }
1532
1533                         /* ok, we have to cow, is our old prealloc the right
1534                          * size?
1535                          */
1536                         if (prealloc_block.objectid &&
1537                             prealloc_block.offset != b->len) {
1538                                 btrfs_release_path(root, p);
1539                                 btrfs_free_reserved_extent(root,
1540                                            prealloc_block.objectid,
1541                                            prealloc_block.offset);
1542                                 prealloc_block.objectid = 0;
1543                                 goto again;
1544                         }
1545
1546                         /*
1547                          * for higher level blocks, try not to allocate blocks
1548                          * with the block and the parent locks held.
1549                          */
1550                         if (level > 0 && !prealloc_block.objectid) {
1551                                 u32 size = b->len;
1552                                 u64 hint = b->start;
1553
1554                                 btrfs_release_path(root, p);
1555                                 ret = btrfs_reserve_extent(trans, root,
1556                                                            size, size, 0,
1557                                                            hint, (u64)-1,
1558                                                            &prealloc_block, 0);
1559                                 BUG_ON(ret);
1560                                 goto again;
1561                         }
1562
1563                         btrfs_set_path_blocking(p);
1564
1565                         wret = btrfs_cow_block(trans, root, b,
1566                                                p->nodes[level + 1],
1567                                                p->slots[level + 1],
1568                                                &b, prealloc_block.objectid);
1569                         prealloc_block.objectid = 0;
1570                         if (wret) {
1571                                 free_extent_buffer(b);
1572                                 ret = wret;
1573                                 goto done;
1574                         }
1575                 }
1576 cow_done:
1577                 BUG_ON(!cow && ins_len);
1578                 if (level != btrfs_header_level(b))
1579                         WARN_ON(1);
1580                 level = btrfs_header_level(b);
1581
1582                 p->nodes[level] = b;
1583                 if (!p->skip_locking)
1584                         p->locks[level] = 1;
1585
1586                 btrfs_clear_path_blocking(p, NULL);
1587
1588                 /*
1589                  * we have a lock on b and as long as we aren't changing
1590                  * the tree, there is no way to for the items in b to change.
1591                  * It is safe to drop the lock on our parent before we
1592                  * go through the expensive btree search on b.
1593                  *
1594                  * If cow is true, then we might be changing slot zero,
1595                  * which may require changing the parent.  So, we can't
1596                  * drop the lock until after we know which slot we're
1597                  * operating on.
1598                  */
1599                 if (!cow)
1600                         btrfs_unlock_up_safe(p, level + 1);
1601
1602                 ret = check_block(root, p, level);
1603                 if (ret) {
1604                         ret = -1;
1605                         goto done;
1606                 }
1607
1608                 ret = bin_search(b, key, level, &slot);
1609
1610                 if (level != 0) {
1611                         if (ret && slot > 0)
1612                                 slot -= 1;
1613                         p->slots[level] = slot;
1614                         if ((p->search_for_split || ins_len > 0) &&
1615                             btrfs_header_nritems(b) >=
1616                             BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
1617                                 int sret;
1618
1619                                 sret = reada_for_balance(root, p, level);
1620                                 if (sret)
1621                                         goto again;
1622
1623                                 btrfs_set_path_blocking(p);
1624                                 sret = split_node(trans, root, p, level);
1625                                 btrfs_clear_path_blocking(p, NULL);
1626
1627                                 BUG_ON(sret > 0);
1628                                 if (sret) {
1629                                         ret = sret;
1630                                         goto done;
1631                                 }
1632                                 b = p->nodes[level];
1633                                 slot = p->slots[level];
1634                         } else if (ins_len < 0 &&
1635                                    btrfs_header_nritems(b) <
1636                                    BTRFS_NODEPTRS_PER_BLOCK(root) / 4) {
1637                                 int sret;
1638
1639                                 sret = reada_for_balance(root, p, level);
1640                                 if (sret)
1641                                         goto again;
1642
1643                                 btrfs_set_path_blocking(p);
1644                                 sret = balance_level(trans, root, p, level);
1645                                 btrfs_clear_path_blocking(p, NULL);
1646
1647                                 if (sret) {
1648                                         ret = sret;
1649                                         goto done;
1650                                 }
1651                                 b = p->nodes[level];
1652                                 if (!b) {
1653                                         btrfs_release_path(NULL, p);
1654                                         goto again;
1655                                 }
1656                                 slot = p->slots[level];
1657                                 BUG_ON(btrfs_header_nritems(b) == 1);
1658                         }
1659                         unlock_up(p, level, lowest_unlock);
1660
1661                         /* this is only true while dropping a snapshot */
1662                         if (level == lowest_level) {
1663                                 ret = 0;
1664                                 goto done;
1665                         }
1666
1667                         blocknr = btrfs_node_blockptr(b, slot);
1668                         gen = btrfs_node_ptr_generation(b, slot);
1669                         blocksize = btrfs_level_size(root, level - 1);
1670
1671                         tmp = btrfs_find_tree_block(root, blocknr, blocksize);
1672                         if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
1673                                 b = tmp;
1674                         } else {
1675                                 /*
1676                                  * reduce lock contention at high levels
1677                                  * of the btree by dropping locks before
1678                                  * we read.
1679                                  */
1680                                 if (level > 0) {
1681                                         btrfs_release_path(NULL, p);
1682                                         if (tmp)
1683                                                 free_extent_buffer(tmp);
1684                                         if (should_reada)
1685                                                 reada_for_search(root, p,
1686                                                                  level, slot,
1687                                                                  key->objectid);
1688
1689                                         tmp = read_tree_block(root, blocknr,
1690                                                          blocksize, gen);
1691                                         if (tmp)
1692                                                 free_extent_buffer(tmp);
1693                                         goto again;
1694                                 } else {
1695                                         btrfs_set_path_blocking(p);
1696                                         if (tmp)
1697                                                 free_extent_buffer(tmp);
1698                                         if (should_reada)
1699                                                 reada_for_search(root, p,
1700                                                                  level, slot,
1701                                                                  key->objectid);
1702                                         b = read_node_slot(root, b, slot);
1703                                 }
1704                         }
1705                         if (!p->skip_locking) {
1706                                 int lret;
1707
1708                                 btrfs_clear_path_blocking(p, NULL);
1709                                 lret = btrfs_try_spin_lock(b);
1710
1711                                 if (!lret) {
1712                                         btrfs_set_path_blocking(p);
1713                                         btrfs_tree_lock(b);
1714                                         btrfs_clear_path_blocking(p, b);
1715                                 }
1716                         }
1717                 } else {
1718                         p->slots[level] = slot;
1719                         if (ins_len > 0 &&
1720                             btrfs_leaf_free_space(root, b) < ins_len) {
1721                                 int sret;
1722
1723                                 btrfs_set_path_blocking(p);
1724                                 sret = split_leaf(trans, root, key,
1725                                                       p, ins_len, ret == 0);
1726                                 btrfs_clear_path_blocking(p, NULL);
1727
1728                                 BUG_ON(sret > 0);
1729                                 if (sret) {
1730                                         ret = sret;
1731                                         goto done;
1732                                 }
1733                         }
1734                         if (!p->search_for_split)
1735                                 unlock_up(p, level, lowest_unlock);
1736                         goto done;
1737                 }
1738         }
1739         ret = 1;
1740 done:
1741         /*
1742          * we don't really know what they plan on doing with the path
1743          * from here on, so for now just mark it as blocking
1744          */
1745         btrfs_set_path_blocking(p);
1746         if (prealloc_block.objectid) {
1747                 btrfs_free_reserved_extent(root,
1748                            prealloc_block.objectid,
1749                            prealloc_block.offset);
1750         }
1751         return ret;
1752 }
1753
1754 int btrfs_merge_path(struct btrfs_trans_handle *trans,
1755                      struct btrfs_root *root,
1756                      struct btrfs_key *node_keys,
1757                      u64 *nodes, int lowest_level)
1758 {
1759         struct extent_buffer *eb;
1760         struct extent_buffer *parent;
1761         struct btrfs_key key;
1762         u64 bytenr;
1763         u64 generation;
1764         u32 blocksize;
1765         int level;
1766         int slot;
1767         int key_match;
1768         int ret;
1769
1770         eb = btrfs_lock_root_node(root);
1771         ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0);
1772         BUG_ON(ret);
1773
1774         btrfs_set_lock_blocking(eb);
1775
1776         parent = eb;
1777         while (1) {
1778                 level = btrfs_header_level(parent);
1779                 if (level == 0 || level <= lowest_level)
1780                         break;
1781
1782                 ret = bin_search(parent, &node_keys[lowest_level], level,
1783                                  &slot);
1784                 if (ret && slot > 0)
1785                         slot--;
1786
1787                 bytenr = btrfs_node_blockptr(parent, slot);
1788                 if (nodes[level - 1] == bytenr)
1789                         break;
1790
1791                 blocksize = btrfs_level_size(root, level - 1);
1792                 generation = btrfs_node_ptr_generation(parent, slot);
1793                 btrfs_node_key_to_cpu(eb, &key, slot);
1794                 key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key));
1795
1796                 if (generation == trans->transid) {
1797                         eb = read_tree_block(root, bytenr, blocksize,
1798                                              generation);
1799                         btrfs_tree_lock(eb);
1800                         btrfs_set_lock_blocking(eb);
1801                 }
1802
1803                 /*
1804                  * if node keys match and node pointer hasn't been modified
1805                  * in the running transaction, we can merge the path. for
1806                  * blocks owened by reloc trees, the node pointer check is
1807                  * skipped, this is because these blocks are fully controlled
1808                  * by the space balance code, no one else can modify them.
1809                  */
1810                 if (!nodes[level - 1] || !key_match ||
1811                     (generation == trans->transid &&
1812                      btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID)) {
1813                         if (level == 1 || level == lowest_level + 1) {
1814                                 if (generation == trans->transid) {
1815                                         btrfs_tree_unlock(eb);
1816                                         free_extent_buffer(eb);
1817                                 }
1818                                 break;
1819                         }
1820
1821                         if (generation != trans->transid) {
1822                                 eb = read_tree_block(root, bytenr, blocksize,
1823                                                 generation);
1824                                 btrfs_tree_lock(eb);
1825                                 btrfs_set_lock_blocking(eb);
1826                         }
1827
1828                         ret = btrfs_cow_block(trans, root, eb, parent, slot,
1829                                               &eb, 0);
1830                         BUG_ON(ret);
1831
1832                         if (root->root_key.objectid ==
1833                             BTRFS_TREE_RELOC_OBJECTID) {
1834                                 if (!nodes[level - 1]) {
1835                                         nodes[level - 1] = eb->start;
1836                                         memcpy(&node_keys[level - 1], &key,
1837                                                sizeof(node_keys[0]));
1838                                 } else {
1839                                         WARN_ON(1);
1840                                 }
1841                         }
1842
1843                         btrfs_tree_unlock(parent);
1844                         free_extent_buffer(parent);
1845                         parent = eb;
1846                         continue;
1847                 }
1848
1849                 btrfs_set_node_blockptr(parent, slot, nodes[level - 1]);
1850                 btrfs_set_node_ptr_generation(parent, slot, trans->transid);
1851                 btrfs_mark_buffer_dirty(parent);
1852
1853                 ret = btrfs_inc_extent_ref(trans, root,
1854                                         nodes[level - 1],
1855                                         blocksize, parent->start,
1856                                         btrfs_header_owner(parent),
1857                                         btrfs_header_generation(parent),
1858                                         level - 1);
1859                 BUG_ON(ret);
1860
1861                 /*
1862                  * If the block was created in the running transaction,
1863                  * it's possible this is the last reference to it, so we
1864                  * should drop the subtree.
1865                  */
1866                 if (generation == trans->transid) {
1867                         ret = btrfs_drop_subtree(trans, root, eb, parent);
1868                         BUG_ON(ret);
1869                         btrfs_tree_unlock(eb);
1870                         free_extent_buffer(eb);
1871                 } else {
1872                         ret = btrfs_free_extent(trans, root, bytenr,
1873                                         blocksize, parent->start,
1874                                         btrfs_header_owner(parent),
1875                                         btrfs_header_generation(parent),
1876                                         level - 1, 1);
1877                         BUG_ON(ret);
1878                 }
1879                 break;
1880         }
1881         btrfs_tree_unlock(parent);
1882         free_extent_buffer(parent);
1883         return 0;
1884 }
1885
1886 /*
1887  * adjust the pointers going up the tree, starting at level
1888  * making sure the right key of each node is points to 'key'.
1889  * This is used after shifting pointers to the left, so it stops
1890  * fixing up pointers when a given leaf/node is not in slot 0 of the
1891  * higher levels
1892  *
1893  * If this fails to write a tree block, it returns -1, but continues
1894  * fixing up the blocks in ram so the tree is consistent.
1895  */
1896 static int fixup_low_keys(struct btrfs_trans_handle *trans,
1897                           struct btrfs_root *root, struct btrfs_path *path,
1898                           struct btrfs_disk_key *key, int level)
1899 {
1900         int i;
1901         int ret = 0;
1902         struct extent_buffer *t;
1903
1904         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1905                 int tslot = path->slots[i];
1906                 if (!path->nodes[i])
1907                         break;
1908                 t = path->nodes[i];
1909                 btrfs_set_node_key(t, key, tslot);
1910                 btrfs_mark_buffer_dirty(path->nodes[i]);
1911                 if (tslot != 0)
1912                         break;
1913         }
1914         return ret;
1915 }
1916
1917 /*
1918  * update item key.
1919  *
1920  * This function isn't completely safe. It's the caller's responsibility
1921  * that the new key won't break the order
1922  */
1923 int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
1924                             struct btrfs_root *root, struct btrfs_path *path,
1925                             struct btrfs_key *new_key)
1926 {
1927         struct btrfs_disk_key disk_key;
1928         struct extent_buffer *eb;
1929         int slot;
1930
1931         eb = path->nodes[0];
1932         slot = path->slots[0];
1933         if (slot > 0) {
1934                 btrfs_item_key(eb, &disk_key, slot - 1);
1935                 if (comp_keys(&disk_key, new_key) >= 0)
1936                         return -1;
1937         }
1938         if (slot < btrfs_header_nritems(eb) - 1) {
1939                 btrfs_item_key(eb, &disk_key, slot + 1);
1940                 if (comp_keys(&disk_key, new_key) <= 0)
1941                         return -1;
1942         }
1943
1944         btrfs_cpu_key_to_disk(&disk_key, new_key);
1945         btrfs_set_item_key(eb, &disk_key, slot);
1946         btrfs_mark_buffer_dirty(eb);
1947         if (slot == 0)
1948                 fixup_low_keys(trans, root, path, &disk_key, 1);
1949         return 0;
1950 }
1951
1952 /*
1953  * try to push data from one node into the next node left in the
1954  * tree.
1955  *
1956  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1957  * error, and > 0 if there was no room in the left hand block.
1958  */
1959 static int push_node_left(struct btrfs_trans_handle *trans,
1960                           struct btrfs_root *root, struct extent_buffer *dst,
1961                           struct extent_buffer *src, int empty)
1962 {
1963         int push_items = 0;
1964         int src_nritems;
1965         int dst_nritems;
1966         int ret = 0;
1967
1968         src_nritems = btrfs_header_nritems(src);
1969         dst_nritems = btrfs_header_nritems(dst);
1970         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1971         WARN_ON(btrfs_header_generation(src) != trans->transid);
1972         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1973
1974         if (!empty && src_nritems <= 8)
1975                 return 1;
1976
1977         if (push_items <= 0)
1978                 return 1;
1979
1980         if (empty) {
1981                 push_items = min(src_nritems, push_items);
1982                 if (push_items < src_nritems) {
1983                         /* leave at least 8 pointers in the node if
1984                          * we aren't going to empty it
1985                          */
1986                         if (src_nritems - push_items < 8) {
1987                                 if (push_items <= 8)
1988                                         return 1;
1989                                 push_items -= 8;
1990                         }
1991                 }
1992         } else
1993                 push_items = min(src_nritems - 8, push_items);
1994
1995         copy_extent_buffer(dst, src,
1996                            btrfs_node_key_ptr_offset(dst_nritems),
1997                            btrfs_node_key_ptr_offset(0),
1998                            push_items * sizeof(struct btrfs_key_ptr));
1999
2000         if (push_items < src_nritems) {
2001                 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
2002                                       btrfs_node_key_ptr_offset(push_items),
2003                                       (src_nritems - push_items) *
2004                                       sizeof(struct btrfs_key_ptr));
2005         }
2006         btrfs_set_header_nritems(src, src_nritems - push_items);
2007         btrfs_set_header_nritems(dst, dst_nritems + push_items);
2008         btrfs_mark_buffer_dirty(src);
2009         btrfs_mark_buffer_dirty(dst);
2010
2011         ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items);
2012         BUG_ON(ret);
2013
2014         return ret;
2015 }
2016
2017 /*
2018  * try to push data from one node into the next node right in the
2019  * tree.
2020  *
2021  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
2022  * error, and > 0 if there was no room in the right hand block.
2023  *
2024  * this will  only push up to 1/2 the contents of the left node over
2025  */
2026 static int balance_node_right(struct btrfs_trans_handle *trans,
2027                               struct btrfs_root *root,
2028                               struct extent_buffer *dst,
2029                               struct extent_buffer *src)
2030 {
2031         int push_items = 0;
2032         int max_push;
2033         int src_nritems;
2034         int dst_nritems;
2035         int ret = 0;
2036
2037         WARN_ON(btrfs_header_generation(src) != trans->transid);
2038         WARN_ON(btrfs_header_generation(dst) != trans->transid);
2039
2040         src_nritems = btrfs_header_nritems(src);
2041         dst_nritems = btrfs_header_nritems(dst);
2042         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
2043         if (push_items <= 0)
2044                 return 1;
2045
2046         if (src_nritems < 4)
2047                 return 1;
2048
2049         max_push = src_nritems / 2 + 1;
2050         /* don't try to empty the node */
2051         if (max_push >= src_nritems)
2052                 return 1;
2053
2054         if (max_push < push_items)
2055                 push_items = max_push;
2056
2057         memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
2058                                       btrfs_node_key_ptr_offset(0),
2059                                       (dst_nritems) *
2060                                       sizeof(struct btrfs_key_ptr));
2061
2062         copy_extent_buffer(dst, src,
2063                            btrfs_node_key_ptr_offset(0),
2064                            btrfs_node_key_ptr_offset(src_nritems - push_items),
2065                            push_items * sizeof(struct btrfs_key_ptr));
2066
2067         btrfs_set_header_nritems(src, src_nritems - push_items);
2068         btrfs_set_header_nritems(dst, dst_nritems + push_items);
2069
2070         btrfs_mark_buffer_dirty(src);
2071         btrfs_mark_buffer_dirty(dst);
2072
2073         ret = btrfs_update_ref(trans, root, src, dst, 0, push_items);
2074         BUG_ON(ret);
2075
2076         return ret;
2077 }
2078
2079 /*
2080  * helper function to insert a new root level in the tree.
2081  * A new node is allocated, and a single item is inserted to
2082  * point to the existing root
2083  *
2084  * returns zero on success or < 0 on failure.
2085  */
2086 static noinline int insert_new_root(struct btrfs_trans_handle *trans,
2087                            struct btrfs_root *root,
2088                            struct btrfs_path *path, int level)
2089 {
2090         u64 lower_gen;
2091         struct extent_buffer *lower;
2092         struct extent_buffer *c;
2093         struct extent_buffer *old;
2094         struct btrfs_disk_key lower_key;
2095         int ret;
2096
2097         BUG_ON(path->nodes[level]);
2098         BUG_ON(path->nodes[level-1] != root->node);
2099
2100         lower = path->nodes[level-1];
2101         if (level == 1)
2102                 btrfs_item_key(lower, &lower_key, 0);
2103         else
2104                 btrfs_node_key(lower, &lower_key, 0);
2105
2106         c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
2107                                    root->root_key.objectid, trans->transid,
2108                                    level, root->node->start, 0);
2109         if (IS_ERR(c))
2110                 return PTR_ERR(c);
2111
2112         memset_extent_buffer(c, 0, 0, root->nodesize);
2113         btrfs_set_header_nritems(c, 1);
2114         btrfs_set_header_level(c, level);
2115         btrfs_set_header_bytenr(c, c->start);
2116         btrfs_set_header_generation(c, trans->transid);
2117         btrfs_set_header_owner(c, root->root_key.objectid);
2118
2119         write_extent_buffer(c, root->fs_info->fsid,
2120                             (unsigned long)btrfs_header_fsid(c),
2121                             BTRFS_FSID_SIZE);
2122
2123         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
2124                             (unsigned long)btrfs_header_chunk_tree_uuid(c),
2125                             BTRFS_UUID_SIZE);
2126
2127         btrfs_set_node_key(c, &lower_key, 0);
2128         btrfs_set_node_blockptr(c, 0, lower->start);
2129         lower_gen = btrfs_header_generation(lower);
2130         WARN_ON(lower_gen != trans->transid);
2131
2132         btrfs_set_node_ptr_generation(c, 0, lower_gen);
2133
2134         btrfs_mark_buffer_dirty(c);
2135
2136         spin_lock(&root->node_lock);
2137         old = root->node;
2138         root->node = c;
2139         spin_unlock(&root->node_lock);
2140
2141         ret = btrfs_update_extent_ref(trans, root, lower->start,
2142                                       lower->start, c->start,
2143                                       root->root_key.objectid,
2144                                       trans->transid, level - 1);
2145         BUG_ON(ret);
2146
2147         /* the super has an extra ref to root->node */
2148         free_extent_buffer(old);
2149
2150         add_root_to_dirty_list(root);
2151         extent_buffer_get(c);
2152         path->nodes[level] = c;
2153         path->locks[level] = 1;
2154         path->slots[level] = 0;
2155         return 0;
2156 }
2157
2158 /*
2159  * worker function to insert a single pointer in a node.
2160  * the node should have enough room for the pointer already
2161  *
2162  * slot and level indicate where you want the key to go, and
2163  * blocknr is the block the key points to.
2164  *
2165  * returns zero on success and < 0 on any error
2166  */
2167 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
2168                       *root, struct btrfs_path *path, struct btrfs_disk_key
2169                       *key, u64 bytenr, int slot, int level)
2170 {
2171         struct extent_buffer *lower;
2172         int nritems;
2173
2174         BUG_ON(!path->nodes[level]);
2175         lower = path->nodes[level];
2176         nritems = btrfs_header_nritems(lower);
2177         if (slot > nritems)
2178                 BUG();
2179         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
2180                 BUG();
2181         if (slot != nritems) {
2182                 memmove_extent_buffer(lower,
2183                               btrfs_node_key_ptr_offset(slot + 1),
2184                               btrfs_node_key_ptr_offset(slot),
2185                               (nritems - slot) * sizeof(struct btrfs_key_ptr));
2186         }
2187         btrfs_set_node_key(lower, key, slot);
2188         btrfs_set_node_blockptr(lower, slot, bytenr);
2189         WARN_ON(trans->transid == 0);
2190         btrfs_set_node_ptr_generation(lower, slot, trans->transid);
2191         btrfs_set_header_nritems(lower, nritems + 1);
2192         btrfs_mark_buffer_dirty(lower);
2193         return 0;
2194 }
2195
2196 /*
2197  * split the node at the specified level in path in two.
2198  * The path is corrected to point to the appropriate node after the split
2199  *
2200  * Before splitting this tries to make some room in the node by pushing
2201  * left and right, if either one works, it returns right away.
2202  *
2203  * returns 0 on success and < 0 on failure
2204  */
2205 static noinline int split_node(struct btrfs_trans_handle *trans,
2206                                struct btrfs_root *root,
2207                                struct btrfs_path *path, int level)
2208 {
2209         struct extent_buffer *c;
2210         struct extent_buffer *split;
2211         struct btrfs_disk_key disk_key;
2212         int mid;
2213         int ret;
2214         int wret;
2215         u32 c_nritems;
2216
2217         c = path->nodes[level];
2218         WARN_ON(btrfs_header_generation(c) != trans->transid);
2219         if (c == root->node) {
2220                 /* trying to split the root, lets make a new one */
2221                 ret = insert_new_root(trans, root, path, level + 1);
2222                 if (ret)
2223                         return ret;
2224         } else {
2225                 ret = push_nodes_for_insert(trans, root, path, level);
2226                 c = path->nodes[level];
2227                 if (!ret && btrfs_header_nritems(c) <
2228                     BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
2229                         return 0;
2230                 if (ret < 0)
2231                         return ret;
2232         }
2233
2234         c_nritems = btrfs_header_nritems(c);
2235
2236         split = btrfs_alloc_free_block(trans, root, root->nodesize,
2237                                         path->nodes[level + 1]->start,
2238                                         root->root_key.objectid,
2239                                         trans->transid, level, c->start, 0);
2240         if (IS_ERR(split))
2241                 return PTR_ERR(split);
2242
2243         btrfs_set_header_flags(split, btrfs_header_flags(c));
2244         btrfs_set_header_level(split, btrfs_header_level(c));
2245         btrfs_set_header_bytenr(split, split->start);
2246         btrfs_set_header_generation(split, trans->transid);
2247         btrfs_set_header_owner(split, root->root_key.objectid);
2248         btrfs_set_header_flags(split, 0);
2249         write_extent_buffer(split, root->fs_info->fsid,
2250                             (unsigned long)btrfs_header_fsid(split),
2251                             BTRFS_FSID_SIZE);
2252         write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
2253                             (unsigned long)btrfs_header_chunk_tree_uuid(split),
2254                             BTRFS_UUID_SIZE);
2255
2256         mid = (c_nritems + 1) / 2;
2257
2258         copy_extent_buffer(split, c,
2259                            btrfs_node_key_ptr_offset(0),
2260                            btrfs_node_key_ptr_offset(mid),
2261                            (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
2262         btrfs_set_header_nritems(split, c_nritems - mid);
2263         btrfs_set_header_nritems(c, mid);
2264         ret = 0;
2265
2266         btrfs_mark_buffer_dirty(c);
2267         btrfs_mark_buffer_dirty(split);
2268
2269         btrfs_node_key(split, &disk_key, 0);
2270         wret = insert_ptr(trans, root, path, &disk_key, split->start,
2271                           path->slots[level + 1] + 1,
2272                           level + 1);
2273         if (wret)
2274                 ret = wret;
2275
2276         ret = btrfs_update_ref(trans, root, c, split, 0, c_nritems - mid);
2277         BUG_ON(ret);
2278
2279         if (path->slots[level] >= mid) {
2280                 path->slots[level] -= mid;
2281                 btrfs_tree_unlock(c);
2282                 free_extent_buffer(c);
2283                 path->nodes[level] = split;
2284                 path->slots[level + 1] += 1;
2285         } else {
2286                 btrfs_tree_unlock(split);
2287                 free_extent_buffer(split);
2288         }
2289         return ret;
2290 }
2291
2292 /*
2293  * how many bytes are required to store the items in a leaf.  start
2294  * and nr indicate which items in the leaf to check.  This totals up the
2295  * space used both by the item structs and the item data
2296  */
2297 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
2298 {
2299         int data_len;
2300         int nritems = btrfs_header_nritems(l);
2301         int end = min(nritems, start + nr) - 1;
2302
2303         if (!nr)
2304                 return 0;
2305         data_len = btrfs_item_end_nr(l, start);
2306         data_len = data_len - btrfs_item_offset_nr(l, end);
2307         data_len += sizeof(struct btrfs_item) * nr;
2308         WARN_ON(data_len < 0);
2309         return data_len;
2310 }
2311
2312 /*
2313  * The space between the end of the leaf items and
2314  * the start of the leaf data.  IOW, how much room
2315  * the leaf has left for both items and data
2316  */
2317 noinline int btrfs_leaf_free_space(struct btrfs_root *root,
2318                                    struct extent_buffer *leaf)
2319 {
2320         int nritems = btrfs_header_nritems(leaf);
2321         int ret;
2322         ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
2323         if (ret < 0) {
2324                 printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, "
2325                        "used %d nritems %d\n",
2326                        ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
2327                        leaf_space_used(leaf, 0, nritems), nritems);
2328         }
2329         return ret;
2330 }
2331
2332 /*
2333  * push some data in the path leaf to the right, trying to free up at
2334  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
2335  *
2336  * returns 1 if the push failed because the other node didn't have enough
2337  * room, 0 if everything worked out and < 0 if there were major errors.
2338  */
2339 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
2340                            *root, struct btrfs_path *path, int data_size,
2341                            int empty)
2342 {
2343         struct extent_buffer *left = path->nodes[0];
2344         struct extent_buffer *right;
2345         struct extent_buffer *upper;
2346         struct btrfs_disk_key disk_key;
2347         int slot;
2348         u32 i;
2349         int free_space;
2350         int push_space = 0;
2351         int push_items = 0;
2352         struct btrfs_item *item;
2353         u32 left_nritems;
2354         u32 nr;
2355         u32 right_nritems;
2356         u32 data_end;
2357         u32 this_item_size;
2358         int ret;
2359
2360         slot = path->slots[1];
2361         if (!path->nodes[1])
2362                 return 1;
2363
2364         upper = path->nodes[1];
2365         if (slot >= btrfs_header_nritems(upper) - 1)
2366                 return 1;
2367
2368         btrfs_assert_tree_locked(path->nodes[1]);
2369
2370         right = read_node_slot(root, upper, slot + 1);
2371         btrfs_tree_lock(right);
2372         btrfs_set_lock_blocking(right);
2373
2374         free_space = btrfs_leaf_free_space(root, right);
2375         if (free_space < data_size)
2376                 goto out_unlock;
2377
2378         /* cow and double check */
2379         ret = btrfs_cow_block(trans, root, right, upper,
2380                               slot + 1, &right, 0);
2381         if (ret)
2382                 goto out_unlock;
2383
2384         free_space = btrfs_leaf_free_space(root, right);
2385         if (free_space < data_size)
2386                 goto out_unlock;
2387
2388         left_nritems = btrfs_header_nritems(left);
2389         if (left_nritems == 0)
2390                 goto out_unlock;
2391
2392         if (empty)
2393                 nr = 0;
2394         else
2395                 nr = 1;
2396
2397         if (path->slots[0] >= left_nritems)
2398                 push_space += data_size;
2399
2400         i = left_nritems - 1;
2401         while (i >= nr) {
2402                 item = btrfs_item_nr(left, i);
2403
2404                 if (!empty && push_items > 0) {
2405                         if (path->slots[0] > i)
2406                                 break;
2407                         if (path->slots[0] == i) {
2408                                 int space = btrfs_leaf_free_space(root, left);
2409                                 if (space + push_space * 2 > free_space)
2410                                         break;
2411                         }
2412                 }
2413
2414                 if (path->slots[0] == i)
2415                         push_space += data_size;
2416
2417                 if (!left->map_token) {
2418                         map_extent_buffer(left, (unsigned long)item,
2419                                         sizeof(struct btrfs_item),
2420                                         &left->map_token, &left->kaddr,
2421                                         &left->map_start, &left->map_len,
2422                                         KM_USER1);
2423                 }
2424
2425                 this_item_size = btrfs_item_size(left, item);
2426                 if (this_item_size + sizeof(*item) + push_space > free_space)
2427                         break;
2428
2429                 push_items++;
2430                 push_space += this_item_size + sizeof(*item);
2431                 if (i == 0)
2432                         break;
2433                 i--;
2434         }
2435         if (left->map_token) {
2436                 unmap_extent_buffer(left, left->map_token, KM_USER1);
2437                 left->map_token = NULL;
2438         }
2439
2440         if (push_items == 0)
2441                 goto out_unlock;
2442
2443         if (!empty && push_items == left_nritems)
2444                 WARN_ON(1);
2445
2446         /* push left to right */
2447         right_nritems = btrfs_header_nritems(right);
2448
2449         push_space = btrfs_item_end_nr(left, left_nritems - push_items);
2450         push_space -= leaf_data_end(root, left);
2451
2452         /* make room in the right data area */
2453         data_end = leaf_data_end(root, right);
2454         memmove_extent_buffer(right,
2455                               btrfs_leaf_data(right) + data_end - push_space,
2456                               btrfs_leaf_data(right) + data_end,
2457                               BTRFS_LEAF_DATA_SIZE(root) - data_end);
2458
2459         /* copy from the left data area */
2460         copy_extent_buffer(right, left, btrfs_leaf_data(right) +
2461                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
2462                      btrfs_leaf_data(left) + leaf_data_end(root, left),
2463                      push_space);
2464
2465         memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
2466                               btrfs_item_nr_offset(0),
2467                               right_nritems * sizeof(struct btrfs_item));
2468
2469         /* copy the items from left to right */
2470         copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
2471                    btrfs_item_nr_offset(left_nritems - push_items),
2472                    push_items * sizeof(struct btrfs_item));
2473
2474         /* update the item pointers */
2475         right_nritems += push_items;
2476         btrfs_set_header_nritems(right, right_nritems);
2477         push_space = BTRFS_LEAF_DATA_SIZE(root);
2478         for (i = 0; i < right_nritems; i++) {
2479                 item = btrfs_item_nr(right, i);
2480                 if (!right->map_token) {
2481                         map_extent_buffer(right, (unsigned long)item,
2482                                         sizeof(struct btrfs_item),
2483                                         &right->map_token, &right->kaddr,
2484                                         &right->map_start, &right->map_len,
2485                                         KM_USER1);
2486                 }
2487                 push_space -= btrfs_item_size(right, item);
2488                 btrfs_set_item_offset(right, item, push_space);
2489         }
2490
2491         if (right->map_token) {
2492                 unmap_extent_buffer(right, right->map_token, KM_USER1);
2493                 right->map_token = NULL;
2494         }
2495         left_nritems -= push_items;
2496         btrfs_set_header_nritems(left, left_nritems);
2497
2498         if (left_nritems)
2499                 btrfs_mark_buffer_dirty(left);
2500         btrfs_mark_buffer_dirty(right);
2501
2502         ret = btrfs_update_ref(trans, root, left, right, 0, push_items);
2503         BUG_ON(ret);
2504
2505         btrfs_item_key(right, &disk_key, 0);
2506         btrfs_set_node_key(upper, &disk_key, slot + 1);
2507         btrfs_mark_buffer_dirty(upper);
2508
2509         /* then fixup the leaf pointer in the path */
2510         if (path->slots[0] >= left_nritems) {
2511                 path->slots[0] -= left_nritems;
2512                 if (btrfs_header_nritems(path->nodes[0]) == 0)
2513                         clean_tree_block(trans, root, path->nodes[0]);
2514                 btrfs_tree_unlock(path->nodes[0]);
2515                 free_extent_buffer(path->nodes[0]);
2516                 path->nodes[0] = right;
2517                 path->slots[1] += 1;
2518         } else {
2519                 btrfs_tree_unlock(right);
2520                 free_extent_buffer(right);
2521         }
2522         return 0;
2523
2524 out_unlock:
2525         btrfs_tree_unlock(right);
2526         free_extent_buffer(right);
2527         return 1;
2528 }
2529
2530 /*
2531  * push some data in the path leaf to the left, trying to free up at
2532  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
2533  */
2534 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
2535                           *root, struct btrfs_path *path, int data_size,
2536                           int empty)
2537 {
2538         struct btrfs_disk_key disk_key;
2539         struct extent_buffer *right = path->nodes[0];
2540         struct extent_buffer *left;
2541         int slot;
2542         int i;
2543         int free_space;
2544         int push_space = 0;
2545         int push_items = 0;
2546         struct btrfs_item *item;
2547         u32 old_left_nritems;
2548         u32 right_nritems;
2549         u32 nr;
2550         int ret = 0;
2551         int wret;
2552         u32 this_item_size;
2553         u32 old_left_item_size;
2554
2555         slot = path->slots[1];
2556         if (slot == 0)
2557                 return 1;
2558         if (!path->nodes[1])
2559                 return 1;
2560
2561         right_nritems = btrfs_header_nritems(right);
2562         if (right_nritems == 0)
2563                 return 1;
2564
2565         btrfs_assert_tree_locked(path->nodes[1]);
2566
2567         left = read_node_slot(root, path->nodes[1], slot - 1);
2568         btrfs_tree_lock(left);
2569         btrfs_set_lock_blocking(left);
2570
2571         free_space = btrfs_leaf_free_space(root, left);
2572         if (free_space < data_size) {
2573                 ret = 1;
2574                 goto out;
2575         }
2576
2577         /* cow and double check */
2578         ret = btrfs_cow_block(trans, root, left,
2579                               path->nodes[1], slot - 1, &left, 0);
2580         if (ret) {
2581                 /* we hit -ENOSPC, but it isn't fatal here */
2582                 ret = 1;
2583                 goto out;
2584         }
2585
2586         free_space = btrfs_leaf_free_space(root, left);
2587         if (free_space < data_size) {
2588                 ret = 1;
2589                 goto out;
2590         }
2591
2592         if (empty)
2593                 nr = right_nritems;
2594         else
2595                 nr = right_nritems - 1;
2596
2597         for (i = 0; i < nr; i++) {
2598                 item = btrfs_item_nr(right, i);
2599                 if (!right->map_token) {
2600                         map_extent_buffer(right, (unsigned long)item,
2601                                         sizeof(struct btrfs_item),
2602                                         &right->map_token, &right->kaddr,
2603                                         &right->map_start, &right->map_len,
2604                                         KM_USER1);
2605                 }
2606
2607                 if (!empty && push_items > 0) {
2608                         if (path->slots[0] < i)
2609                                 break;
2610                         if (path->slots[0] == i) {
2611                                 int space = btrfs_leaf_free_space(root, right);
2612                                 if (space + push_space * 2 > free_space)
2613                                         break;
2614                         }
2615                 }
2616
2617                 if (path->slots[0] == i)
2618                         push_space += data_size;
2619
2620                 this_item_size = btrfs_item_size(right, item);
2621                 if (this_item_size + sizeof(*item) + push_space > free_space)
2622                         break;
2623
2624                 push_items++;
2625                 push_space += this_item_size + sizeof(*item);
2626         }
2627
2628         if (right->map_token) {
2629                 unmap_extent_buffer(right, right->map_token, KM_USER1);
2630                 right->map_token = NULL;
2631         }
2632
2633         if (push_items == 0) {
2634                 ret = 1;
2635                 goto out;
2636         }
2637         if (!empty && push_items == btrfs_header_nritems(right))
2638                 WARN_ON(1);
2639
2640         /* push data from right to left */
2641         copy_extent_buffer(left, right,
2642                            btrfs_item_nr_offset(btrfs_header_nritems(left)),
2643                            btrfs_item_nr_offset(0),
2644                            push_items * sizeof(struct btrfs_item));
2645
2646         push_space = BTRFS_LEAF_DATA_SIZE(root) -
2647                      btrfs_item_offset_nr(right, push_items - 1);
2648
2649         copy_extent_buffer(left, right, btrfs_leaf_data(left) +
2650                      leaf_data_end(root, left) - push_space,
2651                      btrfs_leaf_data(right) +
2652                      btrfs_item_offset_nr(right, push_items - 1),
2653                      push_space);
2654         old_left_nritems = btrfs_header_nritems(left);
2655         BUG_ON(old_left_nritems <= 0);
2656
2657         old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
2658         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
2659                 u32 ioff;
2660
2661                 item = btrfs_item_nr(left, i);
2662                 if (!left->map_token) {
2663                         map_extent_buffer(left, (unsigned long)item,
2664                                         sizeof(struct btrfs_item),
2665                                         &left->map_token, &left->kaddr,
2666                                         &left->map_start, &left->map_len,
2667                                         KM_USER1);
2668                 }
2669
2670                 ioff = btrfs_item_offset(left, item);
2671                 btrfs_set_item_offset(left, item,
2672                       ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
2673         }
2674         btrfs_set_header_nritems(left, old_left_nritems + push_items);
2675         if (left->map_token) {
2676                 unmap_extent_buffer(left, left->map_token, KM_USER1);
2677                 left->map_token = NULL;
2678         }
2679
2680         /* fixup right node */
2681         if (push_items > right_nritems) {
2682                 printk(KERN_CRIT "push items %d nr %u\n", push_items,
2683                        right_nritems);
2684                 WARN_ON(1);
2685         }
2686
2687         if (push_items < right_nritems) {
2688                 push_space = btrfs_item_offset_nr(right, push_items - 1) -
2689                                                   leaf_data_end(root, right);
2690                 memmove_extent_buffer(right, btrfs_leaf_data(right) +
2691                                       BTRFS_LEAF_DATA_SIZE(root) - push_space,
2692                                       btrfs_leaf_data(right) +
2693                                       leaf_data_end(root, right), push_space);
2694
2695                 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
2696                               btrfs_item_nr_offset(push_items),
2697                              (btrfs_header_nritems(right) - push_items) *
2698                              sizeof(struct btrfs_item));
2699         }
2700         right_nritems -= push_items;
2701         btrfs_set_header_nritems(right, right_nritems);
2702         push_space = BTRFS_LEAF_DATA_SIZE(root);
2703         for (i = 0; i < right_nritems; i++) {
2704                 item = btrfs_item_nr(right, i);
2705
2706                 if (!right->map_token) {
2707                         map_extent_buffer(right, (unsigned long)item,
2708                                         sizeof(struct btrfs_item),
2709                                         &right->map_token, &right->kaddr,
2710                                         &right->map_start, &right->map_len,
2711                                         KM_USER1);
2712                 }
2713
2714                 push_space = push_space - btrfs_item_size(right, item);
2715                 btrfs_set_item_offset(right, item, push_space);
2716         }
2717         if (right->map_token) {
2718                 unmap_extent_buffer(right, right->map_token, KM_USER1);
2719                 right->map_token = NULL;
2720         }
2721
2722         btrfs_mark_buffer_dirty(left);
2723         if (right_nritems)
2724                 btrfs_mark_buffer_dirty(right);
2725
2726         ret = btrfs_update_ref(trans, root, right, left,
2727                                old_left_nritems, push_items);
2728         BUG_ON(ret);
2729
2730         btrfs_item_key(right, &disk_key, 0);
2731         wret = fixup_low_keys(trans, root, path, &disk_key, 1);
2732         if (wret)
2733                 ret = wret;
2734
2735         /* then fixup the leaf pointer in the path */
2736         if (path->slots[0] < push_items) {
2737                 path->slots[0] += old_left_nritems;
2738                 if (btrfs_header_nritems(path->nodes[0]) == 0)
2739                         clean_tree_block(trans, root, path->nodes[0]);
2740                 btrfs_tree_unlock(path->nodes[0]);
2741                 free_extent_buffer(path->nodes[0]);
2742                 path->nodes[0] = left;
2743                 path->slots[1] -= 1;
2744         } else {
2745                 btrfs_tree_unlock(left);
2746                 free_extent_buffer(left);
2747                 path->slots[0] -= push_items;
2748         }
2749         BUG_ON(path->slots[0] < 0);
2750         return ret;
2751 out:
2752         btrfs_tree_unlock(left);
2753         free_extent_buffer(left);
2754         return ret;
2755 }
2756
2757 /*
2758  * split the path's leaf in two, making sure there is at least data_size
2759  * available for the resulting leaf level of the path.
2760  *
2761  * returns 0 if all went well and < 0 on failure.
2762  */
2763 static noinline int split_leaf(struct btrfs_trans_handle *trans,
2764                                struct btrfs_root *root,
2765                                struct btrfs_key *ins_key,
2766                                struct btrfs_path *path, int data_size,
2767                                int extend)
2768 {
2769         struct extent_buffer *l;
2770         u32 nritems;
2771         int mid;
2772         int slot;
2773         struct extent_buffer *right;
2774         int data_copy_size;
2775         int rt_data_off;
2776         int i;
2777         int ret = 0;
2778         int wret;
2779         int double_split;
2780         int num_doubles = 0;
2781         struct btrfs_disk_key disk_key;
2782
2783         /* first try to make some room by pushing left and right */
2784         if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
2785                 wret = push_leaf_right(trans, root, path, data_size, 0);
2786                 if (wret < 0)
2787                         return wret;
2788                 if (wret) {
2789                         wret = push_leaf_left(trans, root, path, data_size, 0);
2790                         if (wret < 0)
2791                                 return wret;
2792                 }
2793                 l = path->nodes[0];
2794
2795                 /* did the pushes work? */
2796                 if (btrfs_leaf_free_space(root, l) >= data_size)
2797                         return 0;
2798         }
2799
2800         if (!path->nodes[1]) {
2801                 ret = insert_new_root(trans, root, path, 1);
2802                 if (ret)
2803                         return ret;
2804         }
2805 again:
2806         double_split = 0;
2807         l = path->nodes[0];
2808         slot = path->slots[0];
2809         nritems = btrfs_header_nritems(l);
2810         mid = (nritems + 1) / 2;
2811
2812         right = btrfs_alloc_free_block(trans, root, root->leafsize,
2813                                         path->nodes[1]->start,
2814                                         root->root_key.objectid,
2815                                         trans->transid, 0, l->start, 0);
2816         if (IS_ERR(right)) {
2817                 BUG_ON(1);
2818                 return PTR_ERR(right);
2819         }
2820
2821         memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
2822         btrfs_set_header_bytenr(right, right->start);
2823         btrfs_set_header_generation(right, trans->transid);
2824         btrfs_set_header_owner(right, root->root_key.objectid);
2825         btrfs_set_header_level(right, 0);
2826         write_extent_buffer(right, root->fs_info->fsid,
2827                             (unsigned long)btrfs_header_fsid(right),
2828                             BTRFS_FSID_SIZE);
2829
2830         write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
2831                             (unsigned long)btrfs_header_chunk_tree_uuid(right),
2832                             BTRFS_UUID_SIZE);
2833         if (mid <= slot) {
2834                 if (nritems == 1 ||
2835                     leaf_space_used(l, mid, nritems - mid) + data_size >
2836                         BTRFS_LEAF_DATA_SIZE(root)) {
2837                         if (slot >= nritems) {
2838                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2839                                 btrfs_set_header_nritems(right, 0);
2840                                 wret = insert_ptr(trans, root, path,
2841                                                   &disk_key, right->start,
2842                                                   path->slots[1] + 1, 1);
2843                                 if (wret)
2844                                         ret = wret;
2845
2846                                 btrfs_tree_unlock(path->nodes[0]);
2847                                 free_extent_buffer(path->nodes[0]);
2848                                 path->nodes[0] = right;
2849                                 path->slots[0] = 0;
2850                                 path->slots[1] += 1;
2851                                 btrfs_mark_buffer_dirty(right);
2852                                 return ret;
2853                         }
2854                         mid = slot;
2855                         if (mid != nritems &&
2856                             leaf_space_used(l, mid, nritems - mid) +
2857                             data_size > BTRFS_LEAF_DATA_SIZE(root)) {
2858                                 double_split = 1;
2859                         }
2860                 }
2861         } else {
2862                 if (leaf_space_used(l, 0, mid) + data_size >
2863                         BTRFS_LEAF_DATA_SIZE(root)) {
2864                         if (!extend && data_size && slot == 0) {
2865                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2866                                 btrfs_set_header_nritems(right, 0);
2867                                 wret = insert_ptr(trans, root, path,
2868                                                   &disk_key,
2869                                                   right->start,
2870                                                   path->slots[1], 1);
2871                                 if (wret)
2872                                         ret = wret;
2873                                 btrfs_tree_unlock(path->nodes[0]);
2874                                 free_extent_buffer(path->nodes[0]);
2875                                 path->nodes[0] = right;
2876                                 path->slots[0] = 0;
2877                                 if (path->slots[1] == 0) {
2878                                         wret = fixup_low_keys(trans, root,
2879                                                       path, &disk_key, 1);
2880                                         if (wret)
2881                                                 ret = wret;
2882                                 }
2883                                 btrfs_mark_buffer_dirty(right);
2884                                 return ret;
2885                         } else if ((extend || !data_size) && slot == 0) {
2886                                 mid = 1;
2887                         } else {
2888                                 mid = slot;
2889                                 if (mid != nritems &&
2890                                     leaf_space_used(l, mid, nritems - mid) +
2891                                     data_size > BTRFS_LEAF_DATA_SIZE(root)) {
2892                                         double_split = 1;
2893                                 }
2894                         }
2895                 }
2896         }
2897         nritems = nritems - mid;
2898         btrfs_set_header_nritems(right, nritems);
2899         data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
2900
2901         copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
2902                            btrfs_item_nr_offset(mid),
2903                            nritems * sizeof(struct btrfs_item));
2904
2905         copy_extent_buffer(right, l,
2906                      btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
2907                      data_copy_size, btrfs_leaf_data(l) +
2908                      leaf_data_end(root, l), data_copy_size);
2909
2910         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
2911                       btrfs_item_end_nr(l, mid);
2912
2913         for (i = 0; i < nritems; i++) {
2914                 struct btrfs_item *item = btrfs_item_nr(right, i);
2915                 u32 ioff;
2916
2917                 if (!right->map_token) {
2918                         map_extent_buffer(right, (unsigned long)item,
2919                                         sizeof(struct btrfs_item),
2920                                         &right->map_token, &right->kaddr,
2921                                         &right->map_start, &right->map_len,
2922                                         KM_USER1);
2923                 }
2924
2925                 ioff = btrfs_item_offset(right, item);
2926                 btrfs_set_item_offset(right, item, ioff + rt_data_off);
2927         }
2928
2929         if (right->map_token) {
2930                 unmap_extent_buffer(right, right->map_token, KM_USER1);
2931                 right->map_token = NULL;
2932         }
2933
2934         btrfs_set_header_nritems(l, mid);
2935         ret = 0;
2936         btrfs_item_key(right, &disk_key, 0);
2937         wret = insert_ptr(trans, root, path, &disk_key, right->start,
2938                           path->slots[1] + 1, 1);
2939         if (wret)
2940                 ret = wret;
2941
2942         btrfs_mark_buffer_dirty(right);
2943         btrfs_mark_buffer_dirty(l);
2944         BUG_ON(path->slots[0] != slot);
2945
2946         ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
2947         BUG_ON(ret);
2948
2949         if (mid <= slot) {
2950                 btrfs_tree_unlock(path->nodes[0]);
2951                 free_extent_buffer(path->nodes[0]);
2952                 path->nodes[0] = right;
2953                 path->slots[0] -= mid;
2954                 path->slots[1] += 1;
2955         } else {
2956                 btrfs_tree_unlock(right);
2957                 free_extent_buffer(right);
2958         }
2959
2960         BUG_ON(path->slots[0] < 0);
2961
2962         if (double_split) {
2963                 BUG_ON(num_doubles != 0);
2964                 num_doubles++;
2965                 goto again;
2966         }
2967         return ret;
2968 }
2969
2970 /*
2971  * This function splits a single item into two items,
2972  * giving 'new_key' to the new item and splitting the
2973  * old one at split_offset (from the start of the item).
2974  *
2975  * The path may be released by this operation.  After
2976  * the split, the path is pointing to the old item.  The
2977  * new item is going to be in the same node as the old one.
2978  *
2979  * Note, the item being split must be smaller enough to live alone on
2980  * a tree block with room for one extra struct btrfs_item
2981  *
2982  * This allows us to split the item in place, keeping a lock on the
2983  * leaf the entire time.
2984  */
2985 int btrfs_split_item(struct btrfs_trans_handle *trans,
2986                      struct btrfs_root *root,
2987                      struct btrfs_path *path,
2988                      struct btrfs_key *new_key,
2989                      unsigned long split_offset)
2990 {
2991         u32 item_size;
2992         struct extent_buffer *leaf;
2993         struct btrfs_key orig_key;
2994         struct btrfs_item *item;
2995         struct btrfs_item *new_item;
2996         int ret = 0;
2997         int slot;
2998         u32 nritems;
2999         u32 orig_offset;
3000         struct btrfs_disk_key disk_key;
3001         char *buf;
3002
3003         leaf = path->nodes[0];
3004         btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]);
3005         if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item))
3006                 goto split;
3007
3008         item_size = btrfs_item_size_nr(leaf, path->slots[0]);
3009         btrfs_release_path(root, path);
3010
3011         path->search_for_split = 1;
3012         path->keep_locks = 1;
3013
3014         ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1);
3015         path->search_for_split = 0;
3016
3017         /* if our item isn't there or got smaller, return now */
3018         if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0],
3019                                                         path->slots[0])) {
3020                 path->keep_locks = 0;
3021                 return -EAGAIN;
3022         }
3023
3024         ret = split_leaf(trans, root, &orig_key, path,
3025                          sizeof(struct btrfs_item), 1);
3026         path->keep_locks = 0;
3027         BUG_ON(ret);
3028
3029         /*
3030          * make sure any changes to the path from split_leaf leave it
3031          * in a blocking state
3032          */
3033         btrfs_set_path_blocking(path);
3034
3035         leaf = path->nodes[0];
3036         BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
3037
3038 split:
3039         item = btrfs_item_nr(leaf, path->slots[0]);
3040         orig_offset = btrfs_item_offset(leaf, item);
3041         item_size = btrfs_item_size(leaf, item);
3042
3043
3044         buf = kmalloc(item_size, GFP_NOFS);
3045         read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
3046                             path->slots[0]), item_size);
3047         slot = path->slots[0] + 1;
3048         leaf = path->nodes[0];
3049
3050         nritems = btrfs_header_nritems(leaf);
3051
3052         if (slot != nritems) {
3053                 /* shift the items */
3054                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
3055                               btrfs_item_nr_offset(slot),
3056                               (nritems - slot) * sizeof(struct btrfs_item));
3057
3058         }
3059
3060         btrfs_cpu_key_to_disk(&disk_key, new_key);
3061         btrfs_set_item_key(leaf, &disk_key, slot);
3062
3063         new_item = btrfs_item_nr(leaf, slot);
3064
3065         btrfs_set_item_offset(leaf, new_item, orig_offset);
3066         btrfs_set_item_size(leaf, new_item, item_size - split_offset);
3067
3068         btrfs_set_item_offset(leaf, item,
3069                               orig_offset + item_size - split_offset);
3070         btrfs_set_item_size(leaf, item, split_offset);
3071
3072         btrfs_set_header_nritems(leaf, nritems + 1);
3073
3074         /* write the data for the start of the original item */
3075         write_extent_buffer(leaf, buf,
3076                             btrfs_item_ptr_offset(leaf, path->slots[0]),
3077                             split_offset);
3078
3079         /* write the data for the new item */
3080         write_extent_buffer(leaf, buf + split_offset,
3081                             btrfs_item_ptr_offset(leaf, slot),
3082                             item_size - split_offset);
3083         btrfs_mark_buffer_dirty(leaf);
3084
3085         ret = 0;
3086         if (btrfs_leaf_free_space(root, leaf) < 0) {
3087                 btrfs_print_leaf(root, leaf);
3088                 BUG();
3089         }
3090         kfree(buf);
3091         return ret;
3092 }
3093
3094 /*
3095  * make the item pointed to by the path smaller.  new_size indicates
3096  * how small to make it, and from_end tells us if we just chop bytes
3097  * off the end of the item or if we shift the item to chop bytes off
3098  * the front.
3099  */
3100 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
3101                         struct btrfs_root *root,
3102                         struct btrfs_path *path,
3103                         u32 new_size, int from_end)
3104 {
3105         int ret = 0;
3106         int slot;
3107         int slot_orig;
3108         struct extent_buffer *leaf;
3109         struct btrfs_item *item;
3110         u32 nritems;
3111         unsigned int data_end;
3112         unsigned int old_data_start;
3113         unsigned int old_size;
3114         unsigned int size_diff;
3115         int i;
3116
3117         slot_orig = path->slots[0];
3118         leaf = path->nodes[0];
3119         slot = path->slots[0];
3120
3121         old_size = btrfs_item_size_nr(leaf, slot);
3122         if (old_size == new_size)
3123                 return 0;
3124
3125         nritems = btrfs_header_nritems(leaf);
3126         data_end = leaf_data_end(root, leaf);
3127
3128         old_data_start = btrfs_item_offset_nr(leaf, slot);
3129
3130         size_diff = old_size - new_size;
3131
3132         BUG_ON(slot < 0);
3133         BUG_ON(slot >= nritems);
3134
3135         /*
3136          * item0..itemN ... dataN.offset..dataN.size .. data0.size
3137          */
3138         /* first correct the data pointers */
3139         for (i = slot; i < nritems; i++) {
3140                 u32 ioff;
3141                 item = btrfs_item_nr(leaf, i);
3142
3143                 if (!leaf->map_token) {
3144                         map_extent_buffer(leaf, (unsigned long)item,
3145                                         sizeof(struct btrfs_item),
3146                                         &leaf->map_token, &leaf->kaddr,
3147                                         &leaf->map_start, &leaf->map_len,
3148                                         KM_USER1);
3149                 }
3150
3151                 ioff = btrfs_item_offset(leaf, item);
3152                 btrfs_set_item_offset(leaf, item, ioff + size_diff);
3153         }
3154
3155         if (leaf->map_token) {
3156                 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3157                 leaf->map_token = NULL;
3158         }
3159
3160         /* shift the data */
3161         if (from_end) {
3162                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3163                               data_end + size_diff, btrfs_leaf_data(leaf) +
3164                               data_end, old_data_start + new_size - data_end);
3165         } else {
3166                 struct btrfs_disk_key disk_key;
3167                 u64 offset;
3168
3169                 btrfs_item_key(leaf, &disk_key, slot);
3170
3171                 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
3172                         unsigned long ptr;
3173                         struct btrfs_file_extent_item *fi;
3174
3175                         fi = btrfs_item_ptr(leaf, slot,
3176                                             struct btrfs_file_extent_item);
3177                         fi = (struct btrfs_file_extent_item *)(
3178                              (unsigned long)fi - size_diff);
3179
3180                         if (btrfs_file_extent_type(leaf, fi) ==
3181                             BTRFS_FILE_EXTENT_INLINE) {
3182                                 ptr = btrfs_item_ptr_offset(leaf, slot);
3183                                 memmove_extent_buffer(leaf, ptr,
3184                                       (unsigned long)fi,
3185                                       offsetof(struct btrfs_file_extent_item,
3186                                                  disk_bytenr));
3187                         }
3188                 }
3189
3190                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3191                               data_end + size_diff, btrfs_leaf_data(leaf) +
3192                               data_end, old_data_start - data_end);
3193
3194                 offset = btrfs_disk_key_offset(&disk_key);
3195                 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
3196                 btrfs_set_item_key(leaf, &disk_key, slot);
3197                 if (slot == 0)
3198                         fixup_low_keys(trans, root, path, &disk_key, 1);
3199         }
3200
3201         item = btrfs_item_nr(leaf, slot);
3202         btrfs_set_item_size(leaf, item, new_size);
3203         btrfs_mark_buffer_dirty(leaf);
3204
3205         ret = 0;
3206         if (btrfs_leaf_free_space(root, leaf) < 0) {
3207                 btrfs_print_leaf(root, leaf);
3208                 BUG();
3209         }
3210         return ret;
3211 }
3212
3213 /*
3214  * make the item pointed to by the path bigger, data_size is the new size.
3215  */
3216 int btrfs_extend_item(struct btrfs_trans_handle *trans,
3217                       struct btrfs_root *root, struct btrfs_path *path,
3218                       u32 data_size)
3219 {
3220         int ret = 0;
3221         int slot;
3222         int slot_orig;
3223         struct extent_buffer *leaf;
3224         struct btrfs_item *item;
3225         u32 nritems;
3226         unsigned int data_end;
3227         unsigned int old_data;
3228         unsigned int old_size;
3229         int i;
3230
3231         slot_orig = path->slots[0];
3232         leaf = path->nodes[0];
3233
3234         nritems = btrfs_header_nritems(leaf);
3235         data_end = leaf_data_end(root, leaf);
3236
3237         if (btrfs_leaf_free_space(root, leaf) < data_size) {
3238                 btrfs_print_leaf(root, leaf);
3239                 BUG();
3240         }
3241         slot = path->slots[0];
3242         old_data = btrfs_item_end_nr(leaf, slot);
3243
3244         BUG_ON(slot < 0);
3245         if (slot >= nritems) {
3246                 btrfs_print_leaf(root, leaf);
3247                 printk(KERN_CRIT "slot %d too large, nritems %d\n",
3248                        slot, nritems);
3249                 BUG_ON(1);
3250         }
3251
3252         /*
3253          * item0..itemN ... dataN.offset..dataN.size .. data0.size
3254          */
3255         /* first correct the data pointers */
3256         for (i = slot; i < nritems; i++) {
3257                 u32 ioff;
3258                 item = btrfs_item_nr(leaf, i);
3259
3260                 if (!leaf->map_token) {
3261                         map_extent_buffer(leaf, (unsigned long)item,
3262                                         sizeof(struct btrfs_item),
3263                                         &leaf->map_token, &leaf->kaddr,
3264                                         &leaf->map_start, &leaf->map_len,
3265                                         KM_USER1);
3266                 }
3267                 ioff = btrfs_item_offset(leaf, item);
3268                 btrfs_set_item_offset(leaf, item, ioff - data_size);
3269         }
3270
3271         if (leaf->map_token) {
3272                 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3273                 leaf->map_token = NULL;
3274         }
3275
3276         /* shift the data */
3277         memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3278                       data_end - data_size, btrfs_leaf_data(leaf) +
3279                       data_end, old_data - data_end);
3280
3281         data_end = old_data;
3282         old_size = btrfs_item_size_nr(leaf, slot);
3283         item = btrfs_item_nr(leaf, slot);
3284         btrfs_set_item_size(leaf, item, old_size + data_size);
3285         btrfs_mark_buffer_dirty(leaf);
3286
3287         ret = 0;
3288         if (btrfs_leaf_free_space(root, leaf) < 0) {
3289                 btrfs_print_leaf(root, leaf);
3290                 BUG();
3291         }
3292         return ret;
3293 }
3294
3295 /*
3296  * Given a key and some data, insert items into the tree.
3297  * This does all the path init required, making room in the tree if needed.
3298  * Returns the number of keys that were inserted.
3299  */
3300 int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
3301                             struct btrfs_root *root,
3302                             struct btrfs_path *path,
3303                             struct btrfs_key *cpu_key, u32 *data_size,
3304                             int nr)
3305 {
3306         struct extent_buffer *leaf;
3307         struct btrfs_item *item;
3308         int ret = 0;
3309         int slot;
3310         int i;
3311         u32 nritems;
3312         u32 total_data = 0;
3313         u32 total_size = 0;
3314         unsigned int data_end;
3315         struct btrfs_disk_key disk_key;
3316         struct btrfs_key found_key;
3317
3318         for (i = 0; i < nr; i++) {
3319                 if (total_size + data_size[i] + sizeof(struct btrfs_item) >
3320                     BTRFS_LEAF_DATA_SIZE(root)) {
3321                         break;
3322                         nr = i;
3323                 }
3324                 total_data += data_size[i];
3325                 total_size += data_size[i] + sizeof(struct btrfs_item);
3326         }
3327         BUG_ON(nr == 0);
3328
3329         ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
3330         if (ret == 0)
3331                 return -EEXIST;
3332         if (ret < 0)
3333                 goto out;
3334
3335         leaf = path->nodes[0];
3336
3337         nritems = btrfs_header_nritems(leaf);
3338         data_end = leaf_data_end(root, leaf);
3339
3340         if (btrfs_leaf_free_space(root, leaf) < total_size) {
3341                 for (i = nr; i >= 0; i--) {
3342                         total_data -= data_size[i];
3343                         total_size -= data_size[i] + sizeof(struct btrfs_item);
3344                         if (total_size < btrfs_leaf_free_space(root, leaf))
3345                                 break;
3346                 }
3347                 nr = i;
3348         }
3349
3350         slot = path->slots[0];
3351         BUG_ON(slot < 0);
3352
3353         if (slot != nritems) {
3354                 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
3355
3356                 item = btrfs_item_nr(leaf, slot);
3357                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3358
3359                 /* figure out how many keys we can insert in here */
3360                 total_data = data_size[0];
3361                 for (i = 1; i < nr; i++) {
3362                         if (comp_cpu_keys(&found_key, cpu_key + i) <= 0)
3363                                 break;
3364                         total_data += data_size[i];
3365                 }
3366                 nr = i;
3367
3368                 if (old_data < data_end) {
3369                         btrfs_print_leaf(root, leaf);
3370                         printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
3371                                slot, old_data, data_end);
3372                         BUG_ON(1);
3373                 }
3374                 /*
3375                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
3376                  */
3377                 /* first correct the data pointers */
3378                 WARN_ON(leaf->map_token);
3379                 for (i = slot; i < nritems; i++) {
3380                         u32 ioff;
3381
3382                         item = btrfs_item_nr(leaf, i);
3383                         if (!leaf->map_token) {
3384                                 map_extent_buffer(leaf, (unsigned long)item,
3385                                         sizeof(struct btrfs_item),
3386                                         &leaf->map_token, &leaf->kaddr,
3387                                         &leaf->map_start, &leaf->map_len,
3388                                         KM_USER1);
3389                         }
3390
3391                         ioff = btrfs_item_offset(leaf, item);
3392                         btrfs_set_item_offset(leaf, item, ioff - total_data);
3393                 }
3394                 if (leaf->map_token) {
3395                         unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3396                         leaf->map_token = NULL;
3397                 }
3398
3399                 /* shift the items */
3400                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
3401                               btrfs_item_nr_offset(slot),
3402                               (nritems - slot) * sizeof(struct btrfs_item));
3403
3404                 /* shift the data */
3405                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3406                               data_end - total_data, btrfs_leaf_data(leaf) +
3407                               data_end, old_data - data_end);
3408                 data_end = old_data;
3409         } else {
3410                 /*
3411                  * this sucks but it has to be done, if we are inserting at
3412                  * the end of the leaf only insert 1 of the items, since we
3413                  * have no way of knowing whats on the next leaf and we'd have
3414                  * to drop our current locks to figure it out
3415                  */
3416                 nr = 1;
3417         }
3418
3419         /* setup the item for the new data */
3420         for (i = 0; i < nr; i++) {
3421                 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
3422                 btrfs_set_item_key(leaf, &disk_key, slot + i);
3423                 item = btrfs_item_nr(leaf, slot + i);
3424                 btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
3425                 data_end -= data_size[i];
3426                 btrfs_set_item_size(leaf, item, data_size[i]);
3427         }
3428         btrfs_set_header_nritems(leaf, nritems + nr);
3429         btrfs_mark_buffer_dirty(leaf);
3430
3431         ret = 0;
3432         if (slot == 0) {
3433                 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
3434                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
3435         }
3436
3437         if (btrfs_leaf_free_space(root, leaf) < 0) {
3438                 btrfs_print_leaf(root, leaf);
3439                 BUG();
3440         }
3441 out:
3442         if (!ret)
3443                 ret = nr;
3444         return ret;
3445 }
3446
3447 /*
3448  * Given a key and some data, insert items into the tree.
3449  * This does all the path init required, making room in the tree if needed.
3450  */
3451 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
3452                             struct btrfs_root *root,
3453                             struct btrfs_path *path,
3454                             struct btrfs_key *cpu_key, u32 *data_size,
3455                             int nr)
3456 {
3457         struct extent_buffer *leaf;
3458         struct btrfs_item *item;
3459         int ret = 0;
3460         int slot;
3461         int slot_orig;
3462         int i;
3463         u32 nritems;
3464         u32 total_size = 0;
3465         u32 total_data = 0;
3466         unsigned int data_end;
3467         struct btrfs_disk_key disk_key;
3468
3469         for (i = 0; i < nr; i++)
3470                 total_data += data_size[i];
3471
3472         total_size = total_data + (nr * sizeof(struct btrfs_item));
3473         ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
3474         if (ret == 0)
3475                 return -EEXIST;
3476         if (ret < 0)
3477                 goto out;
3478
3479         slot_orig = path->slots[0];
3480         leaf = path->nodes[0];
3481
3482         nritems = btrfs_header_nritems(leaf);
3483         data_end = leaf_data_end(root, leaf);
3484
3485         if (btrfs_leaf_free_space(root, leaf) < total_size) {
3486                 btrfs_print_leaf(root, leaf);
3487                 printk(KERN_CRIT "not enough freespace need %u have %d\n",
3488                        total_size, btrfs_leaf_free_space(root, leaf));
3489                 BUG();
3490         }
3491
3492         slot = path->slots[0];
3493         BUG_ON(slot < 0);
3494
3495         if (slot != nritems) {
3496                 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
3497
3498                 if (old_data < data_end) {
3499                         btrfs_print_leaf(root, leaf);
3500                         printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
3501                                slot, old_data, data_end);
3502                         BUG_ON(1);
3503                 }
3504                 /*
3505                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
3506                  */
3507                 /* first correct the data pointers */
3508                 WARN_ON(leaf->map_token);
3509                 for (i = slot; i < nritems; i++) {
3510                         u32 ioff;
3511
3512                         item = btrfs_item_nr(leaf, i);
3513                         if (!leaf->map_token) {
3514                                 map_extent_buffer(leaf, (unsigned long)item,
3515                                         sizeof(struct btrfs_item),
3516                                         &leaf->map_token, &leaf->kaddr,
3517                                         &leaf->map_start, &leaf->map_len,
3518                                         KM_USER1);
3519                         }
3520
3521                         ioff = btrfs_item_offset(leaf, item);
3522                         btrfs_set_item_offset(leaf, item, ioff - total_data);
3523                 }
3524                 if (leaf->map_token) {
3525                         unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3526                         leaf->map_token = NULL;
3527                 }
3528
3529                 /* shift the items */
3530                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
3531                               btrfs_item_nr_offset(slot),
3532                               (nritems - slot) * sizeof(struct btrfs_item));
3533
3534                 /* shift the data */
3535                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3536                               data_end - total_data, btrfs_leaf_data(leaf) +
3537                               data_end, old_data - data_end);
3538                 data_end = old_data;
3539         }
3540
3541         /* setup the item for the new data */
3542         for (i = 0; i < nr; i++) {
3543                 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
3544                 btrfs_set_item_key(leaf, &disk_key, slot + i);
3545                 item = btrfs_item_nr(leaf, slot + i);
3546                 btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
3547                 data_end -= data_size[i];
3548                 btrfs_set_item_size(leaf, item, data_size[i]);
3549         }
3550         btrfs_set_header_nritems(leaf, nritems + nr);
3551         btrfs_mark_buffer_dirty(leaf);
3552
3553         ret = 0;
3554         if (slot == 0) {
3555                 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
3556                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
3557         }
3558
3559         if (btrfs_leaf_free_space(root, leaf) < 0) {
3560                 btrfs_print_leaf(root, leaf);
3561                 BUG();
3562         }
3563 out:
3564         btrfs_unlock_up_safe(path, 1);
3565         return ret;
3566 }
3567
3568 /*
3569  * Given a key and some data, insert an item into the tree.
3570  * This does all the path init required, making room in the tree if needed.
3571  */
3572 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
3573                       *root, struct btrfs_key *cpu_key, void *data, u32
3574                       data_size)
3575 {
3576         int ret = 0;
3577         struct btrfs_path *path;
3578         struct extent_buffer *leaf;
3579         unsigned long ptr;
3580
3581         path = btrfs_alloc_path();
3582         BUG_ON(!path);
3583         ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
3584         if (!ret) {
3585                 leaf = path->nodes[0];
3586                 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
3587                 write_extent_buffer(leaf, data, ptr, data_size);
3588                 btrfs_mark_buffer_dirty(leaf);
3589         }
3590         btrfs_free_path(path);
3591         return ret;
3592 }
3593
3594 /*
3595  * delete the pointer from a given node.
3596  *
3597  * the tree should have been previously balanced so the deletion does not
3598  * empty a node.
3599  */
3600 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3601                    struct btrfs_path *path, int level, int slot)
3602 {
3603         struct extent_buffer *parent = path->nodes[level];
3604         u32 nritems;
3605         int ret = 0;
3606         int wret;
3607
3608         nritems = btrfs_header_nritems(parent);
3609         if (slot != nritems - 1) {
3610                 memmove_extent_buffer(parent,
3611                               btrfs_node_key_ptr_offset(slot),
3612                               btrfs_node_key_ptr_offset(slot + 1),
3613                               sizeof(struct btrfs_key_ptr) *
3614                               (nritems - slot - 1));
3615         }
3616         nritems--;
3617         btrfs_set_header_nritems(parent, nritems);
3618         if (nritems == 0 && parent == root->node) {
3619                 BUG_ON(btrfs_header_level(root->node) != 1);
3620                 /* just turn the root into a leaf and break */
3621                 btrfs_set_header_level(root->node, 0);
3622         } else if (slot == 0) {
3623                 struct btrfs_disk_key disk_key;
3624
3625                 btrfs_node_key(parent, &disk_key, 0);
3626                 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
3627                 if (wret)
3628                         ret = wret;
3629         }
3630         btrfs_mark_buffer_dirty(parent);
3631         return ret;
3632 }
3633
3634 /*
3635  * a helper function to delete the leaf pointed to by path->slots[1] and
3636  * path->nodes[1].  bytenr is the node block pointer, but since the callers
3637  * already know it, it is faster to have them pass it down than to
3638  * read it out of the node again.
3639  *
3640  * This deletes the pointer in path->nodes[1] and frees the leaf
3641  * block extent.  zero is returned if it all worked out, < 0 otherwise.
3642  *
3643  * The path must have already been setup for deleting the leaf, including
3644  * all the proper balancing.  path->nodes[1] must be locked.
3645  */
3646 noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3647                             struct btrfs_root *root,
3648                             struct btrfs_path *path, u64 bytenr)
3649 {
3650         int ret;
3651         u64 root_gen = btrfs_header_generation(path->nodes[1]);
3652         u64 parent_start = path->nodes[1]->start;
3653         u64 parent_owner = btrfs_header_owner(path->nodes[1]);
3654
3655         ret = del_ptr(trans, root, path, 1, path->slots[1]);
3656         if (ret)
3657                 return ret;
3658
3659         /*
3660          * btrfs_free_extent is expensive, we want to make sure we
3661          * aren't holding any locks when we call it
3662          */
3663         btrfs_unlock_up_safe(path, 0);
3664
3665         ret = btrfs_free_extent(trans, root, bytenr,
3666                                 btrfs_level_size(root, 0),
3667                                 parent_start, parent_owner,
3668                                 root_gen, 0, 1);
3669         return ret;
3670 }
3671 /*
3672  * delete the item at the leaf level in path.  If that empties
3673  * the leaf, remove it from the tree
3674  */
3675 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3676                     struct btrfs_path *path, int slot, int nr)
3677 {
3678         struct extent_buffer *leaf;
3679         struct btrfs_item *item;
3680         int last_off;
3681         int dsize = 0;
3682         int ret = 0;
3683         int wret;
3684         int i;
3685         u32 nritems;
3686
3687         leaf = path->nodes[0];
3688         last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
3689
3690         for (i = 0; i < nr; i++)
3691                 dsize += btrfs_item_size_nr(leaf, slot + i);
3692
3693         nritems = btrfs_header_nritems(leaf);
3694
3695         if (slot + nr != nritems) {
3696                 int data_end = leaf_data_end(root, leaf);
3697
3698                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3699                               data_end + dsize,
3700                               btrfs_leaf_data(leaf) + data_end,
3701                               last_off - data_end);
3702
3703                 for (i = slot + nr; i < nritems; i++) {
3704                         u32 ioff;
3705
3706                         item = btrfs_item_nr(leaf, i);
3707                         if (!leaf->map_token) {
3708                                 map_extent_buffer(leaf, (unsigned long)item,
3709                                         sizeof(struct btrfs_item),
3710                                         &leaf->map_token, &leaf->kaddr,
3711                                         &leaf->map_start, &leaf->map_len,
3712                                         KM_USER1);
3713                         }
3714                         ioff = btrfs_item_offset(leaf, item);
3715                         btrfs_set_item_offset(leaf, item, ioff + dsize);
3716                 }
3717
3718                 if (leaf->map_token) {
3719                         unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3720                         leaf->map_token = NULL;
3721                 }
3722
3723                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
3724                               btrfs_item_nr_offset(slot + nr),
3725                               sizeof(struct btrfs_item) *
3726                               (nritems - slot - nr));
3727         }
3728         btrfs_set_header_nritems(leaf, nritems - nr);
3729         nritems -= nr;
3730
3731         /* delete the leaf if we've emptied it */
3732         if (nritems == 0) {
3733                 if (leaf == root->node) {
3734                         btrfs_set_header_level(leaf, 0);
3735                 } else {
3736                         ret = btrfs_del_leaf(trans, root, path, leaf->start);
3737                         BUG_ON(ret);
3738                 }
3739         } else {
3740                 int used = leaf_space_used(leaf, 0, nritems);
3741                 if (slot == 0) {
3742                         struct btrfs_disk_key disk_key;
3743
3744                         btrfs_item_key(leaf, &disk_key, 0);
3745                         wret = fixup_low_keys(trans, root, path,
3746                                               &disk_key, 1);
3747                         if (wret)
3748                                 ret = wret;
3749                 }
3750
3751                 /* delete the leaf if it is mostly empty */
3752                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
3753                         /* push_leaf_left fixes the path.
3754                          * make sure the path still points to our leaf
3755                          * for possible call to del_ptr below
3756                          */
3757                         slot = path->slots[1];
3758                         extent_buffer_get(leaf);
3759
3760                         wret = push_leaf_left(trans, root, path, 1, 1);
3761                         if (wret < 0 && wret != -ENOSPC)
3762                                 ret = wret;
3763
3764                         if (path->nodes[0] == leaf &&
3765                             btrfs_header_nritems(leaf)) {
3766                                 wret = push_leaf_right(trans, root, path, 1, 1);
3767                                 if (wret < 0 && wret != -ENOSPC)
3768                                         ret = wret;
3769                         }
3770
3771                         if (btrfs_header_nritems(leaf) == 0) {
3772                                 path->slots[1] = slot;
3773                                 ret = btrfs_del_leaf(trans, root, path,
3774                                                      leaf->start);
3775                                 BUG_ON(ret);
3776                                 free_extent_buffer(leaf);
3777                         } else {
3778                                 /* if we're still in the path, make sure
3779                                  * we're dirty.  Otherwise, one of the
3780                                  * push_leaf functions must have already
3781                                  * dirtied this buffer
3782                                  */
3783                                 if (path->nodes[0] == leaf)
3784                                         btrfs_mark_buffer_dirty(leaf);
3785                                 free_extent_buffer(leaf);
3786                         }
3787                 } else {
3788                         btrfs_mark_buffer_dirty(leaf);
3789                 }
3790         }
3791         return ret;
3792 }
3793
3794 /*
3795  * search the tree again to find a leaf with lesser keys
3796  * returns 0 if it found something or 1 if there are no lesser leaves.
3797  * returns < 0 on io errors.
3798  *
3799  * This may release the path, and so you may lose any locks held at the
3800  * time you call it.
3801  */
3802 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
3803 {
3804         struct btrfs_key key;
3805         struct btrfs_disk_key found_key;
3806         int ret;
3807
3808         btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
3809
3810         if (key.offset > 0)
3811                 key.offset--;
3812         else if (key.type > 0)
3813                 key.type--;
3814         else if (key.objectid > 0)
3815                 key.objectid--;
3816         else
3817                 return 1;
3818
3819         btrfs_release_path(root, path);
3820         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3821         if (ret < 0)
3822                 return ret;
3823         btrfs_item_key(path->nodes[0], &found_key, 0);
3824         ret = comp_keys(&found_key, &key);
3825         if (ret < 0)
3826                 return 0;
3827         return 1;
3828 }
3829
3830 /*
3831  * A helper function to walk down the tree starting at min_key, and looking
3832  * for nodes or leaves that are either in cache or have a minimum
3833  * transaction id.  This is used by the btree defrag code, and tree logging
3834  *
3835  * This does not cow, but it does stuff the starting key it finds back
3836  * into min_key, so you can call btrfs_search_slot with cow=1 on the
3837  * key and get a writable path.
3838  *
3839  * This does lock as it descends, and path->keep_locks should be set
3840  * to 1 by the caller.
3841  *
3842  * This honors path->lowest_level to prevent descent past a given level
3843  * of the tree.
3844  *
3845  * min_trans indicates the oldest transaction that you are interested
3846  * in walking through.  Any nodes or leaves older than min_trans are
3847  * skipped over (without reading them).
3848  *
3849  * returns zero if something useful was found, < 0 on error and 1 if there
3850  * was nothing in the tree that matched the search criteria.
3851  */
3852 int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
3853                          struct btrfs_key *max_key,
3854                          struct btrfs_path *path, int cache_only,
3855                          u64 min_trans)
3856 {
3857         struct extent_buffer *cur;
3858         struct btrfs_key found_key;
3859         int slot;
3860         int sret;
3861         u32 nritems;
3862         int level;
3863         int ret = 1;
3864
3865         WARN_ON(!path->keep_locks);
3866 again:
3867         cur = btrfs_lock_root_node(root);
3868         level = btrfs_header_level(cur);
3869         WARN_ON(path->nodes[level]);
3870         path->nodes[level] = cur;
3871         path->locks[level] = 1;
3872
3873         if (btrfs_header_generation(cur) < min_trans) {
3874                 ret = 1;
3875                 goto out;
3876         }
3877         while (1) {
3878                 nritems = btrfs_header_nritems(cur);
3879                 level = btrfs_header_level(cur);
3880                 sret = bin_search(cur, min_key, level, &slot);
3881
3882                 /* at the lowest level, we're done, setup the path and exit */
3883                 if (level == path->lowest_level) {
3884                         if (slot >= nritems)
3885                                 goto find_next_key;
3886                         ret = 0;
3887                         path->slots[level] = slot;
3888                         btrfs_item_key_to_cpu(cur, &found_key, slot);
3889                         goto out;
3890                 }
3891                 if (sret && slot > 0)
3892                         slot--;
3893                 /*
3894                  * check this node pointer against the cache_only and
3895                  * min_trans parameters.  If it isn't in cache or is too
3896                  * old, skip to the next one.
3897                  */
3898                 while (slot < nritems) {
3899                         u64 blockptr;
3900                         u64 gen;
3901                         struct extent_buffer *tmp;
3902                         struct btrfs_disk_key disk_key;
3903
3904                         blockptr = btrfs_node_blockptr(cur, slot);
3905                         gen = btrfs_node_ptr_generation(cur, slot);
3906                         if (gen < min_trans) {
3907                                 slot++;
3908                                 continue;
3909                         }
3910                         if (!cache_only)
3911                                 break;
3912
3913                         if (max_key) {
3914                                 btrfs_node_key(cur, &disk_key, slot);
3915                                 if (comp_keys(&disk_key, max_key) >= 0) {
3916                                         ret = 1;
3917                                         goto out;
3918                                 }
3919                         }
3920
3921                         tmp = btrfs_find_tree_block(root, blockptr,
3922                                             btrfs_level_size(root, level - 1));
3923
3924                         if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
3925                                 free_extent_buffer(tmp);
3926                                 break;
3927                         }
3928                         if (tmp)
3929                                 free_extent_buffer(tmp);
3930                         slot++;
3931                 }
3932 find_next_key:
3933                 /*
3934                  * we didn't find a candidate key in this node, walk forward
3935                  * and find another one
3936                  */
3937                 if (slot >= nritems) {
3938                         path->slots[level] = slot;
3939                         btrfs_set_path_blocking(path);
3940                         sret = btrfs_find_next_key(root, path, min_key, level,
3941                                                   cache_only, min_trans);
3942                         if (sret == 0) {
3943                                 btrfs_release_path(root, path);
3944                                 goto again;
3945                         } else {
3946                                 goto out;
3947                         }
3948                 }
3949                 /* save our key for returning back */
3950                 btrfs_node_key_to_cpu(cur, &found_key, slot);
3951                 path->slots[level] = slot;
3952                 if (level == path->lowest_level) {
3953                         ret = 0;
3954                         unlock_up(path, level, 1);
3955                         goto out;
3956                 }
3957                 btrfs_set_path_blocking(path);
3958                 cur = read_node_slot(root, cur, slot);
3959
3960                 btrfs_tree_lock(cur);
3961
3962                 path->locks[level - 1] = 1;
3963                 path->nodes[level - 1] = cur;
3964                 unlock_up(path, level, 1);
3965                 btrfs_clear_path_blocking(path, NULL);
3966         }
3967 out:
3968         if (ret == 0)
3969                 memcpy(min_key, &found_key, sizeof(found_key));
3970         btrfs_set_path_blocking(path);
3971         return ret;
3972 }
3973
3974 /*
3975  * this is similar to btrfs_next_leaf, but does not try to preserve
3976  * and fixup the path.  It looks for and returns the next key in the
3977  * tree based on the current path and the cache_only and min_trans
3978  * parameters.
3979  *
3980  * 0 is returned if another key is found, < 0 if there are any errors
3981  * and 1 is returned if there are no higher keys in the tree
3982  *
3983  * path->keep_locks should be set to 1 on the search made before
3984  * calling this function.
3985  */
3986 int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
3987                         struct btrfs_key *key, int lowest_level,
3988                         int cache_only, u64 min_trans)
3989 {
3990         int level = lowest_level;
3991         int slot;
3992         struct extent_buffer *c;
3993
3994         WARN_ON(!path->keep_locks);
3995         while (level < BTRFS_MAX_LEVEL) {
3996                 if (!path->nodes[level])
3997                         return 1;
3998
3999                 slot = path->slots[level] + 1;
4000                 c = path->nodes[level];
4001 next:
4002                 if (slot >= btrfs_header_nritems(c)) {
4003                         level++;
4004                         if (level == BTRFS_MAX_LEVEL)
4005                                 return 1;
4006                         continue;
4007                 }
4008                 if (level == 0)
4009                         btrfs_item_key_to_cpu(c, key, slot);
4010                 else {
4011                         u64 blockptr = btrfs_node_blockptr(c, slot);
4012                         u64 gen = btrfs_node_ptr_generation(c, slot);
4013
4014                         if (cache_only) {
4015                                 struct extent_buffer *cur;
4016                                 cur = btrfs_find_tree_block(root, blockptr,
4017                                             btrfs_level_size(root, level - 1));
4018                                 if (!cur || !btrfs_buffer_uptodate(cur, gen)) {
4019                                         slot++;
4020                                         if (cur)
4021                                                 free_extent_buffer(cur);
4022                                         goto next;
4023                                 }
4024                                 free_extent_buffer(cur);
4025                         }
4026                         if (gen < min_trans) {
4027                                 slot++;
4028                                 goto next;
4029                         }
4030                         btrfs_node_key_to_cpu(c, key, slot);
4031                 }
4032                 return 0;
4033         }
4034         return 1;
4035 }
4036
4037 /*
4038  * search the tree again to find a leaf with greater keys
4039  * returns 0 if it found something or 1 if there are no greater leaves.
4040  * returns < 0 on io errors.
4041  */
4042 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
4043 {
4044         int slot;
4045         int level = 1;
4046         struct extent_buffer *c;
4047         struct extent_buffer *next = NULL;
4048         struct btrfs_key key;
4049         u32 nritems;
4050         int ret;
4051
4052         nritems = btrfs_header_nritems(path->nodes[0]);
4053         if (nritems == 0)
4054                 return 1;
4055
4056         btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
4057
4058         btrfs_release_path(root, path);
4059         path->keep_locks = 1;
4060         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4061         path->keep_locks = 0;
4062
4063         if (ret < 0)
4064                 return ret;
4065
4066         btrfs_set_path_blocking(path);
4067         nritems = btrfs_header_nritems(path->nodes[0]);
4068         /*
4069          * by releasing the path above we dropped all our locks.  A balance
4070          * could have added more items next to the key that used to be
4071          * at the very end of the block.  So, check again here and
4072          * advance the path if there are now more items available.
4073          */
4074         if (nritems > 0 && path->slots[0] < nritems - 1) {
4075                 path->slots[0]++;
4076                 goto done;
4077         }
4078
4079         while (level < BTRFS_MAX_LEVEL) {
4080                 if (!path->nodes[level])
4081                         return 1;
4082
4083                 slot = path->slots[level] + 1;
4084                 c = path->nodes[level];
4085                 if (slot >= btrfs_header_nritems(c)) {
4086                         level++;
4087                         if (level == BTRFS_MAX_LEVEL)
4088                                 return 1;
4089                         continue;
4090                 }
4091
4092                 if (next) {
4093                         btrfs_tree_unlock(next);
4094                         free_extent_buffer(next);
4095                 }
4096
4097                 /* the path was set to blocking above */
4098                 if (level == 1 && (path->locks[1] || path->skip_locking) &&
4099                     path->reada)
4100                         reada_for_search(root, path, level, slot, 0);
4101
4102                 next = read_node_slot(root, c, slot);
4103                 if (!path->skip_locking) {
4104                         btrfs_assert_tree_locked(c);
4105                         btrfs_tree_lock(next);
4106                         btrfs_set_lock_blocking(next);
4107                 }
4108                 break;
4109         }
4110         path->slots[level] = slot;
4111         while (1) {
4112                 level--;
4113                 c = path->nodes[level];
4114                 if (path->locks[level])
4115                         btrfs_tree_unlock(c);
4116                 free_extent_buffer(c);
4117                 path->nodes[level] = next;
4118                 path->slots[level] = 0;
4119                 if (!path->skip_locking)
4120                         path->locks[level] = 1;
4121                 if (!level)
4122                         break;
4123
4124                 btrfs_set_path_blocking(path);
4125                 if (level == 1 && path->locks[1] && path->reada)
4126                         reada_for_search(root, path, level, slot, 0);
4127                 next = read_node_slot(root, next, 0);
4128                 if (!path->skip_locking) {
4129                         btrfs_assert_tree_locked(path->nodes[level]);
4130                         btrfs_tree_lock(next);
4131                         btrfs_set_lock_blocking(next);
4132                 }
4133         }
4134 done:
4135         unlock_up(path, 0, 1);
4136         return 0;
4137 }
4138
4139 /*
4140  * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
4141  * searching until it gets past min_objectid or finds an item of 'type'
4142  *
4143  * returns 0 if something is found, 1 if nothing was found and < 0 on error
4144  */
4145 int btrfs_previous_item(struct btrfs_root *root,
4146                         struct btrfs_path *path, u64 min_objectid,
4147                         int type)
4148 {
4149         struct btrfs_key found_key;
4150         struct extent_buffer *leaf;
4151         u32 nritems;
4152         int ret;
4153
4154         while (1) {
4155                 if (path->slots[0] == 0) {
4156                         btrfs_set_path_blocking(path);
4157                         ret = btrfs_prev_leaf(root, path);
4158                         if (ret != 0)
4159                                 return ret;
4160                 } else {
4161                         path->slots[0]--;
4162                 }
4163                 leaf = path->nodes[0];
4164                 nritems = btrfs_header_nritems(leaf);
4165                 if (nritems == 0)
4166                         return 1;
4167                 if (path->slots[0] == nritems)
4168                         path->slots[0]--;
4169
4170                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4171                 if (found_key.type == type)
4172                         return 0;
4173                 if (found_key.objectid < min_objectid)
4174                         break;
4175                 if (found_key.objectid == min_objectid &&
4176                     found_key.type < type)
4177                         break;
4178         }
4179         return 1;
4180 }