Btrfs: Fix compressed writes on truncated pages
[linux-2.6] / fs / btrfs / ctree.c
1 /*
2  * Copyright (C) 2007,2008 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "transaction.h"
23 #include "print-tree.h"
24 #include "locking.h"
25
26 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
27                       *root, struct btrfs_path *path, int level);
28 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
29                       *root, struct btrfs_key *ins_key,
30                       struct btrfs_path *path, int data_size, int extend);
31 static int push_node_left(struct btrfs_trans_handle *trans,
32                           struct btrfs_root *root, struct extent_buffer *dst,
33                           struct extent_buffer *src, int empty);
34 static int balance_node_right(struct btrfs_trans_handle *trans,
35                               struct btrfs_root *root,
36                               struct extent_buffer *dst_buf,
37                               struct extent_buffer *src_buf);
38 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
39                    struct btrfs_path *path, int level, int slot);
40
41 inline void btrfs_init_path(struct btrfs_path *p)
42 {
43         memset(p, 0, sizeof(*p));
44 }
45
46 struct btrfs_path *btrfs_alloc_path(void)
47 {
48         struct btrfs_path *path;
49         path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
50         if (path) {
51                 btrfs_init_path(path);
52                 path->reada = 1;
53         }
54         return path;
55 }
56
57 /* this also releases the path */
58 void btrfs_free_path(struct btrfs_path *p)
59 {
60         btrfs_release_path(NULL, p);
61         kmem_cache_free(btrfs_path_cachep, p);
62 }
63
64 /*
65  * path release drops references on the extent buffers in the path
66  * and it drops any locks held by this path
67  *
68  * It is safe to call this on paths that no locks or extent buffers held.
69  */
70 void noinline btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
71 {
72         int i;
73
74         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
75                 p->slots[i] = 0;
76                 if (!p->nodes[i])
77                         continue;
78                 if (p->locks[i]) {
79                         btrfs_tree_unlock(p->nodes[i]);
80                         p->locks[i] = 0;
81                 }
82                 free_extent_buffer(p->nodes[i]);
83                 p->nodes[i] = NULL;
84         }
85 }
86
87 /*
88  * safely gets a reference on the root node of a tree.  A lock
89  * is not taken, so a concurrent writer may put a different node
90  * at the root of the tree.  See btrfs_lock_root_node for the
91  * looping required.
92  *
93  * The extent buffer returned by this has a reference taken, so
94  * it won't disappear.  It may stop being the root of the tree
95  * at any time because there are no locks held.
96  */
97 struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
98 {
99         struct extent_buffer *eb;
100         spin_lock(&root->node_lock);
101         eb = root->node;
102         extent_buffer_get(eb);
103         spin_unlock(&root->node_lock);
104         return eb;
105 }
106
107 /* loop around taking references on and locking the root node of the
108  * tree until you end up with a lock on the root.  A locked buffer
109  * is returned, with a reference held.
110  */
111 struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
112 {
113         struct extent_buffer *eb;
114
115         while(1) {
116                 eb = btrfs_root_node(root);
117                 btrfs_tree_lock(eb);
118
119                 spin_lock(&root->node_lock);
120                 if (eb == root->node) {
121                         spin_unlock(&root->node_lock);
122                         break;
123                 }
124                 spin_unlock(&root->node_lock);
125
126                 btrfs_tree_unlock(eb);
127                 free_extent_buffer(eb);
128         }
129         return eb;
130 }
131
132 /* cowonly root (everything not a reference counted cow subvolume), just get
133  * put onto a simple dirty list.  transaction.c walks this to make sure they
134  * get properly updated on disk.
135  */
136 static void add_root_to_dirty_list(struct btrfs_root *root)
137 {
138         if (root->track_dirty && list_empty(&root->dirty_list)) {
139                 list_add(&root->dirty_list,
140                          &root->fs_info->dirty_cowonly_roots);
141         }
142 }
143
144 /*
145  * used by snapshot creation to make a copy of a root for a tree with
146  * a given objectid.  The buffer with the new root node is returned in
147  * cow_ret, and this func returns zero on success or a negative error code.
148  */
149 int btrfs_copy_root(struct btrfs_trans_handle *trans,
150                       struct btrfs_root *root,
151                       struct extent_buffer *buf,
152                       struct extent_buffer **cow_ret, u64 new_root_objectid)
153 {
154         struct extent_buffer *cow;
155         u32 nritems;
156         int ret = 0;
157         int level;
158         struct btrfs_root *new_root;
159
160         new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
161         if (!new_root)
162                 return -ENOMEM;
163
164         memcpy(new_root, root, sizeof(*new_root));
165         new_root->root_key.objectid = new_root_objectid;
166
167         WARN_ON(root->ref_cows && trans->transid !=
168                 root->fs_info->running_transaction->transid);
169         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
170
171         level = btrfs_header_level(buf);
172         nritems = btrfs_header_nritems(buf);
173
174         cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0,
175                                      new_root_objectid, trans->transid,
176                                      level, buf->start, 0);
177         if (IS_ERR(cow)) {
178                 kfree(new_root);
179                 return PTR_ERR(cow);
180         }
181
182         copy_extent_buffer(cow, buf, 0, 0, cow->len);
183         btrfs_set_header_bytenr(cow, cow->start);
184         btrfs_set_header_generation(cow, trans->transid);
185         btrfs_set_header_owner(cow, new_root_objectid);
186         btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
187
188         write_extent_buffer(cow, root->fs_info->fsid,
189                             (unsigned long)btrfs_header_fsid(cow),
190                             BTRFS_FSID_SIZE);
191
192         WARN_ON(btrfs_header_generation(buf) > trans->transid);
193         ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL);
194         kfree(new_root);
195
196         if (ret)
197                 return ret;
198
199         btrfs_mark_buffer_dirty(cow);
200         *cow_ret = cow;
201         return 0;
202 }
203
204 /*
205  * does the dirty work in cow of a single block.  The parent block
206  * (if supplied) is updated to point to the new cow copy.  The new
207  * buffer is marked dirty and returned locked.  If you modify the block
208  * it needs to be marked dirty again.
209  *
210  * search_start -- an allocation hint for the new block
211  *
212  * empty_size -- a hint that you plan on doing more cow.  This is the size in bytes
213  * the allocator should try to find free next to the block it returns.  This is
214  * just a hint and may be ignored by the allocator.
215  *
216  * prealloc_dest -- if you have already reserved a destination for the cow,
217  * this uses that block instead of allocating a new one.  btrfs_alloc_reserved_extent
218  * is used to finish the allocation.
219  */
220 static int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans,
221                              struct btrfs_root *root,
222                              struct extent_buffer *buf,
223                              struct extent_buffer *parent, int parent_slot,
224                              struct extent_buffer **cow_ret,
225                              u64 search_start, u64 empty_size,
226                              u64 prealloc_dest)
227 {
228         u64 parent_start;
229         struct extent_buffer *cow;
230         u32 nritems;
231         int ret = 0;
232         int level;
233         int unlock_orig = 0;
234
235         if (*cow_ret == buf)
236                 unlock_orig = 1;
237
238         WARN_ON(!btrfs_tree_locked(buf));
239
240         if (parent)
241                 parent_start = parent->start;
242         else
243                 parent_start = 0;
244
245         WARN_ON(root->ref_cows && trans->transid !=
246                 root->fs_info->running_transaction->transid);
247         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
248
249         level = btrfs_header_level(buf);
250         nritems = btrfs_header_nritems(buf);
251
252         if (prealloc_dest) {
253                 struct btrfs_key ins;
254
255                 ins.objectid = prealloc_dest;
256                 ins.offset = buf->len;
257                 ins.type = BTRFS_EXTENT_ITEM_KEY;
258
259                 ret = btrfs_alloc_reserved_extent(trans, root, parent_start,
260                                                   root->root_key.objectid,
261                                                   trans->transid, level, &ins);
262                 BUG_ON(ret);
263                 cow = btrfs_init_new_buffer(trans, root, prealloc_dest,
264                                             buf->len);
265         } else {
266                 cow = btrfs_alloc_free_block(trans, root, buf->len,
267                                              parent_start,
268                                              root->root_key.objectid,
269                                              trans->transid, level,
270                                              search_start, empty_size);
271         }
272         if (IS_ERR(cow))
273                 return PTR_ERR(cow);
274
275         copy_extent_buffer(cow, buf, 0, 0, cow->len);
276         btrfs_set_header_bytenr(cow, cow->start);
277         btrfs_set_header_generation(cow, trans->transid);
278         btrfs_set_header_owner(cow, root->root_key.objectid);
279         btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
280
281         write_extent_buffer(cow, root->fs_info->fsid,
282                             (unsigned long)btrfs_header_fsid(cow),
283                             BTRFS_FSID_SIZE);
284
285         WARN_ON(btrfs_header_generation(buf) > trans->transid);
286         if (btrfs_header_generation(buf) != trans->transid) {
287                 u32 nr_extents;
288                 ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents);
289                 if (ret)
290                         return ret;
291
292                 ret = btrfs_cache_ref(trans, root, buf, nr_extents);
293                 WARN_ON(ret);
294         } else if (btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID) {
295                 /*
296                  * There are only two places that can drop reference to
297                  * tree blocks owned by living reloc trees, one is here,
298                  * the other place is btrfs_drop_subtree. In both places,
299                  * we check reference count while tree block is locked.
300                  * Furthermore, if reference count is one, it won't get
301                  * increased by someone else.
302                  */
303                 u32 refs;
304                 ret = btrfs_lookup_extent_ref(trans, root, buf->start,
305                                               buf->len, &refs);
306                 BUG_ON(ret);
307                 if (refs == 1) {
308                         ret = btrfs_update_ref(trans, root, buf, cow,
309                                                0, nritems);
310                         clean_tree_block(trans, root, buf);
311                 } else {
312                         ret = btrfs_inc_ref(trans, root, buf, cow, NULL);
313                 }
314                 BUG_ON(ret);
315         } else {
316                 ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems);
317                 if (ret)
318                         return ret;
319                 clean_tree_block(trans, root, buf);
320         }
321
322         if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
323                 ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start);
324                 WARN_ON(ret);
325         }
326
327         if (buf == root->node) {
328                 WARN_ON(parent && parent != buf);
329
330                 spin_lock(&root->node_lock);
331                 root->node = cow;
332                 extent_buffer_get(cow);
333                 spin_unlock(&root->node_lock);
334
335                 if (buf != root->commit_root) {
336                         btrfs_free_extent(trans, root, buf->start,
337                                           buf->len, buf->start,
338                                           root->root_key.objectid,
339                                           btrfs_header_generation(buf),
340                                           level, 1);
341                 }
342                 free_extent_buffer(buf);
343                 add_root_to_dirty_list(root);
344         } else {
345                 btrfs_set_node_blockptr(parent, parent_slot,
346                                         cow->start);
347                 WARN_ON(trans->transid == 0);
348                 btrfs_set_node_ptr_generation(parent, parent_slot,
349                                               trans->transid);
350                 btrfs_mark_buffer_dirty(parent);
351                 WARN_ON(btrfs_header_generation(parent) != trans->transid);
352                 btrfs_free_extent(trans, root, buf->start, buf->len,
353                                   parent_start, btrfs_header_owner(parent),
354                                   btrfs_header_generation(parent), level, 1);
355         }
356         if (unlock_orig)
357                 btrfs_tree_unlock(buf);
358         free_extent_buffer(buf);
359         btrfs_mark_buffer_dirty(cow);
360         *cow_ret = cow;
361         return 0;
362 }
363
364 /*
365  * cows a single block, see __btrfs_cow_block for the real work.
366  * This version of it has extra checks so that a block isn't cow'd more than
367  * once per transaction, as long as it hasn't been written yet
368  */
369 int noinline btrfs_cow_block(struct btrfs_trans_handle *trans,
370                     struct btrfs_root *root, struct extent_buffer *buf,
371                     struct extent_buffer *parent, int parent_slot,
372                     struct extent_buffer **cow_ret, u64 prealloc_dest)
373 {
374         u64 search_start;
375         int ret;
376
377         if (trans->transaction != root->fs_info->running_transaction) {
378                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
379                        root->fs_info->running_transaction->transid);
380                 WARN_ON(1);
381         }
382         if (trans->transid != root->fs_info->generation) {
383                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
384                        root->fs_info->generation);
385                 WARN_ON(1);
386         }
387
388         spin_lock(&root->fs_info->hash_lock);
389         if (btrfs_header_generation(buf) == trans->transid &&
390             btrfs_header_owner(buf) == root->root_key.objectid &&
391             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
392                 *cow_ret = buf;
393                 spin_unlock(&root->fs_info->hash_lock);
394                 WARN_ON(prealloc_dest);
395                 return 0;
396         }
397         spin_unlock(&root->fs_info->hash_lock);
398         search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
399         ret = __btrfs_cow_block(trans, root, buf, parent,
400                                  parent_slot, cow_ret, search_start, 0,
401                                  prealloc_dest);
402         return ret;
403 }
404
405 /*
406  * helper function for defrag to decide if two blocks pointed to by a
407  * node are actually close by
408  */
409 static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
410 {
411         if (blocknr < other && other - (blocknr + blocksize) < 32768)
412                 return 1;
413         if (blocknr > other && blocknr - (other + blocksize) < 32768)
414                 return 1;
415         return 0;
416 }
417
418 /*
419  * compare two keys in a memcmp fashion
420  */
421 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
422 {
423         struct btrfs_key k1;
424
425         btrfs_disk_key_to_cpu(&k1, disk);
426
427         if (k1.objectid > k2->objectid)
428                 return 1;
429         if (k1.objectid < k2->objectid)
430                 return -1;
431         if (k1.type > k2->type)
432                 return 1;
433         if (k1.type < k2->type)
434                 return -1;
435         if (k1.offset > k2->offset)
436                 return 1;
437         if (k1.offset < k2->offset)
438                 return -1;
439         return 0;
440 }
441
442 /*
443  * same as comp_keys only with two btrfs_key's
444  */
445 static int comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
446 {
447         if (k1->objectid > k2->objectid)
448                 return 1;
449         if (k1->objectid < k2->objectid)
450                 return -1;
451         if (k1->type > k2->type)
452                 return 1;
453         if (k1->type < k2->type)
454                 return -1;
455         if (k1->offset > k2->offset)
456                 return 1;
457         if (k1->offset < k2->offset)
458                 return -1;
459         return 0;
460 }
461
462 /*
463  * this is used by the defrag code to go through all the
464  * leaves pointed to by a node and reallocate them so that
465  * disk order is close to key order
466  */
467 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
468                        struct btrfs_root *root, struct extent_buffer *parent,
469                        int start_slot, int cache_only, u64 *last_ret,
470                        struct btrfs_key *progress)
471 {
472         struct extent_buffer *cur;
473         u64 blocknr;
474         u64 gen;
475         u64 search_start = *last_ret;
476         u64 last_block = 0;
477         u64 other;
478         u32 parent_nritems;
479         int end_slot;
480         int i;
481         int err = 0;
482         int parent_level;
483         int uptodate;
484         u32 blocksize;
485         int progress_passed = 0;
486         struct btrfs_disk_key disk_key;
487
488         parent_level = btrfs_header_level(parent);
489         if (cache_only && parent_level != 1)
490                 return 0;
491
492         if (trans->transaction != root->fs_info->running_transaction) {
493                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
494                        root->fs_info->running_transaction->transid);
495                 WARN_ON(1);
496         }
497         if (trans->transid != root->fs_info->generation) {
498                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
499                        root->fs_info->generation);
500                 WARN_ON(1);
501         }
502
503         parent_nritems = btrfs_header_nritems(parent);
504         blocksize = btrfs_level_size(root, parent_level - 1);
505         end_slot = parent_nritems;
506
507         if (parent_nritems == 1)
508                 return 0;
509
510         for (i = start_slot; i < end_slot; i++) {
511                 int close = 1;
512
513                 if (!parent->map_token) {
514                         map_extent_buffer(parent,
515                                         btrfs_node_key_ptr_offset(i),
516                                         sizeof(struct btrfs_key_ptr),
517                                         &parent->map_token, &parent->kaddr,
518                                         &parent->map_start, &parent->map_len,
519                                         KM_USER1);
520                 }
521                 btrfs_node_key(parent, &disk_key, i);
522                 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
523                         continue;
524
525                 progress_passed = 1;
526                 blocknr = btrfs_node_blockptr(parent, i);
527                 gen = btrfs_node_ptr_generation(parent, i);
528                 if (last_block == 0)
529                         last_block = blocknr;
530
531                 if (i > 0) {
532                         other = btrfs_node_blockptr(parent, i - 1);
533                         close = close_blocks(blocknr, other, blocksize);
534                 }
535                 if (!close && i < end_slot - 2) {
536                         other = btrfs_node_blockptr(parent, i + 1);
537                         close = close_blocks(blocknr, other, blocksize);
538                 }
539                 if (close) {
540                         last_block = blocknr;
541                         continue;
542                 }
543                 if (parent->map_token) {
544                         unmap_extent_buffer(parent, parent->map_token,
545                                             KM_USER1);
546                         parent->map_token = NULL;
547                 }
548
549                 cur = btrfs_find_tree_block(root, blocknr, blocksize);
550                 if (cur)
551                         uptodate = btrfs_buffer_uptodate(cur, gen);
552                 else
553                         uptodate = 0;
554                 if (!cur || !uptodate) {
555                         if (cache_only) {
556                                 free_extent_buffer(cur);
557                                 continue;
558                         }
559                         if (!cur) {
560                                 cur = read_tree_block(root, blocknr,
561                                                          blocksize, gen);
562                         } else if (!uptodate) {
563                                 btrfs_read_buffer(cur, gen);
564                         }
565                 }
566                 if (search_start == 0)
567                         search_start = last_block;
568
569                 btrfs_tree_lock(cur);
570                 err = __btrfs_cow_block(trans, root, cur, parent, i,
571                                         &cur, search_start,
572                                         min(16 * blocksize,
573                                             (end_slot - i) * blocksize), 0);
574                 if (err) {
575                         btrfs_tree_unlock(cur);
576                         free_extent_buffer(cur);
577                         break;
578                 }
579                 search_start = cur->start;
580                 last_block = cur->start;
581                 *last_ret = search_start;
582                 btrfs_tree_unlock(cur);
583                 free_extent_buffer(cur);
584         }
585         if (parent->map_token) {
586                 unmap_extent_buffer(parent, parent->map_token,
587                                     KM_USER1);
588                 parent->map_token = NULL;
589         }
590         return err;
591 }
592
593 /*
594  * The leaf data grows from end-to-front in the node.
595  * this returns the address of the start of the last item,
596  * which is the stop of the leaf data stack
597  */
598 static inline unsigned int leaf_data_end(struct btrfs_root *root,
599                                          struct extent_buffer *leaf)
600 {
601         u32 nr = btrfs_header_nritems(leaf);
602         if (nr == 0)
603                 return BTRFS_LEAF_DATA_SIZE(root);
604         return btrfs_item_offset_nr(leaf, nr - 1);
605 }
606
607 /*
608  * extra debugging checks to make sure all the items in a key are
609  * well formed and in the proper order
610  */
611 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
612                       int level)
613 {
614         struct extent_buffer *parent = NULL;
615         struct extent_buffer *node = path->nodes[level];
616         struct btrfs_disk_key parent_key;
617         struct btrfs_disk_key node_key;
618         int parent_slot;
619         int slot;
620         struct btrfs_key cpukey;
621         u32 nritems = btrfs_header_nritems(node);
622
623         if (path->nodes[level + 1])
624                 parent = path->nodes[level + 1];
625
626         slot = path->slots[level];
627         BUG_ON(nritems == 0);
628         if (parent) {
629                 parent_slot = path->slots[level + 1];
630                 btrfs_node_key(parent, &parent_key, parent_slot);
631                 btrfs_node_key(node, &node_key, 0);
632                 BUG_ON(memcmp(&parent_key, &node_key,
633                               sizeof(struct btrfs_disk_key)));
634                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
635                        btrfs_header_bytenr(node));
636         }
637         BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
638         if (slot != 0) {
639                 btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
640                 btrfs_node_key(node, &node_key, slot);
641                 BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
642         }
643         if (slot < nritems - 1) {
644                 btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
645                 btrfs_node_key(node, &node_key, slot);
646                 BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
647         }
648         return 0;
649 }
650
651 /*
652  * extra checking to make sure all the items in a leaf are
653  * well formed and in the proper order
654  */
655 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
656                       int level)
657 {
658         struct extent_buffer *leaf = path->nodes[level];
659         struct extent_buffer *parent = NULL;
660         int parent_slot;
661         struct btrfs_key cpukey;
662         struct btrfs_disk_key parent_key;
663         struct btrfs_disk_key leaf_key;
664         int slot = path->slots[0];
665
666         u32 nritems = btrfs_header_nritems(leaf);
667
668         if (path->nodes[level + 1])
669                 parent = path->nodes[level + 1];
670
671         if (nritems == 0)
672                 return 0;
673
674         if (parent) {
675                 parent_slot = path->slots[level + 1];
676                 btrfs_node_key(parent, &parent_key, parent_slot);
677                 btrfs_item_key(leaf, &leaf_key, 0);
678
679                 BUG_ON(memcmp(&parent_key, &leaf_key,
680                        sizeof(struct btrfs_disk_key)));
681                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
682                        btrfs_header_bytenr(leaf));
683         }
684 #if 0
685         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
686                 btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
687                 btrfs_item_key(leaf, &leaf_key, i);
688                 if (comp_keys(&leaf_key, &cpukey) >= 0) {
689                         btrfs_print_leaf(root, leaf);
690                         printk("slot %d offset bad key\n", i);
691                         BUG_ON(1);
692                 }
693                 if (btrfs_item_offset_nr(leaf, i) !=
694                         btrfs_item_end_nr(leaf, i + 1)) {
695                         btrfs_print_leaf(root, leaf);
696                         printk("slot %d offset bad\n", i);
697                         BUG_ON(1);
698                 }
699                 if (i == 0) {
700                         if (btrfs_item_offset_nr(leaf, i) +
701                                btrfs_item_size_nr(leaf, i) !=
702                                BTRFS_LEAF_DATA_SIZE(root)) {
703                                 btrfs_print_leaf(root, leaf);
704                                 printk("slot %d first offset bad\n", i);
705                                 BUG_ON(1);
706                         }
707                 }
708         }
709         if (nritems > 0) {
710                 if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
711                                 btrfs_print_leaf(root, leaf);
712                                 printk("slot %d bad size \n", nritems - 1);
713                                 BUG_ON(1);
714                 }
715         }
716 #endif
717         if (slot != 0 && slot < nritems - 1) {
718                 btrfs_item_key(leaf, &leaf_key, slot);
719                 btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
720                 if (comp_keys(&leaf_key, &cpukey) <= 0) {
721                         btrfs_print_leaf(root, leaf);
722                         printk("slot %d offset bad key\n", slot);
723                         BUG_ON(1);
724                 }
725                 if (btrfs_item_offset_nr(leaf, slot - 1) !=
726                        btrfs_item_end_nr(leaf, slot)) {
727                         btrfs_print_leaf(root, leaf);
728                         printk("slot %d offset bad\n", slot);
729                         BUG_ON(1);
730                 }
731         }
732         if (slot < nritems - 1) {
733                 btrfs_item_key(leaf, &leaf_key, slot);
734                 btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
735                 BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
736                 if (btrfs_item_offset_nr(leaf, slot) !=
737                         btrfs_item_end_nr(leaf, slot + 1)) {
738                         btrfs_print_leaf(root, leaf);
739                         printk("slot %d offset bad\n", slot);
740                         BUG_ON(1);
741                 }
742         }
743         BUG_ON(btrfs_item_offset_nr(leaf, 0) +
744                btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
745         return 0;
746 }
747
748 static int noinline check_block(struct btrfs_root *root,
749                                 struct btrfs_path *path, int level)
750 {
751         u64 found_start;
752         return 0;
753         if (btrfs_header_level(path->nodes[level]) != level)
754             printk("warning: bad level %Lu wanted %d found %d\n",
755                    path->nodes[level]->start, level,
756                    btrfs_header_level(path->nodes[level]));
757         found_start = btrfs_header_bytenr(path->nodes[level]);
758         if (found_start != path->nodes[level]->start) {
759             printk("warning: bad bytentr %Lu found %Lu\n",
760                    path->nodes[level]->start, found_start);
761         }
762 #if 0
763         struct extent_buffer *buf = path->nodes[level];
764
765         if (memcmp_extent_buffer(buf, root->fs_info->fsid,
766                                  (unsigned long)btrfs_header_fsid(buf),
767                                  BTRFS_FSID_SIZE)) {
768                 printk("warning bad block %Lu\n", buf->start);
769                 return 1;
770         }
771 #endif
772         if (level == 0)
773                 return check_leaf(root, path, level);
774         return check_node(root, path, level);
775 }
776
777 /*
778  * search for key in the extent_buffer.  The items start at offset p,
779  * and they are item_size apart.  There are 'max' items in p.
780  *
781  * the slot in the array is returned via slot, and it points to
782  * the place where you would insert key if it is not found in
783  * the array.
784  *
785  * slot may point to max if the key is bigger than all of the keys
786  */
787 static noinline int generic_bin_search(struct extent_buffer *eb,
788                                        unsigned long p,
789                                        int item_size, struct btrfs_key *key,
790                                        int max, int *slot)
791 {
792         int low = 0;
793         int high = max;
794         int mid;
795         int ret;
796         struct btrfs_disk_key *tmp = NULL;
797         struct btrfs_disk_key unaligned;
798         unsigned long offset;
799         char *map_token = NULL;
800         char *kaddr = NULL;
801         unsigned long map_start = 0;
802         unsigned long map_len = 0;
803         int err;
804
805         while(low < high) {
806                 mid = (low + high) / 2;
807                 offset = p + mid * item_size;
808
809                 if (!map_token || offset < map_start ||
810                     (offset + sizeof(struct btrfs_disk_key)) >
811                     map_start + map_len) {
812                         if (map_token) {
813                                 unmap_extent_buffer(eb, map_token, KM_USER0);
814                                 map_token = NULL;
815                         }
816
817                         err = map_private_extent_buffer(eb, offset,
818                                                 sizeof(struct btrfs_disk_key),
819                                                 &map_token, &kaddr,
820                                                 &map_start, &map_len, KM_USER0);
821
822                         if (!err) {
823                                 tmp = (struct btrfs_disk_key *)(kaddr + offset -
824                                                         map_start);
825                         } else {
826                                 read_extent_buffer(eb, &unaligned,
827                                                    offset, sizeof(unaligned));
828                                 tmp = &unaligned;
829                         }
830
831                 } else {
832                         tmp = (struct btrfs_disk_key *)(kaddr + offset -
833                                                         map_start);
834                 }
835                 ret = comp_keys(tmp, key);
836
837                 if (ret < 0)
838                         low = mid + 1;
839                 else if (ret > 0)
840                         high = mid;
841                 else {
842                         *slot = mid;
843                         if (map_token)
844                                 unmap_extent_buffer(eb, map_token, KM_USER0);
845                         return 0;
846                 }
847         }
848         *slot = low;
849         if (map_token)
850                 unmap_extent_buffer(eb, map_token, KM_USER0);
851         return 1;
852 }
853
854 /*
855  * simple bin_search frontend that does the right thing for
856  * leaves vs nodes
857  */
858 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
859                       int level, int *slot)
860 {
861         if (level == 0) {
862                 return generic_bin_search(eb,
863                                           offsetof(struct btrfs_leaf, items),
864                                           sizeof(struct btrfs_item),
865                                           key, btrfs_header_nritems(eb),
866                                           slot);
867         } else {
868                 return generic_bin_search(eb,
869                                           offsetof(struct btrfs_node, ptrs),
870                                           sizeof(struct btrfs_key_ptr),
871                                           key, btrfs_header_nritems(eb),
872                                           slot);
873         }
874         return -1;
875 }
876
877 /* given a node and slot number, this reads the blocks it points to.  The
878  * extent buffer is returned with a reference taken (but unlocked).
879  * NULL is returned on error.
880  */
881 static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
882                                    struct extent_buffer *parent, int slot)
883 {
884         int level = btrfs_header_level(parent);
885         if (slot < 0)
886                 return NULL;
887         if (slot >= btrfs_header_nritems(parent))
888                 return NULL;
889
890         BUG_ON(level == 0);
891
892         return read_tree_block(root, btrfs_node_blockptr(parent, slot),
893                        btrfs_level_size(root, level - 1),
894                        btrfs_node_ptr_generation(parent, slot));
895 }
896
897 /*
898  * node level balancing, used to make sure nodes are in proper order for
899  * item deletion.  We balance from the top down, so we have to make sure
900  * that a deletion won't leave an node completely empty later on.
901  */
902 static noinline int balance_level(struct btrfs_trans_handle *trans,
903                          struct btrfs_root *root,
904                          struct btrfs_path *path, int level)
905 {
906         struct extent_buffer *right = NULL;
907         struct extent_buffer *mid;
908         struct extent_buffer *left = NULL;
909         struct extent_buffer *parent = NULL;
910         int ret = 0;
911         int wret;
912         int pslot;
913         int orig_slot = path->slots[level];
914         int err_on_enospc = 0;
915         u64 orig_ptr;
916
917         if (level == 0)
918                 return 0;
919
920         mid = path->nodes[level];
921         WARN_ON(!path->locks[level]);
922         WARN_ON(btrfs_header_generation(mid) != trans->transid);
923
924         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
925
926         if (level < BTRFS_MAX_LEVEL - 1)
927                 parent = path->nodes[level + 1];
928         pslot = path->slots[level + 1];
929
930         /*
931          * deal with the case where there is only one pointer in the root
932          * by promoting the node below to a root
933          */
934         if (!parent) {
935                 struct extent_buffer *child;
936
937                 if (btrfs_header_nritems(mid) != 1)
938                         return 0;
939
940                 /* promote the child to a root */
941                 child = read_node_slot(root, mid, 0);
942                 btrfs_tree_lock(child);
943                 BUG_ON(!child);
944                 ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0);
945                 BUG_ON(ret);
946
947                 spin_lock(&root->node_lock);
948                 root->node = child;
949                 spin_unlock(&root->node_lock);
950
951                 ret = btrfs_update_extent_ref(trans, root, child->start,
952                                               mid->start, child->start,
953                                               root->root_key.objectid,
954                                               trans->transid, level - 1);
955                 BUG_ON(ret);
956
957                 add_root_to_dirty_list(root);
958                 btrfs_tree_unlock(child);
959                 path->locks[level] = 0;
960                 path->nodes[level] = NULL;
961                 clean_tree_block(trans, root, mid);
962                 btrfs_tree_unlock(mid);
963                 /* once for the path */
964                 free_extent_buffer(mid);
965                 ret = btrfs_free_extent(trans, root, mid->start, mid->len,
966                                         mid->start, root->root_key.objectid,
967                                         btrfs_header_generation(mid),
968                                         level, 1);
969                 /* once for the root ptr */
970                 free_extent_buffer(mid);
971                 return ret;
972         }
973         if (btrfs_header_nritems(mid) >
974             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
975                 return 0;
976
977         if (btrfs_header_nritems(mid) < 2)
978                 err_on_enospc = 1;
979
980         left = read_node_slot(root, parent, pslot - 1);
981         if (left) {
982                 btrfs_tree_lock(left);
983                 wret = btrfs_cow_block(trans, root, left,
984                                        parent, pslot - 1, &left, 0);
985                 if (wret) {
986                         ret = wret;
987                         goto enospc;
988                 }
989         }
990         right = read_node_slot(root, parent, pslot + 1);
991         if (right) {
992                 btrfs_tree_lock(right);
993                 wret = btrfs_cow_block(trans, root, right,
994                                        parent, pslot + 1, &right, 0);
995                 if (wret) {
996                         ret = wret;
997                         goto enospc;
998                 }
999         }
1000
1001         /* first, try to make some room in the middle buffer */
1002         if (left) {
1003                 orig_slot += btrfs_header_nritems(left);
1004                 wret = push_node_left(trans, root, left, mid, 1);
1005                 if (wret < 0)
1006                         ret = wret;
1007                 if (btrfs_header_nritems(mid) < 2)
1008                         err_on_enospc = 1;
1009         }
1010
1011         /*
1012          * then try to empty the right most buffer into the middle
1013          */
1014         if (right) {
1015                 wret = push_node_left(trans, root, mid, right, 1);
1016                 if (wret < 0 && wret != -ENOSPC)
1017                         ret = wret;
1018                 if (btrfs_header_nritems(right) == 0) {
1019                         u64 bytenr = right->start;
1020                         u64 generation = btrfs_header_generation(parent);
1021                         u32 blocksize = right->len;
1022
1023                         clean_tree_block(trans, root, right);
1024                         btrfs_tree_unlock(right);
1025                         free_extent_buffer(right);
1026                         right = NULL;
1027                         wret = del_ptr(trans, root, path, level + 1, pslot +
1028                                        1);
1029                         if (wret)
1030                                 ret = wret;
1031                         wret = btrfs_free_extent(trans, root, bytenr,
1032                                                  blocksize, parent->start,
1033                                                  btrfs_header_owner(parent),
1034                                                  generation, level, 1);
1035                         if (wret)
1036                                 ret = wret;
1037                 } else {
1038                         struct btrfs_disk_key right_key;
1039                         btrfs_node_key(right, &right_key, 0);
1040                         btrfs_set_node_key(parent, &right_key, pslot + 1);
1041                         btrfs_mark_buffer_dirty(parent);
1042                 }
1043         }
1044         if (btrfs_header_nritems(mid) == 1) {
1045                 /*
1046                  * we're not allowed to leave a node with one item in the
1047                  * tree during a delete.  A deletion from lower in the tree
1048                  * could try to delete the only pointer in this node.
1049                  * So, pull some keys from the left.
1050                  * There has to be a left pointer at this point because
1051                  * otherwise we would have pulled some pointers from the
1052                  * right
1053                  */
1054                 BUG_ON(!left);
1055                 wret = balance_node_right(trans, root, mid, left);
1056                 if (wret < 0) {
1057                         ret = wret;
1058                         goto enospc;
1059                 }
1060                 if (wret == 1) {
1061                         wret = push_node_left(trans, root, left, mid, 1);
1062                         if (wret < 0)
1063                                 ret = wret;
1064                 }
1065                 BUG_ON(wret == 1);
1066         }
1067         if (btrfs_header_nritems(mid) == 0) {
1068                 /* we've managed to empty the middle node, drop it */
1069                 u64 root_gen = btrfs_header_generation(parent);
1070                 u64 bytenr = mid->start;
1071                 u32 blocksize = mid->len;
1072
1073                 clean_tree_block(trans, root, mid);
1074                 btrfs_tree_unlock(mid);
1075                 free_extent_buffer(mid);
1076                 mid = NULL;
1077                 wret = del_ptr(trans, root, path, level + 1, pslot);
1078                 if (wret)
1079                         ret = wret;
1080                 wret = btrfs_free_extent(trans, root, bytenr, blocksize,
1081                                          parent->start,
1082                                          btrfs_header_owner(parent),
1083                                          root_gen, level, 1);
1084                 if (wret)
1085                         ret = wret;
1086         } else {
1087                 /* update the parent key to reflect our changes */
1088                 struct btrfs_disk_key mid_key;
1089                 btrfs_node_key(mid, &mid_key, 0);
1090                 btrfs_set_node_key(parent, &mid_key, pslot);
1091                 btrfs_mark_buffer_dirty(parent);
1092         }
1093
1094         /* update the path */
1095         if (left) {
1096                 if (btrfs_header_nritems(left) > orig_slot) {
1097                         extent_buffer_get(left);
1098                         /* left was locked after cow */
1099                         path->nodes[level] = left;
1100                         path->slots[level + 1] -= 1;
1101                         path->slots[level] = orig_slot;
1102                         if (mid) {
1103                                 btrfs_tree_unlock(mid);
1104                                 free_extent_buffer(mid);
1105                         }
1106                 } else {
1107                         orig_slot -= btrfs_header_nritems(left);
1108                         path->slots[level] = orig_slot;
1109                 }
1110         }
1111         /* double check we haven't messed things up */
1112         check_block(root, path, level);
1113         if (orig_ptr !=
1114             btrfs_node_blockptr(path->nodes[level], path->slots[level]))
1115                 BUG();
1116 enospc:
1117         if (right) {
1118                 btrfs_tree_unlock(right);
1119                 free_extent_buffer(right);
1120         }
1121         if (left) {
1122                 if (path->nodes[level] != left)
1123                         btrfs_tree_unlock(left);
1124                 free_extent_buffer(left);
1125         }
1126         return ret;
1127 }
1128
1129 /* Node balancing for insertion.  Here we only split or push nodes around
1130  * when they are completely full.  This is also done top down, so we
1131  * have to be pessimistic.
1132  */
1133 static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
1134                                           struct btrfs_root *root,
1135                                           struct btrfs_path *path, int level)
1136 {
1137         struct extent_buffer *right = NULL;
1138         struct extent_buffer *mid;
1139         struct extent_buffer *left = NULL;
1140         struct extent_buffer *parent = NULL;
1141         int ret = 0;
1142         int wret;
1143         int pslot;
1144         int orig_slot = path->slots[level];
1145         u64 orig_ptr;
1146
1147         if (level == 0)
1148                 return 1;
1149
1150         mid = path->nodes[level];
1151         WARN_ON(btrfs_header_generation(mid) != trans->transid);
1152         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
1153
1154         if (level < BTRFS_MAX_LEVEL - 1)
1155                 parent = path->nodes[level + 1];
1156         pslot = path->slots[level + 1];
1157
1158         if (!parent)
1159                 return 1;
1160
1161         left = read_node_slot(root, parent, pslot - 1);
1162
1163         /* first, try to make some room in the middle buffer */
1164         if (left) {
1165                 u32 left_nr;
1166
1167                 btrfs_tree_lock(left);
1168                 left_nr = btrfs_header_nritems(left);
1169                 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1170                         wret = 1;
1171                 } else {
1172                         ret = btrfs_cow_block(trans, root, left, parent,
1173                                               pslot - 1, &left, 0);
1174                         if (ret)
1175                                 wret = 1;
1176                         else {
1177                                 wret = push_node_left(trans, root,
1178                                                       left, mid, 0);
1179                         }
1180                 }
1181                 if (wret < 0)
1182                         ret = wret;
1183                 if (wret == 0) {
1184                         struct btrfs_disk_key disk_key;
1185                         orig_slot += left_nr;
1186                         btrfs_node_key(mid, &disk_key, 0);
1187                         btrfs_set_node_key(parent, &disk_key, pslot);
1188                         btrfs_mark_buffer_dirty(parent);
1189                         if (btrfs_header_nritems(left) > orig_slot) {
1190                                 path->nodes[level] = left;
1191                                 path->slots[level + 1] -= 1;
1192                                 path->slots[level] = orig_slot;
1193                                 btrfs_tree_unlock(mid);
1194                                 free_extent_buffer(mid);
1195                         } else {
1196                                 orig_slot -=
1197                                         btrfs_header_nritems(left);
1198                                 path->slots[level] = orig_slot;
1199                                 btrfs_tree_unlock(left);
1200                                 free_extent_buffer(left);
1201                         }
1202                         return 0;
1203                 }
1204                 btrfs_tree_unlock(left);
1205                 free_extent_buffer(left);
1206         }
1207         right = read_node_slot(root, parent, pslot + 1);
1208
1209         /*
1210          * then try to empty the right most buffer into the middle
1211          */
1212         if (right) {
1213                 u32 right_nr;
1214                 btrfs_tree_lock(right);
1215                 right_nr = btrfs_header_nritems(right);
1216                 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1217                         wret = 1;
1218                 } else {
1219                         ret = btrfs_cow_block(trans, root, right,
1220                                               parent, pslot + 1,
1221                                               &right, 0);
1222                         if (ret)
1223                                 wret = 1;
1224                         else {
1225                                 wret = balance_node_right(trans, root,
1226                                                           right, mid);
1227                         }
1228                 }
1229                 if (wret < 0)
1230                         ret = wret;
1231                 if (wret == 0) {
1232                         struct btrfs_disk_key disk_key;
1233
1234                         btrfs_node_key(right, &disk_key, 0);
1235                         btrfs_set_node_key(parent, &disk_key, pslot + 1);
1236                         btrfs_mark_buffer_dirty(parent);
1237
1238                         if (btrfs_header_nritems(mid) <= orig_slot) {
1239                                 path->nodes[level] = right;
1240                                 path->slots[level + 1] += 1;
1241                                 path->slots[level] = orig_slot -
1242                                         btrfs_header_nritems(mid);
1243                                 btrfs_tree_unlock(mid);
1244                                 free_extent_buffer(mid);
1245                         } else {
1246                                 btrfs_tree_unlock(right);
1247                                 free_extent_buffer(right);
1248                         }
1249                         return 0;
1250                 }
1251                 btrfs_tree_unlock(right);
1252                 free_extent_buffer(right);
1253         }
1254         return 1;
1255 }
1256
1257 /*
1258  * readahead one full node of leaves, finding things that are close
1259  * to the block in 'slot', and triggering ra on them.
1260  */
1261 static noinline void reada_for_search(struct btrfs_root *root,
1262                                       struct btrfs_path *path,
1263                                       int level, int slot, u64 objectid)
1264 {
1265         struct extent_buffer *node;
1266         struct btrfs_disk_key disk_key;
1267         u32 nritems;
1268         u64 search;
1269         u64 lowest_read;
1270         u64 highest_read;
1271         u64 nread = 0;
1272         int direction = path->reada;
1273         struct extent_buffer *eb;
1274         u32 nr;
1275         u32 blocksize;
1276         u32 nscan = 0;
1277
1278         if (level != 1)
1279                 return;
1280
1281         if (!path->nodes[level])
1282                 return;
1283
1284         node = path->nodes[level];
1285
1286         search = btrfs_node_blockptr(node, slot);
1287         blocksize = btrfs_level_size(root, level - 1);
1288         eb = btrfs_find_tree_block(root, search, blocksize);
1289         if (eb) {
1290                 free_extent_buffer(eb);
1291                 return;
1292         }
1293
1294         highest_read = search;
1295         lowest_read = search;
1296
1297         nritems = btrfs_header_nritems(node);
1298         nr = slot;
1299         while(1) {
1300                 if (direction < 0) {
1301                         if (nr == 0)
1302                                 break;
1303                         nr--;
1304                 } else if (direction > 0) {
1305                         nr++;
1306                         if (nr >= nritems)
1307                                 break;
1308                 }
1309                 if (path->reada < 0 && objectid) {
1310                         btrfs_node_key(node, &disk_key, nr);
1311                         if (btrfs_disk_key_objectid(&disk_key) != objectid)
1312                                 break;
1313                 }
1314                 search = btrfs_node_blockptr(node, nr);
1315                 if ((search >= lowest_read && search <= highest_read) ||
1316                     (search < lowest_read && lowest_read - search <= 16384) ||
1317                     (search > highest_read && search - highest_read <= 16384)) {
1318                         readahead_tree_block(root, search, blocksize,
1319                                      btrfs_node_ptr_generation(node, nr));
1320                         nread += blocksize;
1321                 }
1322                 nscan++;
1323                 if (path->reada < 2 && (nread > (64 * 1024) || nscan > 32))
1324                         break;
1325                 if(nread > (256 * 1024) || nscan > 128)
1326                         break;
1327
1328                 if (search < lowest_read)
1329                         lowest_read = search;
1330                 if (search > highest_read)
1331                         highest_read = search;
1332         }
1333 }
1334
1335 /*
1336  * when we walk down the tree, it is usually safe to unlock the higher layers in
1337  * the tree.  The exceptions are when our path goes through slot 0, because operations
1338  * on the tree might require changing key pointers higher up in the tree.
1339  *
1340  * callers might also have set path->keep_locks, which tells this code to
1341  * keep the lock if the path points to the last slot in the block.  This is
1342  * part of walking through the tree, and selecting the next slot in the higher
1343  * block.
1344  *
1345  * lowest_unlock sets the lowest level in the tree we're allowed to unlock.
1346  * so if lowest_unlock is 1, level 0 won't be unlocked
1347  */
1348 static noinline void unlock_up(struct btrfs_path *path, int level,
1349                                int lowest_unlock)
1350 {
1351         int i;
1352         int skip_level = level;
1353         int no_skips = 0;
1354         struct extent_buffer *t;
1355
1356         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1357                 if (!path->nodes[i])
1358                         break;
1359                 if (!path->locks[i])
1360                         break;
1361                 if (!no_skips && path->slots[i] == 0) {
1362                         skip_level = i + 1;
1363                         continue;
1364                 }
1365                 if (!no_skips && path->keep_locks) {
1366                         u32 nritems;
1367                         t = path->nodes[i];
1368                         nritems = btrfs_header_nritems(t);
1369                         if (nritems < 1 || path->slots[i] >= nritems - 1) {
1370                                 skip_level = i + 1;
1371                                 continue;
1372                         }
1373                 }
1374                 if (skip_level < i && i >= lowest_unlock)
1375                         no_skips = 1;
1376
1377                 t = path->nodes[i];
1378                 if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
1379                         btrfs_tree_unlock(t);
1380                         path->locks[i] = 0;
1381                 }
1382         }
1383 }
1384
1385 /*
1386  * look for key in the tree.  path is filled in with nodes along the way
1387  * if key is found, we return zero and you can find the item in the leaf
1388  * level of the path (level 0)
1389  *
1390  * If the key isn't found, the path points to the slot where it should
1391  * be inserted, and 1 is returned.  If there are other errors during the
1392  * search a negative error number is returned.
1393  *
1394  * if ins_len > 0, nodes and leaves will be split as we walk down the
1395  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
1396  * possible)
1397  */
1398 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1399                       *root, struct btrfs_key *key, struct btrfs_path *p, int
1400                       ins_len, int cow)
1401 {
1402         struct extent_buffer *b;
1403         struct extent_buffer *tmp;
1404         int slot;
1405         int ret;
1406         int level;
1407         int should_reada = p->reada;
1408         int lowest_unlock = 1;
1409         int blocksize;
1410         u8 lowest_level = 0;
1411         u64 blocknr;
1412         u64 gen;
1413         struct btrfs_key prealloc_block;
1414
1415         lowest_level = p->lowest_level;
1416         WARN_ON(lowest_level && ins_len > 0);
1417         WARN_ON(p->nodes[0] != NULL);
1418
1419         if (ins_len < 0)
1420                 lowest_unlock = 2;
1421
1422         prealloc_block.objectid = 0;
1423
1424 again:
1425         if (p->skip_locking)
1426                 b = btrfs_root_node(root);
1427         else
1428                 b = btrfs_lock_root_node(root);
1429
1430         while (b) {
1431                 level = btrfs_header_level(b);
1432
1433                 /*
1434                  * setup the path here so we can release it under lock
1435                  * contention with the cow code
1436                  */
1437                 p->nodes[level] = b;
1438                 if (!p->skip_locking)
1439                         p->locks[level] = 1;
1440
1441                 if (cow) {
1442                         int wret;
1443
1444                         /* is a cow on this block not required */
1445                         spin_lock(&root->fs_info->hash_lock);
1446                         if (btrfs_header_generation(b) == trans->transid &&
1447                             btrfs_header_owner(b) == root->root_key.objectid &&
1448                             !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) {
1449                                 spin_unlock(&root->fs_info->hash_lock);
1450                                 goto cow_done;
1451                         }
1452                         spin_unlock(&root->fs_info->hash_lock);
1453
1454                         /* ok, we have to cow, is our old prealloc the right
1455                          * size?
1456                          */
1457                         if (prealloc_block.objectid &&
1458                             prealloc_block.offset != b->len) {
1459                                 btrfs_free_reserved_extent(root,
1460                                            prealloc_block.objectid,
1461                                            prealloc_block.offset);
1462                                 prealloc_block.objectid = 0;
1463                         }
1464
1465                         /*
1466                          * for higher level blocks, try not to allocate blocks
1467                          * with the block and the parent locks held.
1468                          */
1469                         if (level > 1 && !prealloc_block.objectid &&
1470                             btrfs_path_lock_waiting(p, level)) {
1471                                 u32 size = b->len;
1472                                 u64 hint = b->start;
1473
1474                                 btrfs_release_path(root, p);
1475                                 ret = btrfs_reserve_extent(trans, root,
1476                                                            size, size, 0,
1477                                                            hint, (u64)-1,
1478                                                            &prealloc_block, 0);
1479                                 BUG_ON(ret);
1480                                 goto again;
1481                         }
1482
1483                         wret = btrfs_cow_block(trans, root, b,
1484                                                p->nodes[level + 1],
1485                                                p->slots[level + 1],
1486                                                &b, prealloc_block.objectid);
1487                         prealloc_block.objectid = 0;
1488                         if (wret) {
1489                                 free_extent_buffer(b);
1490                                 ret = wret;
1491                                 goto done;
1492                         }
1493                 }
1494 cow_done:
1495                 BUG_ON(!cow && ins_len);
1496                 if (level != btrfs_header_level(b))
1497                         WARN_ON(1);
1498                 level = btrfs_header_level(b);
1499
1500                 p->nodes[level] = b;
1501                 if (!p->skip_locking)
1502                         p->locks[level] = 1;
1503
1504                 ret = check_block(root, p, level);
1505                 if (ret) {
1506                         ret = -1;
1507                         goto done;
1508                 }
1509
1510                 ret = bin_search(b, key, level, &slot);
1511                 if (level != 0) {
1512                         if (ret && slot > 0)
1513                                 slot -= 1;
1514                         p->slots[level] = slot;
1515                         if ((p->search_for_split || ins_len > 0) &&
1516                             btrfs_header_nritems(b) >=
1517                             BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
1518                                 int sret = split_node(trans, root, p, level);
1519                                 BUG_ON(sret > 0);
1520                                 if (sret) {
1521                                         ret = sret;
1522                                         goto done;
1523                                 }
1524                                 b = p->nodes[level];
1525                                 slot = p->slots[level];
1526                         } else if (ins_len < 0) {
1527                                 int sret = balance_level(trans, root, p,
1528                                                          level);
1529                                 if (sret) {
1530                                         ret = sret;
1531                                         goto done;
1532                                 }
1533                                 b = p->nodes[level];
1534                                 if (!b) {
1535                                         btrfs_release_path(NULL, p);
1536                                         goto again;
1537                                 }
1538                                 slot = p->slots[level];
1539                                 BUG_ON(btrfs_header_nritems(b) == 1);
1540                         }
1541                         unlock_up(p, level, lowest_unlock);
1542
1543                         /* this is only true while dropping a snapshot */
1544                         if (level == lowest_level) {
1545                                 ret = 0;
1546                                 goto done;
1547                         }
1548
1549                         blocknr = btrfs_node_blockptr(b, slot);
1550                         gen = btrfs_node_ptr_generation(b, slot);
1551                         blocksize = btrfs_level_size(root, level - 1);
1552
1553                         tmp = btrfs_find_tree_block(root, blocknr, blocksize);
1554                         if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
1555                                 b = tmp;
1556                         } else {
1557                                 /*
1558                                  * reduce lock contention at high levels
1559                                  * of the btree by dropping locks before
1560                                  * we read.
1561                                  */
1562                                 if (level > 1) {
1563                                         btrfs_release_path(NULL, p);
1564                                         if (tmp)
1565                                                 free_extent_buffer(tmp);
1566                                         if (should_reada)
1567                                                 reada_for_search(root, p,
1568                                                                  level, slot,
1569                                                                  key->objectid);
1570
1571                                         tmp = read_tree_block(root, blocknr,
1572                                                          blocksize, gen);
1573                                         if (tmp)
1574                                                 free_extent_buffer(tmp);
1575                                         goto again;
1576                                 } else {
1577                                         if (tmp)
1578                                                 free_extent_buffer(tmp);
1579                                         if (should_reada)
1580                                                 reada_for_search(root, p,
1581                                                                  level, slot,
1582                                                                  key->objectid);
1583                                         b = read_node_slot(root, b, slot);
1584                                 }
1585                         }
1586                         if (!p->skip_locking)
1587                                 btrfs_tree_lock(b);
1588                 } else {
1589                         p->slots[level] = slot;
1590                         if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
1591                             sizeof(struct btrfs_item) + ins_len) {
1592                                 int sret = split_leaf(trans, root, key,
1593                                                       p, ins_len, ret == 0);
1594                                 BUG_ON(sret > 0);
1595                                 if (sret) {
1596                                         ret = sret;
1597                                         goto done;
1598                                 }
1599                         }
1600                         if (!p->search_for_split)
1601                                 unlock_up(p, level, lowest_unlock);
1602                         goto done;
1603                 }
1604         }
1605         ret = 1;
1606 done:
1607         if (prealloc_block.objectid) {
1608                 btrfs_free_reserved_extent(root,
1609                            prealloc_block.objectid,
1610                            prealloc_block.offset);
1611         }
1612
1613         return ret;
1614 }
1615
1616 int btrfs_merge_path(struct btrfs_trans_handle *trans,
1617                      struct btrfs_root *root,
1618                      struct btrfs_key *node_keys,
1619                      u64 *nodes, int lowest_level)
1620 {
1621         struct extent_buffer *eb;
1622         struct extent_buffer *parent;
1623         struct btrfs_key key;
1624         u64 bytenr;
1625         u64 generation;
1626         u32 blocksize;
1627         int level;
1628         int slot;
1629         int key_match;
1630         int ret;
1631
1632         eb = btrfs_lock_root_node(root);
1633         ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0);
1634         BUG_ON(ret);
1635
1636         parent = eb;
1637         while (1) {
1638                 level = btrfs_header_level(parent);
1639                 if (level == 0 || level <= lowest_level)
1640                         break;
1641
1642                 ret = bin_search(parent, &node_keys[lowest_level], level,
1643                                  &slot);
1644                 if (ret && slot > 0)
1645                         slot--;
1646
1647                 bytenr = btrfs_node_blockptr(parent, slot);
1648                 if (nodes[level - 1] == bytenr)
1649                         break;
1650
1651                 blocksize = btrfs_level_size(root, level - 1);
1652                 generation = btrfs_node_ptr_generation(parent, slot);
1653                 btrfs_node_key_to_cpu(eb, &key, slot);
1654                 key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key));
1655
1656                 if (generation == trans->transid) {
1657                         eb = read_tree_block(root, bytenr, blocksize,
1658                                              generation);
1659                         btrfs_tree_lock(eb);
1660                 }
1661
1662                 /*
1663                  * if node keys match and node pointer hasn't been modified
1664                  * in the running transaction, we can merge the path. for
1665                  * blocks owened by reloc trees, the node pointer check is
1666                  * skipped, this is because these blocks are fully controlled
1667                  * by the space balance code, no one else can modify them.
1668                  */
1669                 if (!nodes[level - 1] || !key_match ||
1670                     (generation == trans->transid &&
1671                      btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID)) {
1672                         if (level == 1 || level == lowest_level + 1) {
1673                                 if (generation == trans->transid) {
1674                                         btrfs_tree_unlock(eb);
1675                                         free_extent_buffer(eb);
1676                                 }
1677                                 break;
1678                         }
1679
1680                         if (generation != trans->transid) {
1681                                 eb = read_tree_block(root, bytenr, blocksize,
1682                                                 generation);
1683                                 btrfs_tree_lock(eb);
1684                         }
1685
1686                         ret = btrfs_cow_block(trans, root, eb, parent, slot,
1687                                               &eb, 0);
1688                         BUG_ON(ret);
1689
1690                         if (root->root_key.objectid ==
1691                             BTRFS_TREE_RELOC_OBJECTID) {
1692                                 if (!nodes[level - 1]) {
1693                                         nodes[level - 1] = eb->start;
1694                                         memcpy(&node_keys[level - 1], &key,
1695                                                sizeof(node_keys[0]));
1696                                 } else {
1697                                         WARN_ON(1);
1698                                 }
1699                         }
1700
1701                         btrfs_tree_unlock(parent);
1702                         free_extent_buffer(parent);
1703                         parent = eb;
1704                         continue;
1705                 }
1706
1707                 btrfs_set_node_blockptr(parent, slot, nodes[level - 1]);
1708                 btrfs_set_node_ptr_generation(parent, slot, trans->transid);
1709                 btrfs_mark_buffer_dirty(parent);
1710
1711                 ret = btrfs_inc_extent_ref(trans, root,
1712                                         nodes[level - 1],
1713                                         blocksize, parent->start,
1714                                         btrfs_header_owner(parent),
1715                                         btrfs_header_generation(parent),
1716                                         level - 1);
1717                 BUG_ON(ret);
1718
1719                 /*
1720                  * If the block was created in the running transaction,
1721                  * it's possible this is the last reference to it, so we
1722                  * should drop the subtree.
1723                  */
1724                 if (generation == trans->transid) {
1725                         ret = btrfs_drop_subtree(trans, root, eb, parent);
1726                         BUG_ON(ret);
1727                         btrfs_tree_unlock(eb);
1728                         free_extent_buffer(eb);
1729                 } else {
1730                         ret = btrfs_free_extent(trans, root, bytenr,
1731                                         blocksize, parent->start,
1732                                         btrfs_header_owner(parent),
1733                                         btrfs_header_generation(parent),
1734                                         level - 1, 1);
1735                         BUG_ON(ret);
1736                 }
1737                 break;
1738         }
1739         btrfs_tree_unlock(parent);
1740         free_extent_buffer(parent);
1741         return 0;
1742 }
1743
1744 /*
1745  * adjust the pointers going up the tree, starting at level
1746  * making sure the right key of each node is points to 'key'.
1747  * This is used after shifting pointers to the left, so it stops
1748  * fixing up pointers when a given leaf/node is not in slot 0 of the
1749  * higher levels
1750  *
1751  * If this fails to write a tree block, it returns -1, but continues
1752  * fixing up the blocks in ram so the tree is consistent.
1753  */
1754 static int fixup_low_keys(struct btrfs_trans_handle *trans,
1755                           struct btrfs_root *root, struct btrfs_path *path,
1756                           struct btrfs_disk_key *key, int level)
1757 {
1758         int i;
1759         int ret = 0;
1760         struct extent_buffer *t;
1761
1762         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1763                 int tslot = path->slots[i];
1764                 if (!path->nodes[i])
1765                         break;
1766                 t = path->nodes[i];
1767                 btrfs_set_node_key(t, key, tslot);
1768                 btrfs_mark_buffer_dirty(path->nodes[i]);
1769                 if (tslot != 0)
1770                         break;
1771         }
1772         return ret;
1773 }
1774
1775 /*
1776  * update item key.
1777  *
1778  * This function isn't completely safe. It's the caller's responsibility
1779  * that the new key won't break the order
1780  */
1781 int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
1782                             struct btrfs_root *root, struct btrfs_path *path,
1783                             struct btrfs_key *new_key)
1784 {
1785         struct btrfs_disk_key disk_key;
1786         struct extent_buffer *eb;
1787         int slot;
1788
1789         eb = path->nodes[0];
1790         slot = path->slots[0];
1791         if (slot > 0) {
1792                 btrfs_item_key(eb, &disk_key, slot - 1);
1793                 if (comp_keys(&disk_key, new_key) >= 0)
1794                         return -1;
1795         }
1796         if (slot < btrfs_header_nritems(eb) - 1) {
1797                 btrfs_item_key(eb, &disk_key, slot + 1);
1798                 if (comp_keys(&disk_key, new_key) <= 0)
1799                         return -1;
1800         }
1801
1802         btrfs_cpu_key_to_disk(&disk_key, new_key);
1803         btrfs_set_item_key(eb, &disk_key, slot);
1804         btrfs_mark_buffer_dirty(eb);
1805         if (slot == 0)
1806                 fixup_low_keys(trans, root, path, &disk_key, 1);
1807         return 0;
1808 }
1809
1810 /*
1811  * try to push data from one node into the next node left in the
1812  * tree.
1813  *
1814  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1815  * error, and > 0 if there was no room in the left hand block.
1816  */
1817 static int push_node_left(struct btrfs_trans_handle *trans,
1818                           struct btrfs_root *root, struct extent_buffer *dst,
1819                           struct extent_buffer *src, int empty)
1820 {
1821         int push_items = 0;
1822         int src_nritems;
1823         int dst_nritems;
1824         int ret = 0;
1825
1826         src_nritems = btrfs_header_nritems(src);
1827         dst_nritems = btrfs_header_nritems(dst);
1828         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1829         WARN_ON(btrfs_header_generation(src) != trans->transid);
1830         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1831
1832         if (!empty && src_nritems <= 8)
1833                 return 1;
1834
1835         if (push_items <= 0) {
1836                 return 1;
1837         }
1838
1839         if (empty) {
1840                 push_items = min(src_nritems, push_items);
1841                 if (push_items < src_nritems) {
1842                         /* leave at least 8 pointers in the node if
1843                          * we aren't going to empty it
1844                          */
1845                         if (src_nritems - push_items < 8) {
1846                                 if (push_items <= 8)
1847                                         return 1;
1848                                 push_items -= 8;
1849                         }
1850                 }
1851         } else
1852                 push_items = min(src_nritems - 8, push_items);
1853
1854         copy_extent_buffer(dst, src,
1855                            btrfs_node_key_ptr_offset(dst_nritems),
1856                            btrfs_node_key_ptr_offset(0),
1857                            push_items * sizeof(struct btrfs_key_ptr));
1858
1859         if (push_items < src_nritems) {
1860                 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
1861                                       btrfs_node_key_ptr_offset(push_items),
1862                                       (src_nritems - push_items) *
1863                                       sizeof(struct btrfs_key_ptr));
1864         }
1865         btrfs_set_header_nritems(src, src_nritems - push_items);
1866         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1867         btrfs_mark_buffer_dirty(src);
1868         btrfs_mark_buffer_dirty(dst);
1869
1870         ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items);
1871         BUG_ON(ret);
1872
1873         return ret;
1874 }
1875
1876 /*
1877  * try to push data from one node into the next node right in the
1878  * tree.
1879  *
1880  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1881  * error, and > 0 if there was no room in the right hand block.
1882  *
1883  * this will  only push up to 1/2 the contents of the left node over
1884  */
1885 static int balance_node_right(struct btrfs_trans_handle *trans,
1886                               struct btrfs_root *root,
1887                               struct extent_buffer *dst,
1888                               struct extent_buffer *src)
1889 {
1890         int push_items = 0;
1891         int max_push;
1892         int src_nritems;
1893         int dst_nritems;
1894         int ret = 0;
1895
1896         WARN_ON(btrfs_header_generation(src) != trans->transid);
1897         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1898
1899         src_nritems = btrfs_header_nritems(src);
1900         dst_nritems = btrfs_header_nritems(dst);
1901         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1902         if (push_items <= 0) {
1903                 return 1;
1904         }
1905
1906         if (src_nritems < 4) {
1907                 return 1;
1908         }
1909
1910         max_push = src_nritems / 2 + 1;
1911         /* don't try to empty the node */
1912         if (max_push >= src_nritems) {
1913                 return 1;
1914         }
1915
1916         if (max_push < push_items)
1917                 push_items = max_push;
1918
1919         memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
1920                                       btrfs_node_key_ptr_offset(0),
1921                                       (dst_nritems) *
1922                                       sizeof(struct btrfs_key_ptr));
1923
1924         copy_extent_buffer(dst, src,
1925                            btrfs_node_key_ptr_offset(0),
1926                            btrfs_node_key_ptr_offset(src_nritems - push_items),
1927                            push_items * sizeof(struct btrfs_key_ptr));
1928
1929         btrfs_set_header_nritems(src, src_nritems - push_items);
1930         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1931
1932         btrfs_mark_buffer_dirty(src);
1933         btrfs_mark_buffer_dirty(dst);
1934
1935         ret = btrfs_update_ref(trans, root, src, dst, 0, push_items);
1936         BUG_ON(ret);
1937
1938         return ret;
1939 }
1940
1941 /*
1942  * helper function to insert a new root level in the tree.
1943  * A new node is allocated, and a single item is inserted to
1944  * point to the existing root
1945  *
1946  * returns zero on success or < 0 on failure.
1947  */
1948 static int noinline insert_new_root(struct btrfs_trans_handle *trans,
1949                            struct btrfs_root *root,
1950                            struct btrfs_path *path, int level)
1951 {
1952         u64 lower_gen;
1953         struct extent_buffer *lower;
1954         struct extent_buffer *c;
1955         struct extent_buffer *old;
1956         struct btrfs_disk_key lower_key;
1957         int ret;
1958
1959         BUG_ON(path->nodes[level]);
1960         BUG_ON(path->nodes[level-1] != root->node);
1961
1962         lower = path->nodes[level-1];
1963         if (level == 1)
1964                 btrfs_item_key(lower, &lower_key, 0);
1965         else
1966                 btrfs_node_key(lower, &lower_key, 0);
1967
1968         c = btrfs_alloc_free_block(trans, root, root->nodesize, 0,
1969                                    root->root_key.objectid, trans->transid,
1970                                    level, root->node->start, 0);
1971         if (IS_ERR(c))
1972                 return PTR_ERR(c);
1973
1974         memset_extent_buffer(c, 0, 0, root->nodesize);
1975         btrfs_set_header_nritems(c, 1);
1976         btrfs_set_header_level(c, level);
1977         btrfs_set_header_bytenr(c, c->start);
1978         btrfs_set_header_generation(c, trans->transid);
1979         btrfs_set_header_owner(c, root->root_key.objectid);
1980
1981         write_extent_buffer(c, root->fs_info->fsid,
1982                             (unsigned long)btrfs_header_fsid(c),
1983                             BTRFS_FSID_SIZE);
1984
1985         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
1986                             (unsigned long)btrfs_header_chunk_tree_uuid(c),
1987                             BTRFS_UUID_SIZE);
1988
1989         btrfs_set_node_key(c, &lower_key, 0);
1990         btrfs_set_node_blockptr(c, 0, lower->start);
1991         lower_gen = btrfs_header_generation(lower);
1992         WARN_ON(lower_gen != trans->transid);
1993
1994         btrfs_set_node_ptr_generation(c, 0, lower_gen);
1995
1996         btrfs_mark_buffer_dirty(c);
1997
1998         spin_lock(&root->node_lock);
1999         old = root->node;
2000         root->node = c;
2001         spin_unlock(&root->node_lock);
2002
2003         ret = btrfs_update_extent_ref(trans, root, lower->start,
2004                                       lower->start, c->start,
2005                                       root->root_key.objectid,
2006                                       trans->transid, level - 1);
2007         BUG_ON(ret);
2008
2009         /* the super has an extra ref to root->node */
2010         free_extent_buffer(old);
2011
2012         add_root_to_dirty_list(root);
2013         extent_buffer_get(c);
2014         path->nodes[level] = c;
2015         path->locks[level] = 1;
2016         path->slots[level] = 0;
2017         return 0;
2018 }
2019
2020 /*
2021  * worker function to insert a single pointer in a node.
2022  * the node should have enough room for the pointer already
2023  *
2024  * slot and level indicate where you want the key to go, and
2025  * blocknr is the block the key points to.
2026  *
2027  * returns zero on success and < 0 on any error
2028  */
2029 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
2030                       *root, struct btrfs_path *path, struct btrfs_disk_key
2031                       *key, u64 bytenr, int slot, int level)
2032 {
2033         struct extent_buffer *lower;
2034         int nritems;
2035
2036         BUG_ON(!path->nodes[level]);
2037         lower = path->nodes[level];
2038         nritems = btrfs_header_nritems(lower);
2039         if (slot > nritems)
2040                 BUG();
2041         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
2042                 BUG();
2043         if (slot != nritems) {
2044                 memmove_extent_buffer(lower,
2045                               btrfs_node_key_ptr_offset(slot + 1),
2046                               btrfs_node_key_ptr_offset(slot),
2047                               (nritems - slot) * sizeof(struct btrfs_key_ptr));
2048         }
2049         btrfs_set_node_key(lower, key, slot);
2050         btrfs_set_node_blockptr(lower, slot, bytenr);
2051         WARN_ON(trans->transid == 0);
2052         btrfs_set_node_ptr_generation(lower, slot, trans->transid);
2053         btrfs_set_header_nritems(lower, nritems + 1);
2054         btrfs_mark_buffer_dirty(lower);
2055         return 0;
2056 }
2057
2058 /*
2059  * split the node at the specified level in path in two.
2060  * The path is corrected to point to the appropriate node after the split
2061  *
2062  * Before splitting this tries to make some room in the node by pushing
2063  * left and right, if either one works, it returns right away.
2064  *
2065  * returns 0 on success and < 0 on failure
2066  */
2067 static noinline int split_node(struct btrfs_trans_handle *trans,
2068                                struct btrfs_root *root,
2069                                struct btrfs_path *path, int level)
2070 {
2071         struct extent_buffer *c;
2072         struct extent_buffer *split;
2073         struct btrfs_disk_key disk_key;
2074         int mid;
2075         int ret;
2076         int wret;
2077         u32 c_nritems;
2078
2079         c = path->nodes[level];
2080         WARN_ON(btrfs_header_generation(c) != trans->transid);
2081         if (c == root->node) {
2082                 /* trying to split the root, lets make a new one */
2083                 ret = insert_new_root(trans, root, path, level + 1);
2084                 if (ret)
2085                         return ret;
2086         } else {
2087                 ret = push_nodes_for_insert(trans, root, path, level);
2088                 c = path->nodes[level];
2089                 if (!ret && btrfs_header_nritems(c) <
2090                     BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
2091                         return 0;
2092                 if (ret < 0)
2093                         return ret;
2094         }
2095
2096         c_nritems = btrfs_header_nritems(c);
2097
2098         split = btrfs_alloc_free_block(trans, root, root->nodesize,
2099                                         path->nodes[level + 1]->start,
2100                                         root->root_key.objectid,
2101                                         trans->transid, level, c->start, 0);
2102         if (IS_ERR(split))
2103                 return PTR_ERR(split);
2104
2105         btrfs_set_header_flags(split, btrfs_header_flags(c));
2106         btrfs_set_header_level(split, btrfs_header_level(c));
2107         btrfs_set_header_bytenr(split, split->start);
2108         btrfs_set_header_generation(split, trans->transid);
2109         btrfs_set_header_owner(split, root->root_key.objectid);
2110         btrfs_set_header_flags(split, 0);
2111         write_extent_buffer(split, root->fs_info->fsid,
2112                             (unsigned long)btrfs_header_fsid(split),
2113                             BTRFS_FSID_SIZE);
2114         write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
2115                             (unsigned long)btrfs_header_chunk_tree_uuid(split),
2116                             BTRFS_UUID_SIZE);
2117
2118         mid = (c_nritems + 1) / 2;
2119
2120         copy_extent_buffer(split, c,
2121                            btrfs_node_key_ptr_offset(0),
2122                            btrfs_node_key_ptr_offset(mid),
2123                            (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
2124         btrfs_set_header_nritems(split, c_nritems - mid);
2125         btrfs_set_header_nritems(c, mid);
2126         ret = 0;
2127
2128         btrfs_mark_buffer_dirty(c);
2129         btrfs_mark_buffer_dirty(split);
2130
2131         btrfs_node_key(split, &disk_key, 0);
2132         wret = insert_ptr(trans, root, path, &disk_key, split->start,
2133                           path->slots[level + 1] + 1,
2134                           level + 1);
2135         if (wret)
2136                 ret = wret;
2137
2138         ret = btrfs_update_ref(trans, root, c, split, 0, c_nritems - mid);
2139         BUG_ON(ret);
2140
2141         if (path->slots[level] >= mid) {
2142                 path->slots[level] -= mid;
2143                 btrfs_tree_unlock(c);
2144                 free_extent_buffer(c);
2145                 path->nodes[level] = split;
2146                 path->slots[level + 1] += 1;
2147         } else {
2148                 btrfs_tree_unlock(split);
2149                 free_extent_buffer(split);
2150         }
2151         return ret;
2152 }
2153
2154 /*
2155  * how many bytes are required to store the items in a leaf.  start
2156  * and nr indicate which items in the leaf to check.  This totals up the
2157  * space used both by the item structs and the item data
2158  */
2159 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
2160 {
2161         int data_len;
2162         int nritems = btrfs_header_nritems(l);
2163         int end = min(nritems, start + nr) - 1;
2164
2165         if (!nr)
2166                 return 0;
2167         data_len = btrfs_item_end_nr(l, start);
2168         data_len = data_len - btrfs_item_offset_nr(l, end);
2169         data_len += sizeof(struct btrfs_item) * nr;
2170         WARN_ON(data_len < 0);
2171         return data_len;
2172 }
2173
2174 /*
2175  * The space between the end of the leaf items and
2176  * the start of the leaf data.  IOW, how much room
2177  * the leaf has left for both items and data
2178  */
2179 int noinline btrfs_leaf_free_space(struct btrfs_root *root,
2180                                    struct extent_buffer *leaf)
2181 {
2182         int nritems = btrfs_header_nritems(leaf);
2183         int ret;
2184         ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
2185         if (ret < 0) {
2186                 printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
2187                        ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
2188                        leaf_space_used(leaf, 0, nritems), nritems);
2189         }
2190         return ret;
2191 }
2192
2193 /*
2194  * push some data in the path leaf to the right, trying to free up at
2195  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
2196  *
2197  * returns 1 if the push failed because the other node didn't have enough
2198  * room, 0 if everything worked out and < 0 if there were major errors.
2199  */
2200 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
2201                            *root, struct btrfs_path *path, int data_size,
2202                            int empty)
2203 {
2204         struct extent_buffer *left = path->nodes[0];
2205         struct extent_buffer *right;
2206         struct extent_buffer *upper;
2207         struct btrfs_disk_key disk_key;
2208         int slot;
2209         u32 i;
2210         int free_space;
2211         int push_space = 0;
2212         int push_items = 0;
2213         struct btrfs_item *item;
2214         u32 left_nritems;
2215         u32 nr;
2216         u32 right_nritems;
2217         u32 data_end;
2218         u32 this_item_size;
2219         int ret;
2220
2221         slot = path->slots[1];
2222         if (!path->nodes[1]) {
2223                 return 1;
2224         }
2225         upper = path->nodes[1];
2226         if (slot >= btrfs_header_nritems(upper) - 1)
2227                 return 1;
2228
2229         WARN_ON(!btrfs_tree_locked(path->nodes[1]));
2230
2231         right = read_node_slot(root, upper, slot + 1);
2232         btrfs_tree_lock(right);
2233         free_space = btrfs_leaf_free_space(root, right);
2234         if (free_space < data_size + sizeof(struct btrfs_item))
2235                 goto out_unlock;
2236
2237         /* cow and double check */
2238         ret = btrfs_cow_block(trans, root, right, upper,
2239                               slot + 1, &right, 0);
2240         if (ret)
2241                 goto out_unlock;
2242
2243         free_space = btrfs_leaf_free_space(root, right);
2244         if (free_space < data_size + sizeof(struct btrfs_item))
2245                 goto out_unlock;
2246
2247         left_nritems = btrfs_header_nritems(left);
2248         if (left_nritems == 0)
2249                 goto out_unlock;
2250
2251         if (empty)
2252                 nr = 0;
2253         else
2254                 nr = 1;
2255
2256         if (path->slots[0] >= left_nritems)
2257                 push_space += data_size + sizeof(*item);
2258
2259         i = left_nritems - 1;
2260         while (i >= nr) {
2261                 item = btrfs_item_nr(left, i);
2262
2263                 if (!empty && push_items > 0) {
2264                         if (path->slots[0] > i)
2265                                 break;
2266                         if (path->slots[0] == i) {
2267                                 int space = btrfs_leaf_free_space(root, left);
2268                                 if (space + push_space * 2 > free_space)
2269                                         break;
2270                         }
2271                 }
2272
2273                 if (path->slots[0] == i)
2274                         push_space += data_size + sizeof(*item);
2275
2276                 if (!left->map_token) {
2277                         map_extent_buffer(left, (unsigned long)item,
2278                                         sizeof(struct btrfs_item),
2279                                         &left->map_token, &left->kaddr,
2280                                         &left->map_start, &left->map_len,
2281                                         KM_USER1);
2282                 }
2283
2284                 this_item_size = btrfs_item_size(left, item);
2285                 if (this_item_size + sizeof(*item) + push_space > free_space)
2286                         break;
2287
2288                 push_items++;
2289                 push_space += this_item_size + sizeof(*item);
2290                 if (i == 0)
2291                         break;
2292                 i--;
2293         }
2294         if (left->map_token) {
2295                 unmap_extent_buffer(left, left->map_token, KM_USER1);
2296                 left->map_token = NULL;
2297         }
2298
2299         if (push_items == 0)
2300                 goto out_unlock;
2301
2302         if (!empty && push_items == left_nritems)
2303                 WARN_ON(1);
2304
2305         /* push left to right */
2306         right_nritems = btrfs_header_nritems(right);
2307
2308         push_space = btrfs_item_end_nr(left, left_nritems - push_items);
2309         push_space -= leaf_data_end(root, left);
2310
2311         /* make room in the right data area */
2312         data_end = leaf_data_end(root, right);
2313         memmove_extent_buffer(right,
2314                               btrfs_leaf_data(right) + data_end - push_space,
2315                               btrfs_leaf_data(right) + data_end,
2316                               BTRFS_LEAF_DATA_SIZE(root) - data_end);
2317
2318         /* copy from the left data area */
2319         copy_extent_buffer(right, left, btrfs_leaf_data(right) +
2320                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
2321                      btrfs_leaf_data(left) + leaf_data_end(root, left),
2322                      push_space);
2323
2324         memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
2325                               btrfs_item_nr_offset(0),
2326                               right_nritems * sizeof(struct btrfs_item));
2327
2328         /* copy the items from left to right */
2329         copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
2330                    btrfs_item_nr_offset(left_nritems - push_items),
2331                    push_items * sizeof(struct btrfs_item));
2332
2333         /* update the item pointers */
2334         right_nritems += push_items;
2335         btrfs_set_header_nritems(right, right_nritems);
2336         push_space = BTRFS_LEAF_DATA_SIZE(root);
2337         for (i = 0; i < right_nritems; i++) {
2338                 item = btrfs_item_nr(right, i);
2339                 if (!right->map_token) {
2340                         map_extent_buffer(right, (unsigned long)item,
2341                                         sizeof(struct btrfs_item),
2342                                         &right->map_token, &right->kaddr,
2343                                         &right->map_start, &right->map_len,
2344                                         KM_USER1);
2345                 }
2346                 push_space -= btrfs_item_size(right, item);
2347                 btrfs_set_item_offset(right, item, push_space);
2348         }
2349
2350         if (right->map_token) {
2351                 unmap_extent_buffer(right, right->map_token, KM_USER1);
2352                 right->map_token = NULL;
2353         }
2354         left_nritems -= push_items;
2355         btrfs_set_header_nritems(left, left_nritems);
2356
2357         if (left_nritems)
2358                 btrfs_mark_buffer_dirty(left);
2359         btrfs_mark_buffer_dirty(right);
2360
2361         ret = btrfs_update_ref(trans, root, left, right, 0, push_items);
2362         BUG_ON(ret);
2363
2364         btrfs_item_key(right, &disk_key, 0);
2365         btrfs_set_node_key(upper, &disk_key, slot + 1);
2366         btrfs_mark_buffer_dirty(upper);
2367
2368         /* then fixup the leaf pointer in the path */
2369         if (path->slots[0] >= left_nritems) {
2370                 path->slots[0] -= left_nritems;
2371                 if (btrfs_header_nritems(path->nodes[0]) == 0)
2372                         clean_tree_block(trans, root, path->nodes[0]);
2373                 btrfs_tree_unlock(path->nodes[0]);
2374                 free_extent_buffer(path->nodes[0]);
2375                 path->nodes[0] = right;
2376                 path->slots[1] += 1;
2377         } else {
2378                 btrfs_tree_unlock(right);
2379                 free_extent_buffer(right);
2380         }
2381         return 0;
2382
2383 out_unlock:
2384         btrfs_tree_unlock(right);
2385         free_extent_buffer(right);
2386         return 1;
2387 }
2388
2389 /*
2390  * push some data in the path leaf to the left, trying to free up at
2391  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
2392  */
2393 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
2394                           *root, struct btrfs_path *path, int data_size,
2395                           int empty)
2396 {
2397         struct btrfs_disk_key disk_key;
2398         struct extent_buffer *right = path->nodes[0];
2399         struct extent_buffer *left;
2400         int slot;
2401         int i;
2402         int free_space;
2403         int push_space = 0;
2404         int push_items = 0;
2405         struct btrfs_item *item;
2406         u32 old_left_nritems;
2407         u32 right_nritems;
2408         u32 nr;
2409         int ret = 0;
2410         int wret;
2411         u32 this_item_size;
2412         u32 old_left_item_size;
2413
2414         slot = path->slots[1];
2415         if (slot == 0)
2416                 return 1;
2417         if (!path->nodes[1])
2418                 return 1;
2419
2420         right_nritems = btrfs_header_nritems(right);
2421         if (right_nritems == 0) {
2422                 return 1;
2423         }
2424
2425         WARN_ON(!btrfs_tree_locked(path->nodes[1]));
2426
2427         left = read_node_slot(root, path->nodes[1], slot - 1);
2428         btrfs_tree_lock(left);
2429         free_space = btrfs_leaf_free_space(root, left);
2430         if (free_space < data_size + sizeof(struct btrfs_item)) {
2431                 ret = 1;
2432                 goto out;
2433         }
2434
2435         /* cow and double check */
2436         ret = btrfs_cow_block(trans, root, left,
2437                               path->nodes[1], slot - 1, &left, 0);
2438         if (ret) {
2439                 /* we hit -ENOSPC, but it isn't fatal here */
2440                 ret = 1;
2441                 goto out;
2442         }
2443
2444         free_space = btrfs_leaf_free_space(root, left);
2445         if (free_space < data_size + sizeof(struct btrfs_item)) {
2446                 ret = 1;
2447                 goto out;
2448         }
2449
2450         if (empty)
2451                 nr = right_nritems;
2452         else
2453                 nr = right_nritems - 1;
2454
2455         for (i = 0; i < nr; i++) {
2456                 item = btrfs_item_nr(right, i);
2457                 if (!right->map_token) {
2458                         map_extent_buffer(right, (unsigned long)item,
2459                                         sizeof(struct btrfs_item),
2460                                         &right->map_token, &right->kaddr,
2461                                         &right->map_start, &right->map_len,
2462                                         KM_USER1);
2463                 }
2464
2465                 if (!empty && push_items > 0) {
2466                         if (path->slots[0] < i)
2467                                 break;
2468                         if (path->slots[0] == i) {
2469                                 int space = btrfs_leaf_free_space(root, right);
2470                                 if (space + push_space * 2 > free_space)
2471                                         break;
2472                         }
2473                 }
2474
2475                 if (path->slots[0] == i)
2476                         push_space += data_size + sizeof(*item);
2477
2478                 this_item_size = btrfs_item_size(right, item);
2479                 if (this_item_size + sizeof(*item) + push_space > free_space)
2480                         break;
2481
2482                 push_items++;
2483                 push_space += this_item_size + sizeof(*item);
2484         }
2485
2486         if (right->map_token) {
2487                 unmap_extent_buffer(right, right->map_token, KM_USER1);
2488                 right->map_token = NULL;
2489         }
2490
2491         if (push_items == 0) {
2492                 ret = 1;
2493                 goto out;
2494         }
2495         if (!empty && push_items == btrfs_header_nritems(right))
2496                 WARN_ON(1);
2497
2498         /* push data from right to left */
2499         copy_extent_buffer(left, right,
2500                            btrfs_item_nr_offset(btrfs_header_nritems(left)),
2501                            btrfs_item_nr_offset(0),
2502                            push_items * sizeof(struct btrfs_item));
2503
2504         push_space = BTRFS_LEAF_DATA_SIZE(root) -
2505                      btrfs_item_offset_nr(right, push_items -1);
2506
2507         copy_extent_buffer(left, right, btrfs_leaf_data(left) +
2508                      leaf_data_end(root, left) - push_space,
2509                      btrfs_leaf_data(right) +
2510                      btrfs_item_offset_nr(right, push_items - 1),
2511                      push_space);
2512         old_left_nritems = btrfs_header_nritems(left);
2513         BUG_ON(old_left_nritems < 0);
2514
2515         old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
2516         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
2517                 u32 ioff;
2518
2519                 item = btrfs_item_nr(left, i);
2520                 if (!left->map_token) {
2521                         map_extent_buffer(left, (unsigned long)item,
2522                                         sizeof(struct btrfs_item),
2523                                         &left->map_token, &left->kaddr,
2524                                         &left->map_start, &left->map_len,
2525                                         KM_USER1);
2526                 }
2527
2528                 ioff = btrfs_item_offset(left, item);
2529                 btrfs_set_item_offset(left, item,
2530                       ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
2531         }
2532         btrfs_set_header_nritems(left, old_left_nritems + push_items);
2533         if (left->map_token) {
2534                 unmap_extent_buffer(left, left->map_token, KM_USER1);
2535                 left->map_token = NULL;
2536         }
2537
2538         /* fixup right node */
2539         if (push_items > right_nritems) {
2540                 printk("push items %d nr %u\n", push_items, right_nritems);
2541                 WARN_ON(1);
2542         }
2543
2544         if (push_items < right_nritems) {
2545                 push_space = btrfs_item_offset_nr(right, push_items - 1) -
2546                                                   leaf_data_end(root, right);
2547                 memmove_extent_buffer(right, btrfs_leaf_data(right) +
2548                                       BTRFS_LEAF_DATA_SIZE(root) - push_space,
2549                                       btrfs_leaf_data(right) +
2550                                       leaf_data_end(root, right), push_space);
2551
2552                 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
2553                               btrfs_item_nr_offset(push_items),
2554                              (btrfs_header_nritems(right) - push_items) *
2555                              sizeof(struct btrfs_item));
2556         }
2557         right_nritems -= push_items;
2558         btrfs_set_header_nritems(right, right_nritems);
2559         push_space = BTRFS_LEAF_DATA_SIZE(root);
2560         for (i = 0; i < right_nritems; i++) {
2561                 item = btrfs_item_nr(right, i);
2562
2563                 if (!right->map_token) {
2564                         map_extent_buffer(right, (unsigned long)item,
2565                                         sizeof(struct btrfs_item),
2566                                         &right->map_token, &right->kaddr,
2567                                         &right->map_start, &right->map_len,
2568                                         KM_USER1);
2569                 }
2570
2571                 push_space = push_space - btrfs_item_size(right, item);
2572                 btrfs_set_item_offset(right, item, push_space);
2573         }
2574         if (right->map_token) {
2575                 unmap_extent_buffer(right, right->map_token, KM_USER1);
2576                 right->map_token = NULL;
2577         }
2578
2579         btrfs_mark_buffer_dirty(left);
2580         if (right_nritems)
2581                 btrfs_mark_buffer_dirty(right);
2582
2583         ret = btrfs_update_ref(trans, root, right, left,
2584                                old_left_nritems, push_items);
2585         BUG_ON(ret);
2586
2587         btrfs_item_key(right, &disk_key, 0);
2588         wret = fixup_low_keys(trans, root, path, &disk_key, 1);
2589         if (wret)
2590                 ret = wret;
2591
2592         /* then fixup the leaf pointer in the path */
2593         if (path->slots[0] < push_items) {
2594                 path->slots[0] += old_left_nritems;
2595                 if (btrfs_header_nritems(path->nodes[0]) == 0)
2596                         clean_tree_block(trans, root, path->nodes[0]);
2597                 btrfs_tree_unlock(path->nodes[0]);
2598                 free_extent_buffer(path->nodes[0]);
2599                 path->nodes[0] = left;
2600                 path->slots[1] -= 1;
2601         } else {
2602                 btrfs_tree_unlock(left);
2603                 free_extent_buffer(left);
2604                 path->slots[0] -= push_items;
2605         }
2606         BUG_ON(path->slots[0] < 0);
2607         return ret;
2608 out:
2609         btrfs_tree_unlock(left);
2610         free_extent_buffer(left);
2611         return ret;
2612 }
2613
2614 /*
2615  * split the path's leaf in two, making sure there is at least data_size
2616  * available for the resulting leaf level of the path.
2617  *
2618  * returns 0 if all went well and < 0 on failure.
2619  */
2620 static noinline int split_leaf(struct btrfs_trans_handle *trans,
2621                                struct btrfs_root *root,
2622                                struct btrfs_key *ins_key,
2623                                struct btrfs_path *path, int data_size,
2624                                int extend)
2625 {
2626         struct extent_buffer *l;
2627         u32 nritems;
2628         int mid;
2629         int slot;
2630         struct extent_buffer *right;
2631         int space_needed = data_size + sizeof(struct btrfs_item);
2632         int data_copy_size;
2633         int rt_data_off;
2634         int i;
2635         int ret = 0;
2636         int wret;
2637         int double_split;
2638         int num_doubles = 0;
2639         struct btrfs_disk_key disk_key;
2640
2641         if (extend && data_size)
2642                 space_needed = data_size;
2643
2644         /* first try to make some room by pushing left and right */
2645         if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) {
2646                 wret = push_leaf_right(trans, root, path, data_size, 0);
2647                 if (wret < 0) {
2648                         return wret;
2649                 }
2650                 if (wret) {
2651                         wret = push_leaf_left(trans, root, path, data_size, 0);
2652                         if (wret < 0)
2653                                 return wret;
2654                 }
2655                 l = path->nodes[0];
2656
2657                 /* did the pushes work? */
2658                 if (btrfs_leaf_free_space(root, l) >= space_needed)
2659                         return 0;
2660         }
2661
2662         if (!path->nodes[1]) {
2663                 ret = insert_new_root(trans, root, path, 1);
2664                 if (ret)
2665                         return ret;
2666         }
2667 again:
2668         double_split = 0;
2669         l = path->nodes[0];
2670         slot = path->slots[0];
2671         nritems = btrfs_header_nritems(l);
2672         mid = (nritems + 1)/ 2;
2673
2674         right = btrfs_alloc_free_block(trans, root, root->leafsize,
2675                                         path->nodes[1]->start,
2676                                         root->root_key.objectid,
2677                                         trans->transid, 0, l->start, 0);
2678         if (IS_ERR(right)) {
2679                 BUG_ON(1);
2680                 return PTR_ERR(right);
2681         }
2682
2683         memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
2684         btrfs_set_header_bytenr(right, right->start);
2685         btrfs_set_header_generation(right, trans->transid);
2686         btrfs_set_header_owner(right, root->root_key.objectid);
2687         btrfs_set_header_level(right, 0);
2688         write_extent_buffer(right, root->fs_info->fsid,
2689                             (unsigned long)btrfs_header_fsid(right),
2690                             BTRFS_FSID_SIZE);
2691
2692         write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
2693                             (unsigned long)btrfs_header_chunk_tree_uuid(right),
2694                             BTRFS_UUID_SIZE);
2695         if (mid <= slot) {
2696                 if (nritems == 1 ||
2697                     leaf_space_used(l, mid, nritems - mid) + space_needed >
2698                         BTRFS_LEAF_DATA_SIZE(root)) {
2699                         if (slot >= nritems) {
2700                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2701                                 btrfs_set_header_nritems(right, 0);
2702                                 wret = insert_ptr(trans, root, path,
2703                                                   &disk_key, right->start,
2704                                                   path->slots[1] + 1, 1);
2705                                 if (wret)
2706                                         ret = wret;
2707
2708                                 btrfs_tree_unlock(path->nodes[0]);
2709                                 free_extent_buffer(path->nodes[0]);
2710                                 path->nodes[0] = right;
2711                                 path->slots[0] = 0;
2712                                 path->slots[1] += 1;
2713                                 btrfs_mark_buffer_dirty(right);
2714                                 return ret;
2715                         }
2716                         mid = slot;
2717                         if (mid != nritems &&
2718                             leaf_space_used(l, mid, nritems - mid) +
2719                             space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
2720                                 double_split = 1;
2721                         }
2722                 }
2723         } else {
2724                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
2725                         BTRFS_LEAF_DATA_SIZE(root)) {
2726                         if (!extend && data_size && slot == 0) {
2727                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2728                                 btrfs_set_header_nritems(right, 0);
2729                                 wret = insert_ptr(trans, root, path,
2730                                                   &disk_key,
2731                                                   right->start,
2732                                                   path->slots[1], 1);
2733                                 if (wret)
2734                                         ret = wret;
2735                                 btrfs_tree_unlock(path->nodes[0]);
2736                                 free_extent_buffer(path->nodes[0]);
2737                                 path->nodes[0] = right;
2738                                 path->slots[0] = 0;
2739                                 if (path->slots[1] == 0) {
2740                                         wret = fixup_low_keys(trans, root,
2741                                                    path, &disk_key, 1);
2742                                         if (wret)
2743                                                 ret = wret;
2744                                 }
2745                                 btrfs_mark_buffer_dirty(right);
2746                                 return ret;
2747                         } else if ((extend || !data_size) && slot == 0) {
2748                                 mid = 1;
2749                         } else {
2750                                 mid = slot;
2751                                 if (mid != nritems &&
2752                                     leaf_space_used(l, mid, nritems - mid) +
2753                                     space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
2754                                         double_split = 1;
2755                                 }
2756                         }
2757                 }
2758         }
2759         nritems = nritems - mid;
2760         btrfs_set_header_nritems(right, nritems);
2761         data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
2762
2763         copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
2764                            btrfs_item_nr_offset(mid),
2765                            nritems * sizeof(struct btrfs_item));
2766
2767         copy_extent_buffer(right, l,
2768                      btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
2769                      data_copy_size, btrfs_leaf_data(l) +
2770                      leaf_data_end(root, l), data_copy_size);
2771
2772         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
2773                       btrfs_item_end_nr(l, mid);
2774
2775         for (i = 0; i < nritems; i++) {
2776                 struct btrfs_item *item = btrfs_item_nr(right, i);
2777                 u32 ioff;
2778
2779                 if (!right->map_token) {
2780                         map_extent_buffer(right, (unsigned long)item,
2781                                         sizeof(struct btrfs_item),
2782                                         &right->map_token, &right->kaddr,
2783                                         &right->map_start, &right->map_len,
2784                                         KM_USER1);
2785                 }
2786
2787                 ioff = btrfs_item_offset(right, item);
2788                 btrfs_set_item_offset(right, item, ioff + rt_data_off);
2789         }
2790
2791         if (right->map_token) {
2792                 unmap_extent_buffer(right, right->map_token, KM_USER1);
2793                 right->map_token = NULL;
2794         }
2795
2796         btrfs_set_header_nritems(l, mid);
2797         ret = 0;
2798         btrfs_item_key(right, &disk_key, 0);
2799         wret = insert_ptr(trans, root, path, &disk_key, right->start,
2800                           path->slots[1] + 1, 1);
2801         if (wret)
2802                 ret = wret;
2803
2804         btrfs_mark_buffer_dirty(right);
2805         btrfs_mark_buffer_dirty(l);
2806         BUG_ON(path->slots[0] != slot);
2807
2808         ret = btrfs_update_ref(trans, root, l, right, 0, nritems);
2809         BUG_ON(ret);
2810
2811         if (mid <= slot) {
2812                 btrfs_tree_unlock(path->nodes[0]);
2813                 free_extent_buffer(path->nodes[0]);
2814                 path->nodes[0] = right;
2815                 path->slots[0] -= mid;
2816                 path->slots[1] += 1;
2817         } else {
2818                 btrfs_tree_unlock(right);
2819                 free_extent_buffer(right);
2820         }
2821
2822         BUG_ON(path->slots[0] < 0);
2823
2824         if (double_split) {
2825                 BUG_ON(num_doubles != 0);
2826                 num_doubles++;
2827                 goto again;
2828         }
2829         return ret;
2830 }
2831
2832 /*
2833  * This function splits a single item into two items,
2834  * giving 'new_key' to the new item and splitting the
2835  * old one at split_offset (from the start of the item).
2836  *
2837  * The path may be released by this operation.  After
2838  * the split, the path is pointing to the old item.  The
2839  * new item is going to be in the same node as the old one.
2840  *
2841  * Note, the item being split must be smaller enough to live alone on
2842  * a tree block with room for one extra struct btrfs_item
2843  *
2844  * This allows us to split the item in place, keeping a lock on the
2845  * leaf the entire time.
2846  */
2847 int btrfs_split_item(struct btrfs_trans_handle *trans,
2848                      struct btrfs_root *root,
2849                      struct btrfs_path *path,
2850                      struct btrfs_key *new_key,
2851                      unsigned long split_offset)
2852 {
2853         u32 item_size;
2854         struct extent_buffer *leaf;
2855         struct btrfs_key orig_key;
2856         struct btrfs_item *item;
2857         struct btrfs_item *new_item;
2858         int ret = 0;
2859         int slot;
2860         u32 nritems;
2861         u32 orig_offset;
2862         struct btrfs_disk_key disk_key;
2863         char *buf;
2864
2865         leaf = path->nodes[0];
2866         btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]);
2867         if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item))
2868                 goto split;
2869
2870         item_size = btrfs_item_size_nr(leaf, path->slots[0]);
2871         btrfs_release_path(root, path);
2872
2873         path->search_for_split = 1;
2874         path->keep_locks = 1;
2875
2876         ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1);
2877         path->search_for_split = 0;
2878
2879         /* if our item isn't there or got smaller, return now */
2880         if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0],
2881                                                         path->slots[0])) {
2882                 path->keep_locks = 0;
2883                 return -EAGAIN;
2884         }
2885
2886         ret = split_leaf(trans, root, &orig_key, path, 0, 0);
2887         path->keep_locks = 0;
2888         BUG_ON(ret);
2889
2890         leaf = path->nodes[0];
2891         BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
2892
2893 split:
2894         item = btrfs_item_nr(leaf, path->slots[0]);
2895         orig_offset = btrfs_item_offset(leaf, item);
2896         item_size = btrfs_item_size(leaf, item);
2897
2898
2899         buf = kmalloc(item_size, GFP_NOFS);
2900         read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
2901                             path->slots[0]), item_size);
2902         slot = path->slots[0] + 1;
2903         leaf = path->nodes[0];
2904
2905         nritems = btrfs_header_nritems(leaf);
2906
2907         if (slot != nritems) {
2908                 /* shift the items */
2909                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
2910                               btrfs_item_nr_offset(slot),
2911                               (nritems - slot) * sizeof(struct btrfs_item));
2912
2913         }
2914
2915         btrfs_cpu_key_to_disk(&disk_key, new_key);
2916         btrfs_set_item_key(leaf, &disk_key, slot);
2917
2918         new_item = btrfs_item_nr(leaf, slot);
2919
2920         btrfs_set_item_offset(leaf, new_item, orig_offset);
2921         btrfs_set_item_size(leaf, new_item, item_size - split_offset);
2922
2923         btrfs_set_item_offset(leaf, item,
2924                               orig_offset + item_size - split_offset);
2925         btrfs_set_item_size(leaf, item, split_offset);
2926
2927         btrfs_set_header_nritems(leaf, nritems + 1);
2928
2929         /* write the data for the start of the original item */
2930         write_extent_buffer(leaf, buf,
2931                             btrfs_item_ptr_offset(leaf, path->slots[0]),
2932                             split_offset);
2933
2934         /* write the data for the new item */
2935         write_extent_buffer(leaf, buf + split_offset,
2936                             btrfs_item_ptr_offset(leaf, slot),
2937                             item_size - split_offset);
2938         btrfs_mark_buffer_dirty(leaf);
2939
2940         ret = 0;
2941         if (btrfs_leaf_free_space(root, leaf) < 0) {
2942                 btrfs_print_leaf(root, leaf);
2943                 BUG();
2944         }
2945         kfree(buf);
2946         return ret;
2947 }
2948
2949 /*
2950  * make the item pointed to by the path smaller.  new_size indicates
2951  * how small to make it, and from_end tells us if we just chop bytes
2952  * off the end of the item or if we shift the item to chop bytes off
2953  * the front.
2954  */
2955 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
2956                         struct btrfs_root *root,
2957                         struct btrfs_path *path,
2958                         u32 new_size, int from_end)
2959 {
2960         int ret = 0;
2961         int slot;
2962         int slot_orig;
2963         struct extent_buffer *leaf;
2964         struct btrfs_item *item;
2965         u32 nritems;
2966         unsigned int data_end;
2967         unsigned int old_data_start;
2968         unsigned int old_size;
2969         unsigned int size_diff;
2970         int i;
2971
2972         slot_orig = path->slots[0];
2973         leaf = path->nodes[0];
2974         slot = path->slots[0];
2975
2976         old_size = btrfs_item_size_nr(leaf, slot);
2977         if (old_size == new_size)
2978                 return 0;
2979
2980         nritems = btrfs_header_nritems(leaf);
2981         data_end = leaf_data_end(root, leaf);
2982
2983         old_data_start = btrfs_item_offset_nr(leaf, slot);
2984
2985         size_diff = old_size - new_size;
2986
2987         BUG_ON(slot < 0);
2988         BUG_ON(slot >= nritems);
2989
2990         /*
2991          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2992          */
2993         /* first correct the data pointers */
2994         for (i = slot; i < nritems; i++) {
2995                 u32 ioff;
2996                 item = btrfs_item_nr(leaf, i);
2997
2998                 if (!leaf->map_token) {
2999                         map_extent_buffer(leaf, (unsigned long)item,
3000                                         sizeof(struct btrfs_item),
3001                                         &leaf->map_token, &leaf->kaddr,
3002                                         &leaf->map_start, &leaf->map_len,
3003                                         KM_USER1);
3004                 }
3005
3006                 ioff = btrfs_item_offset(leaf, item);
3007                 btrfs_set_item_offset(leaf, item, ioff + size_diff);
3008         }
3009
3010         if (leaf->map_token) {
3011                 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3012                 leaf->map_token = NULL;
3013         }
3014
3015         /* shift the data */
3016         if (from_end) {
3017                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3018                               data_end + size_diff, btrfs_leaf_data(leaf) +
3019                               data_end, old_data_start + new_size - data_end);
3020         } else {
3021                 struct btrfs_disk_key disk_key;
3022                 u64 offset;
3023
3024                 btrfs_item_key(leaf, &disk_key, slot);
3025
3026                 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
3027                         unsigned long ptr;
3028                         struct btrfs_file_extent_item *fi;
3029
3030                         fi = btrfs_item_ptr(leaf, slot,
3031                                             struct btrfs_file_extent_item);
3032                         fi = (struct btrfs_file_extent_item *)(
3033                              (unsigned long)fi - size_diff);
3034
3035                         if (btrfs_file_extent_type(leaf, fi) ==
3036                             BTRFS_FILE_EXTENT_INLINE) {
3037                                 ptr = btrfs_item_ptr_offset(leaf, slot);
3038                                 memmove_extent_buffer(leaf, ptr,
3039                                         (unsigned long)fi,
3040                                         offsetof(struct btrfs_file_extent_item,
3041                                                  disk_bytenr));
3042                         }
3043                 }
3044
3045                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3046                               data_end + size_diff, btrfs_leaf_data(leaf) +
3047                               data_end, old_data_start - data_end);
3048
3049                 offset = btrfs_disk_key_offset(&disk_key);
3050                 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
3051                 btrfs_set_item_key(leaf, &disk_key, slot);
3052                 if (slot == 0)
3053                         fixup_low_keys(trans, root, path, &disk_key, 1);
3054         }
3055
3056         item = btrfs_item_nr(leaf, slot);
3057         btrfs_set_item_size(leaf, item, new_size);
3058         btrfs_mark_buffer_dirty(leaf);
3059
3060         ret = 0;
3061         if (btrfs_leaf_free_space(root, leaf) < 0) {
3062                 btrfs_print_leaf(root, leaf);
3063                 BUG();
3064         }
3065         return ret;
3066 }
3067
3068 /*
3069  * make the item pointed to by the path bigger, data_size is the new size.
3070  */
3071 int btrfs_extend_item(struct btrfs_trans_handle *trans,
3072                       struct btrfs_root *root, struct btrfs_path *path,
3073                       u32 data_size)
3074 {
3075         int ret = 0;
3076         int slot;
3077         int slot_orig;
3078         struct extent_buffer *leaf;
3079         struct btrfs_item *item;
3080         u32 nritems;
3081         unsigned int data_end;
3082         unsigned int old_data;
3083         unsigned int old_size;
3084         int i;
3085
3086         slot_orig = path->slots[0];
3087         leaf = path->nodes[0];
3088
3089         nritems = btrfs_header_nritems(leaf);
3090         data_end = leaf_data_end(root, leaf);
3091
3092         if (btrfs_leaf_free_space(root, leaf) < data_size) {
3093                 btrfs_print_leaf(root, leaf);
3094                 BUG();
3095         }
3096         slot = path->slots[0];
3097         old_data = btrfs_item_end_nr(leaf, slot);
3098
3099         BUG_ON(slot < 0);
3100         if (slot >= nritems) {
3101                 btrfs_print_leaf(root, leaf);
3102                 printk("slot %d too large, nritems %d\n", slot, nritems);
3103                 BUG_ON(1);
3104         }
3105
3106         /*
3107          * item0..itemN ... dataN.offset..dataN.size .. data0.size
3108          */
3109         /* first correct the data pointers */
3110         for (i = slot; i < nritems; i++) {
3111                 u32 ioff;
3112                 item = btrfs_item_nr(leaf, i);
3113
3114                 if (!leaf->map_token) {
3115                         map_extent_buffer(leaf, (unsigned long)item,
3116                                         sizeof(struct btrfs_item),
3117                                         &leaf->map_token, &leaf->kaddr,
3118                                         &leaf->map_start, &leaf->map_len,
3119                                         KM_USER1);
3120                 }
3121                 ioff = btrfs_item_offset(leaf, item);
3122                 btrfs_set_item_offset(leaf, item, ioff - data_size);
3123         }
3124
3125         if (leaf->map_token) {
3126                 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3127                 leaf->map_token = NULL;
3128         }
3129
3130         /* shift the data */
3131         memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3132                       data_end - data_size, btrfs_leaf_data(leaf) +
3133                       data_end, old_data - data_end);
3134
3135         data_end = old_data;
3136         old_size = btrfs_item_size_nr(leaf, slot);
3137         item = btrfs_item_nr(leaf, slot);
3138         btrfs_set_item_size(leaf, item, old_size + data_size);
3139         btrfs_mark_buffer_dirty(leaf);
3140
3141         ret = 0;
3142         if (btrfs_leaf_free_space(root, leaf) < 0) {
3143                 btrfs_print_leaf(root, leaf);
3144                 BUG();
3145         }
3146         return ret;
3147 }
3148
3149 /*
3150  * Given a key and some data, insert items into the tree.
3151  * This does all the path init required, making room in the tree if needed.
3152  * Returns the number of keys that were inserted.
3153  */
3154 int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
3155                             struct btrfs_root *root,
3156                             struct btrfs_path *path,
3157                             struct btrfs_key *cpu_key, u32 *data_size,
3158                             int nr)
3159 {
3160         struct extent_buffer *leaf;
3161         struct btrfs_item *item;
3162         int ret = 0;
3163         int slot;
3164         int i;
3165         u32 nritems;
3166         u32 total_data = 0;
3167         u32 total_size = 0;
3168         unsigned int data_end;
3169         struct btrfs_disk_key disk_key;
3170         struct btrfs_key found_key;
3171
3172         found_key.objectid = 0;
3173         nr = min_t(int, nr, BTRFS_NODEPTRS_PER_BLOCK(root));
3174
3175         for (i = 0; i < nr; i++)
3176                 total_data += data_size[i];
3177
3178         total_data = min_t(u32, total_data, BTRFS_LEAF_DATA_SIZE(root));
3179         total_size = total_data + (nr * sizeof(struct btrfs_item));
3180         ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
3181         if (ret == 0)
3182                 return -EEXIST;
3183         if (ret < 0)
3184                 goto out;
3185
3186         leaf = path->nodes[0];
3187
3188         nritems = btrfs_header_nritems(leaf);
3189         data_end = leaf_data_end(root, leaf);
3190
3191         if (btrfs_leaf_free_space(root, leaf) < total_size) {
3192                 for (i = nr; i >= 0; i--) {
3193                         total_data -= data_size[i];
3194                         total_size -= data_size[i] + sizeof(struct btrfs_item);
3195                         if (total_size < btrfs_leaf_free_space(root, leaf))
3196                                 break;
3197                 }
3198                 nr = i;
3199         }
3200
3201         slot = path->slots[0];
3202         BUG_ON(slot < 0);
3203
3204         if (slot != nritems) {
3205                 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
3206
3207                 item = btrfs_item_nr(leaf, slot);
3208                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
3209
3210                 /* figure out how many keys we can insert in here */
3211                 total_data = data_size[0];
3212                 for (i = 1; i < nr; i++) {
3213                         if (comp_cpu_keys(&found_key, cpu_key + i) <= 0)
3214                                 break;
3215                         total_data += data_size[i];
3216                 }
3217                 nr = i;
3218
3219                 if (old_data < data_end) {
3220                         btrfs_print_leaf(root, leaf);
3221                         printk("slot %d old_data %d data_end %d\n",
3222                                slot, old_data, data_end);
3223                         BUG_ON(1);
3224                 }
3225                 /*
3226                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
3227                  */
3228                 /* first correct the data pointers */
3229                 WARN_ON(leaf->map_token);
3230                 for (i = slot; i < nritems; i++) {
3231                         u32 ioff;
3232
3233                         item = btrfs_item_nr(leaf, i);
3234                         if (!leaf->map_token) {
3235                                 map_extent_buffer(leaf, (unsigned long)item,
3236                                         sizeof(struct btrfs_item),
3237                                         &leaf->map_token, &leaf->kaddr,
3238                                         &leaf->map_start, &leaf->map_len,
3239                                         KM_USER1);
3240                         }
3241
3242                         ioff = btrfs_item_offset(leaf, item);
3243                         btrfs_set_item_offset(leaf, item, ioff - total_data);
3244                 }
3245                 if (leaf->map_token) {
3246                         unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3247                         leaf->map_token = NULL;
3248                 }
3249
3250                 /* shift the items */
3251                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
3252                               btrfs_item_nr_offset(slot),
3253                               (nritems - slot) * sizeof(struct btrfs_item));
3254
3255                 /* shift the data */
3256                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3257                               data_end - total_data, btrfs_leaf_data(leaf) +
3258                               data_end, old_data - data_end);
3259                 data_end = old_data;
3260         } else {
3261                 /*
3262                  * this sucks but it has to be done, if we are inserting at
3263                  * the end of the leaf only insert 1 of the items, since we
3264                  * have no way of knowing whats on the next leaf and we'd have
3265                  * to drop our current locks to figure it out
3266                  */
3267                 nr = 1;
3268         }
3269
3270         /* setup the item for the new data */
3271         for (i = 0; i < nr; i++) {
3272                 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
3273                 btrfs_set_item_key(leaf, &disk_key, slot + i);
3274                 item = btrfs_item_nr(leaf, slot + i);
3275                 btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
3276                 data_end -= data_size[i];
3277                 btrfs_set_item_size(leaf, item, data_size[i]);
3278         }
3279         btrfs_set_header_nritems(leaf, nritems + nr);
3280         btrfs_mark_buffer_dirty(leaf);
3281
3282         ret = 0;
3283         if (slot == 0) {
3284                 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
3285                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
3286         }
3287
3288         if (btrfs_leaf_free_space(root, leaf) < 0) {
3289                 btrfs_print_leaf(root, leaf);
3290                 BUG();
3291         }
3292 out:
3293         if (!ret)
3294                 ret = nr;
3295         return ret;
3296 }
3297
3298 /*
3299  * Given a key and some data, insert items into the tree.
3300  * This does all the path init required, making room in the tree if needed.
3301  */
3302 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
3303                             struct btrfs_root *root,
3304                             struct btrfs_path *path,
3305                             struct btrfs_key *cpu_key, u32 *data_size,
3306                             int nr)
3307 {
3308         struct extent_buffer *leaf;
3309         struct btrfs_item *item;
3310         int ret = 0;
3311         int slot;
3312         int slot_orig;
3313         int i;
3314         u32 nritems;
3315         u32 total_size = 0;
3316         u32 total_data = 0;
3317         unsigned int data_end;
3318         struct btrfs_disk_key disk_key;
3319
3320         for (i = 0; i < nr; i++) {
3321                 total_data += data_size[i];
3322         }
3323
3324         total_size = total_data + (nr * sizeof(struct btrfs_item));
3325         ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
3326         if (ret == 0)
3327                 return -EEXIST;
3328         if (ret < 0)
3329                 goto out;
3330
3331         slot_orig = path->slots[0];
3332         leaf = path->nodes[0];
3333
3334         nritems = btrfs_header_nritems(leaf);
3335         data_end = leaf_data_end(root, leaf);
3336
3337         if (btrfs_leaf_free_space(root, leaf) < total_size) {
3338                 btrfs_print_leaf(root, leaf);
3339                 printk("not enough freespace need %u have %d\n",
3340                        total_size, btrfs_leaf_free_space(root, leaf));
3341                 BUG();
3342         }
3343
3344         slot = path->slots[0];
3345         BUG_ON(slot < 0);
3346
3347         if (slot != nritems) {
3348                 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
3349
3350                 if (old_data < data_end) {
3351                         btrfs_print_leaf(root, leaf);
3352                         printk("slot %d old_data %d data_end %d\n",
3353                                slot, old_data, data_end);
3354                         BUG_ON(1);
3355                 }
3356                 /*
3357                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
3358                  */
3359                 /* first correct the data pointers */
3360                 WARN_ON(leaf->map_token);
3361                 for (i = slot; i < nritems; i++) {
3362                         u32 ioff;
3363
3364                         item = btrfs_item_nr(leaf, i);
3365                         if (!leaf->map_token) {
3366                                 map_extent_buffer(leaf, (unsigned long)item,
3367                                         sizeof(struct btrfs_item),
3368                                         &leaf->map_token, &leaf->kaddr,
3369                                         &leaf->map_start, &leaf->map_len,
3370                                         KM_USER1);
3371                         }
3372
3373                         ioff = btrfs_item_offset(leaf, item);
3374                         btrfs_set_item_offset(leaf, item, ioff - total_data);
3375                 }
3376                 if (leaf->map_token) {
3377                         unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3378                         leaf->map_token = NULL;
3379                 }
3380
3381                 /* shift the items */
3382                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
3383                               btrfs_item_nr_offset(slot),
3384                               (nritems - slot) * sizeof(struct btrfs_item));
3385
3386                 /* shift the data */
3387                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3388                               data_end - total_data, btrfs_leaf_data(leaf) +
3389                               data_end, old_data - data_end);
3390                 data_end = old_data;
3391         }
3392
3393         /* setup the item for the new data */
3394         for (i = 0; i < nr; i++) {
3395                 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
3396                 btrfs_set_item_key(leaf, &disk_key, slot + i);
3397                 item = btrfs_item_nr(leaf, slot + i);
3398                 btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
3399                 data_end -= data_size[i];
3400                 btrfs_set_item_size(leaf, item, data_size[i]);
3401         }
3402         btrfs_set_header_nritems(leaf, nritems + nr);
3403         btrfs_mark_buffer_dirty(leaf);
3404
3405         ret = 0;
3406         if (slot == 0) {
3407                 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
3408                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
3409         }
3410
3411         if (btrfs_leaf_free_space(root, leaf) < 0) {
3412                 btrfs_print_leaf(root, leaf);
3413                 BUG();
3414         }
3415 out:
3416         return ret;
3417 }
3418
3419 /*
3420  * Given a key and some data, insert an item into the tree.
3421  * This does all the path init required, making room in the tree if needed.
3422  */
3423 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
3424                       *root, struct btrfs_key *cpu_key, void *data, u32
3425                       data_size)
3426 {
3427         int ret = 0;
3428         struct btrfs_path *path;
3429         struct extent_buffer *leaf;
3430         unsigned long ptr;
3431
3432         path = btrfs_alloc_path();
3433         BUG_ON(!path);
3434         ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
3435         if (!ret) {
3436                 leaf = path->nodes[0];
3437                 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
3438                 write_extent_buffer(leaf, data, ptr, data_size);
3439                 btrfs_mark_buffer_dirty(leaf);
3440         }
3441         btrfs_free_path(path);
3442         return ret;
3443 }
3444
3445 /*
3446  * delete the pointer from a given node.
3447  *
3448  * the tree should have been previously balanced so the deletion does not
3449  * empty a node.
3450  */
3451 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3452                    struct btrfs_path *path, int level, int slot)
3453 {
3454         struct extent_buffer *parent = path->nodes[level];
3455         u32 nritems;
3456         int ret = 0;
3457         int wret;
3458
3459         nritems = btrfs_header_nritems(parent);
3460         if (slot != nritems -1) {
3461                 memmove_extent_buffer(parent,
3462                               btrfs_node_key_ptr_offset(slot),
3463                               btrfs_node_key_ptr_offset(slot + 1),
3464                               sizeof(struct btrfs_key_ptr) *
3465                               (nritems - slot - 1));
3466         }
3467         nritems--;
3468         btrfs_set_header_nritems(parent, nritems);
3469         if (nritems == 0 && parent == root->node) {
3470                 BUG_ON(btrfs_header_level(root->node) != 1);
3471                 /* just turn the root into a leaf and break */
3472                 btrfs_set_header_level(root->node, 0);
3473         } else if (slot == 0) {
3474                 struct btrfs_disk_key disk_key;
3475
3476                 btrfs_node_key(parent, &disk_key, 0);
3477                 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
3478                 if (wret)
3479                         ret = wret;
3480         }
3481         btrfs_mark_buffer_dirty(parent);
3482         return ret;
3483 }
3484
3485 /*
3486  * a helper function to delete the leaf pointed to by path->slots[1] and
3487  * path->nodes[1].  bytenr is the node block pointer, but since the callers
3488  * already know it, it is faster to have them pass it down than to
3489  * read it out of the node again.
3490  *
3491  * This deletes the pointer in path->nodes[1] and frees the leaf
3492  * block extent.  zero is returned if it all worked out, < 0 otherwise.
3493  *
3494  * The path must have already been setup for deleting the leaf, including
3495  * all the proper balancing.  path->nodes[1] must be locked.
3496  */
3497 noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
3498                             struct btrfs_root *root,
3499                             struct btrfs_path *path, u64 bytenr)
3500 {
3501         int ret;
3502         u64 root_gen = btrfs_header_generation(path->nodes[1]);
3503
3504         ret = del_ptr(trans, root, path, 1, path->slots[1]);
3505         if (ret)
3506                 return ret;
3507
3508         ret = btrfs_free_extent(trans, root, bytenr,
3509                                 btrfs_level_size(root, 0),
3510                                 path->nodes[1]->start,
3511                                 btrfs_header_owner(path->nodes[1]),
3512                                 root_gen, 0, 1);
3513         return ret;
3514 }
3515 /*
3516  * delete the item at the leaf level in path.  If that empties
3517  * the leaf, remove it from the tree
3518  */
3519 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
3520                     struct btrfs_path *path, int slot, int nr)
3521 {
3522         struct extent_buffer *leaf;
3523         struct btrfs_item *item;
3524         int last_off;
3525         int dsize = 0;
3526         int ret = 0;
3527         int wret;
3528         int i;
3529         u32 nritems;
3530
3531         leaf = path->nodes[0];
3532         last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
3533
3534         for (i = 0; i < nr; i++)
3535                 dsize += btrfs_item_size_nr(leaf, slot + i);
3536
3537         nritems = btrfs_header_nritems(leaf);
3538
3539         if (slot + nr != nritems) {
3540                 int data_end = leaf_data_end(root, leaf);
3541
3542                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
3543                               data_end + dsize,
3544                               btrfs_leaf_data(leaf) + data_end,
3545                               last_off - data_end);
3546
3547                 for (i = slot + nr; i < nritems; i++) {
3548                         u32 ioff;
3549
3550                         item = btrfs_item_nr(leaf, i);
3551                         if (!leaf->map_token) {
3552                                 map_extent_buffer(leaf, (unsigned long)item,
3553                                         sizeof(struct btrfs_item),
3554                                         &leaf->map_token, &leaf->kaddr,
3555                                         &leaf->map_start, &leaf->map_len,
3556                                         KM_USER1);
3557                         }
3558                         ioff = btrfs_item_offset(leaf, item);
3559                         btrfs_set_item_offset(leaf, item, ioff + dsize);
3560                 }
3561
3562                 if (leaf->map_token) {
3563                         unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
3564                         leaf->map_token = NULL;
3565                 }
3566
3567                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
3568                               btrfs_item_nr_offset(slot + nr),
3569                               sizeof(struct btrfs_item) *
3570                               (nritems - slot - nr));
3571         }
3572         btrfs_set_header_nritems(leaf, nritems - nr);
3573         nritems -= nr;
3574
3575         /* delete the leaf if we've emptied it */
3576         if (nritems == 0) {
3577                 if (leaf == root->node) {
3578                         btrfs_set_header_level(leaf, 0);
3579                 } else {
3580                         ret = btrfs_del_leaf(trans, root, path, leaf->start);
3581                         BUG_ON(ret);
3582                 }
3583         } else {
3584                 int used = leaf_space_used(leaf, 0, nritems);
3585                 if (slot == 0) {
3586                         struct btrfs_disk_key disk_key;
3587
3588                         btrfs_item_key(leaf, &disk_key, 0);
3589                         wret = fixup_low_keys(trans, root, path,
3590                                               &disk_key, 1);
3591                         if (wret)
3592                                 ret = wret;
3593                 }
3594
3595                 /* delete the leaf if it is mostly empty */
3596                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
3597                         /* push_leaf_left fixes the path.
3598                          * make sure the path still points to our leaf
3599                          * for possible call to del_ptr below
3600                          */
3601                         slot = path->slots[1];
3602                         extent_buffer_get(leaf);
3603
3604                         wret = push_leaf_left(trans, root, path, 1, 1);
3605                         if (wret < 0 && wret != -ENOSPC)
3606                                 ret = wret;
3607
3608                         if (path->nodes[0] == leaf &&
3609                             btrfs_header_nritems(leaf)) {
3610                                 wret = push_leaf_right(trans, root, path, 1, 1);
3611                                 if (wret < 0 && wret != -ENOSPC)
3612                                         ret = wret;
3613                         }
3614
3615                         if (btrfs_header_nritems(leaf) == 0) {
3616                                 path->slots[1] = slot;
3617                                 ret = btrfs_del_leaf(trans, root, path, leaf->start);
3618                                 BUG_ON(ret);
3619                                 free_extent_buffer(leaf);
3620                         } else {
3621                                 /* if we're still in the path, make sure
3622                                  * we're dirty.  Otherwise, one of the
3623                                  * push_leaf functions must have already
3624                                  * dirtied this buffer
3625                                  */
3626                                 if (path->nodes[0] == leaf)
3627                                         btrfs_mark_buffer_dirty(leaf);
3628                                 free_extent_buffer(leaf);
3629                         }
3630                 } else {
3631                         btrfs_mark_buffer_dirty(leaf);
3632                 }
3633         }
3634         return ret;
3635 }
3636
3637 /*
3638  * search the tree again to find a leaf with lesser keys
3639  * returns 0 if it found something or 1 if there are no lesser leaves.
3640  * returns < 0 on io errors.
3641  *
3642  * This may release the path, and so you may lose any locks held at the
3643  * time you call it.
3644  */
3645 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
3646 {
3647         struct btrfs_key key;
3648         struct btrfs_disk_key found_key;
3649         int ret;
3650
3651         btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
3652
3653         if (key.offset > 0)
3654                 key.offset--;
3655         else if (key.type > 0)
3656                 key.type--;
3657         else if (key.objectid > 0)
3658                 key.objectid--;
3659         else
3660                 return 1;
3661
3662         btrfs_release_path(root, path);
3663         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3664         if (ret < 0)
3665                 return ret;
3666         btrfs_item_key(path->nodes[0], &found_key, 0);
3667         ret = comp_keys(&found_key, &key);
3668         if (ret < 0)
3669                 return 0;
3670         return 1;
3671 }
3672
3673 /*
3674  * A helper function to walk down the tree starting at min_key, and looking
3675  * for nodes or leaves that are either in cache or have a minimum
3676  * transaction id.  This is used by the btree defrag code, and tree logging
3677  *
3678  * This does not cow, but it does stuff the starting key it finds back
3679  * into min_key, so you can call btrfs_search_slot with cow=1 on the
3680  * key and get a writable path.
3681  *
3682  * This does lock as it descends, and path->keep_locks should be set
3683  * to 1 by the caller.
3684  *
3685  * This honors path->lowest_level to prevent descent past a given level
3686  * of the tree.
3687  *
3688  * min_trans indicates the oldest transaction that you are interested
3689  * in walking through.  Any nodes or leaves older than min_trans are
3690  * skipped over (without reading them).
3691  *
3692  * returns zero if something useful was found, < 0 on error and 1 if there
3693  * was nothing in the tree that matched the search criteria.
3694  */
3695 int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
3696                          struct btrfs_key *max_key,
3697                          struct btrfs_path *path, int cache_only,
3698                          u64 min_trans)
3699 {
3700         struct extent_buffer *cur;
3701         struct btrfs_key found_key;
3702         int slot;
3703         int sret;
3704         u32 nritems;
3705         int level;
3706         int ret = 1;
3707
3708         WARN_ON(!path->keep_locks);
3709 again:
3710         cur = btrfs_lock_root_node(root);
3711         level = btrfs_header_level(cur);
3712         WARN_ON(path->nodes[level]);
3713         path->nodes[level] = cur;
3714         path->locks[level] = 1;
3715
3716         if (btrfs_header_generation(cur) < min_trans) {
3717                 ret = 1;
3718                 goto out;
3719         }
3720         while(1) {
3721                 nritems = btrfs_header_nritems(cur);
3722                 level = btrfs_header_level(cur);
3723                 sret = bin_search(cur, min_key, level, &slot);
3724
3725                 /* at the lowest level, we're done, setup the path and exit */
3726                 if (level == path->lowest_level) {
3727                         if (slot >= nritems)
3728                                 goto find_next_key;
3729                         ret = 0;
3730                         path->slots[level] = slot;
3731                         btrfs_item_key_to_cpu(cur, &found_key, slot);
3732                         goto out;
3733                 }
3734                 if (sret && slot > 0)
3735                         slot--;
3736                 /*
3737                  * check this node pointer against the cache_only and
3738                  * min_trans parameters.  If it isn't in cache or is too
3739                  * old, skip to the next one.
3740                  */
3741                 while(slot < nritems) {
3742                         u64 blockptr;
3743                         u64 gen;
3744                         struct extent_buffer *tmp;
3745                         struct btrfs_disk_key disk_key;
3746
3747                         blockptr = btrfs_node_blockptr(cur, slot);
3748                         gen = btrfs_node_ptr_generation(cur, slot);
3749                         if (gen < min_trans) {
3750                                 slot++;
3751                                 continue;
3752                         }
3753                         if (!cache_only)
3754                                 break;
3755
3756                         if (max_key) {
3757                                 btrfs_node_key(cur, &disk_key, slot);
3758                                 if (comp_keys(&disk_key, max_key) >= 0) {
3759                                         ret = 1;
3760                                         goto out;
3761                                 }
3762                         }
3763
3764                         tmp = btrfs_find_tree_block(root, blockptr,
3765                                             btrfs_level_size(root, level - 1));
3766
3767                         if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
3768                                 free_extent_buffer(tmp);
3769                                 break;
3770                         }
3771                         if (tmp)
3772                                 free_extent_buffer(tmp);
3773                         slot++;
3774                 }
3775 find_next_key:
3776                 /*
3777                  * we didn't find a candidate key in this node, walk forward
3778                  * and find another one
3779                  */
3780                 if (slot >= nritems) {
3781                         path->slots[level] = slot;
3782                         sret = btrfs_find_next_key(root, path, min_key, level,
3783                                                   cache_only, min_trans);
3784                         if (sret == 0) {
3785                                 btrfs_release_path(root, path);
3786                                 goto again;
3787                         } else {
3788                                 goto out;
3789                         }
3790                 }
3791                 /* save our key for returning back */
3792                 btrfs_node_key_to_cpu(cur, &found_key, slot);
3793                 path->slots[level] = slot;
3794                 if (level == path->lowest_level) {
3795                         ret = 0;
3796                         unlock_up(path, level, 1);
3797                         goto out;
3798                 }
3799                 cur = read_node_slot(root, cur, slot);
3800
3801                 btrfs_tree_lock(cur);
3802                 path->locks[level - 1] = 1;
3803                 path->nodes[level - 1] = cur;
3804                 unlock_up(path, level, 1);
3805         }
3806 out:
3807         if (ret == 0)
3808                 memcpy(min_key, &found_key, sizeof(found_key));
3809         return ret;
3810 }
3811
3812 /*
3813  * this is similar to btrfs_next_leaf, but does not try to preserve
3814  * and fixup the path.  It looks for and returns the next key in the
3815  * tree based on the current path and the cache_only and min_trans
3816  * parameters.
3817  *
3818  * 0 is returned if another key is found, < 0 if there are any errors
3819  * and 1 is returned if there are no higher keys in the tree
3820  *
3821  * path->keep_locks should be set to 1 on the search made before
3822  * calling this function.
3823  */
3824 int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
3825                         struct btrfs_key *key, int lowest_level,
3826                         int cache_only, u64 min_trans)
3827 {
3828         int level = lowest_level;
3829         int slot;
3830         struct extent_buffer *c;
3831
3832         WARN_ON(!path->keep_locks);
3833         while(level < BTRFS_MAX_LEVEL) {
3834                 if (!path->nodes[level])
3835                         return 1;
3836
3837                 slot = path->slots[level] + 1;
3838                 c = path->nodes[level];
3839 next:
3840                 if (slot >= btrfs_header_nritems(c)) {
3841                         level++;
3842                         if (level == BTRFS_MAX_LEVEL) {
3843                                 return 1;
3844                         }
3845                         continue;
3846                 }
3847                 if (level == 0)
3848                         btrfs_item_key_to_cpu(c, key, slot);
3849                 else {
3850                         u64 blockptr = btrfs_node_blockptr(c, slot);
3851                         u64 gen = btrfs_node_ptr_generation(c, slot);
3852
3853                         if (cache_only) {
3854                                 struct extent_buffer *cur;
3855                                 cur = btrfs_find_tree_block(root, blockptr,
3856                                             btrfs_level_size(root, level - 1));
3857                                 if (!cur || !btrfs_buffer_uptodate(cur, gen)) {
3858                                         slot++;
3859                                         if (cur)
3860                                                 free_extent_buffer(cur);
3861                                         goto next;
3862                                 }
3863                                 free_extent_buffer(cur);
3864                         }
3865                         if (gen < min_trans) {
3866                                 slot++;
3867                                 goto next;
3868                         }
3869                         btrfs_node_key_to_cpu(c, key, slot);
3870                 }
3871                 return 0;
3872         }
3873         return 1;
3874 }
3875
3876 /*
3877  * search the tree again to find a leaf with greater keys
3878  * returns 0 if it found something or 1 if there are no greater leaves.
3879  * returns < 0 on io errors.
3880  */
3881 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
3882 {
3883         int slot;
3884         int level = 1;
3885         struct extent_buffer *c;
3886         struct extent_buffer *next = NULL;
3887         struct btrfs_key key;
3888         u32 nritems;
3889         int ret;
3890
3891         nritems = btrfs_header_nritems(path->nodes[0]);
3892         if (nritems == 0) {
3893                 return 1;
3894         }
3895
3896         btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
3897
3898         btrfs_release_path(root, path);
3899         path->keep_locks = 1;
3900         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3901         path->keep_locks = 0;
3902
3903         if (ret < 0)
3904                 return ret;
3905
3906         nritems = btrfs_header_nritems(path->nodes[0]);
3907         /*
3908          * by releasing the path above we dropped all our locks.  A balance
3909          * could have added more items next to the key that used to be
3910          * at the very end of the block.  So, check again here and
3911          * advance the path if there are now more items available.
3912          */
3913         if (nritems > 0 && path->slots[0] < nritems - 1) {
3914                 path->slots[0]++;
3915                 goto done;
3916         }
3917
3918         while(level < BTRFS_MAX_LEVEL) {
3919                 if (!path->nodes[level])
3920                         return 1;
3921
3922                 slot = path->slots[level] + 1;
3923                 c = path->nodes[level];
3924                 if (slot >= btrfs_header_nritems(c)) {
3925                         level++;
3926                         if (level == BTRFS_MAX_LEVEL) {
3927                                 return 1;
3928                         }
3929                         continue;
3930                 }
3931
3932                 if (next) {
3933                         btrfs_tree_unlock(next);
3934                         free_extent_buffer(next);
3935                 }
3936
3937                 if (level == 1 && (path->locks[1] || path->skip_locking) &&
3938                     path->reada)
3939                         reada_for_search(root, path, level, slot, 0);
3940
3941                 next = read_node_slot(root, c, slot);
3942                 if (!path->skip_locking) {
3943                         WARN_ON(!btrfs_tree_locked(c));
3944                         btrfs_tree_lock(next);
3945                 }
3946                 break;
3947         }
3948         path->slots[level] = slot;
3949         while(1) {
3950                 level--;
3951                 c = path->nodes[level];
3952                 if (path->locks[level])
3953                         btrfs_tree_unlock(c);
3954                 free_extent_buffer(c);
3955                 path->nodes[level] = next;
3956                 path->slots[level] = 0;
3957                 if (!path->skip_locking)
3958                         path->locks[level] = 1;
3959                 if (!level)
3960                         break;
3961                 if (level == 1 && path->locks[1] && path->reada)
3962                         reada_for_search(root, path, level, slot, 0);
3963                 next = read_node_slot(root, next, 0);
3964                 if (!path->skip_locking) {
3965                         WARN_ON(!btrfs_tree_locked(path->nodes[level]));
3966                         btrfs_tree_lock(next);
3967                 }
3968         }
3969 done:
3970         unlock_up(path, 0, 1);
3971         return 0;
3972 }
3973
3974 /*
3975  * this uses btrfs_prev_leaf to walk backwards in the tree, and keeps
3976  * searching until it gets past min_objectid or finds an item of 'type'
3977  *
3978  * returns 0 if something is found, 1 if nothing was found and < 0 on error
3979  */
3980 int btrfs_previous_item(struct btrfs_root *root,
3981                         struct btrfs_path *path, u64 min_objectid,
3982                         int type)
3983 {
3984         struct btrfs_key found_key;
3985         struct extent_buffer *leaf;
3986         u32 nritems;
3987         int ret;
3988
3989         while(1) {
3990                 if (path->slots[0] == 0) {
3991                         ret = btrfs_prev_leaf(root, path);
3992                         if (ret != 0)
3993                                 return ret;
3994                 } else {
3995                         path->slots[0]--;
3996                 }
3997                 leaf = path->nodes[0];
3998                 nritems = btrfs_header_nritems(leaf);
3999                 if (nritems == 0)
4000                         return 1;
4001                 if (path->slots[0] == nritems)
4002                         path->slots[0]--;
4003
4004                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
4005                 if (found_key.type == type)
4006                         return 0;
4007                 if (found_key.objectid < min_objectid)
4008                         break;
4009                 if (found_key.objectid == min_objectid &&
4010                     found_key.type < type)
4011                         break;
4012         }
4013         return 1;
4014 }