Btrfs: Add readahead to the online shrinker, and a mount -o alloc_start= for testing
[linux-2.6] / fs / btrfs / ctree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "transaction.h"
23 #include "print-tree.h"
24
/*
 * Prototypes for file-local helpers that are referenced before their
 * definitions appear in this file.
 */
static int split_node(struct btrfs_trans_handle *trans,
                      struct btrfs_root *root,
                      struct btrfs_path *path, int level);

static int split_leaf(struct btrfs_trans_handle *trans,
                      struct btrfs_root *root,
                      struct btrfs_key *ins_key,
                      struct btrfs_path *path,
                      int data_size, int extend);

static int push_node_left(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root,
                          struct extent_buffer *dst,
                          struct extent_buffer *src);

static int balance_node_right(struct btrfs_trans_handle *trans,
                              struct btrfs_root *root,
                              struct extent_buffer *dst_buf,
                              struct extent_buffer *src_buf);

static int del_ptr(struct btrfs_trans_handle *trans,
                   struct btrfs_root *root,
                   struct btrfs_path *path, int level, int slot);
39
40 inline void btrfs_init_path(struct btrfs_path *p)
41 {
42         memset(p, 0, sizeof(*p));
43 }
44
45 struct btrfs_path *btrfs_alloc_path(void)
46 {
47         struct btrfs_path *path;
48         path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
49         if (path) {
50                 btrfs_init_path(path);
51                 path->reada = 1;
52         }
53         return path;
54 }
55
56 void btrfs_free_path(struct btrfs_path *p)
57 {
58         btrfs_release_path(NULL, p);
59         kmem_cache_free(btrfs_path_cachep, p);
60 }
61
62 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
63 {
64         int i;
65         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
66                 if (!p->nodes[i])
67                         break;
68                 free_extent_buffer(p->nodes[i]);
69         }
70         memset(p, 0, sizeof(*p));
71 }
72
73 int btrfs_copy_root(struct btrfs_trans_handle *trans,
74                       struct btrfs_root *root,
75                       struct extent_buffer *buf,
76                       struct extent_buffer **cow_ret, u64 new_root_objectid)
77 {
78         struct extent_buffer *cow;
79         u32 nritems;
80         int ret = 0;
81         int level;
82         struct btrfs_key first_key;
83         struct btrfs_root *new_root;
84
85         new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
86         if (!new_root)
87                 return -ENOMEM;
88
89         memcpy(new_root, root, sizeof(*new_root));
90         new_root->root_key.objectid = new_root_objectid;
91
92         WARN_ON(root->ref_cows && trans->transid !=
93                 root->fs_info->running_transaction->transid);
94         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
95
96         level = btrfs_header_level(buf);
97         nritems = btrfs_header_nritems(buf);
98         if (nritems) {
99                 if (level == 0)
100                         btrfs_item_key_to_cpu(buf, &first_key, 0);
101                 else
102                         btrfs_node_key_to_cpu(buf, &first_key, 0);
103         } else {
104                 first_key.objectid = 0;
105         }
106         cow = __btrfs_alloc_free_block(trans, new_root, buf->len,
107                                        new_root_objectid,
108                                        trans->transid, first_key.objectid,
109                                        level, buf->start, 0);
110         if (IS_ERR(cow)) {
111                 kfree(new_root);
112                 return PTR_ERR(cow);
113         }
114
115         copy_extent_buffer(cow, buf, 0, 0, cow->len);
116         btrfs_set_header_bytenr(cow, cow->start);
117         btrfs_set_header_generation(cow, trans->transid);
118         btrfs_set_header_owner(cow, new_root_objectid);
119
120         WARN_ON(btrfs_header_generation(buf) > trans->transid);
121         ret = btrfs_inc_ref(trans, new_root, buf);
122         kfree(new_root);
123
124         if (ret)
125                 return ret;
126
127         btrfs_mark_buffer_dirty(cow);
128         *cow_ret = cow;
129         return 0;
130 }
131
132 int __btrfs_cow_block(struct btrfs_trans_handle *trans,
133                              struct btrfs_root *root,
134                              struct extent_buffer *buf,
135                              struct extent_buffer *parent, int parent_slot,
136                              struct extent_buffer **cow_ret,
137                              u64 search_start, u64 empty_size)
138 {
139         u64 root_gen;
140         struct extent_buffer *cow;
141         u32 nritems;
142         int ret = 0;
143         int different_trans = 0;
144         int level;
145         struct btrfs_key first_key;
146
147         if (root->ref_cows) {
148                 root_gen = trans->transid;
149         } else {
150                 root_gen = 0;
151         }
152
153         WARN_ON(root->ref_cows && trans->transid !=
154                 root->fs_info->running_transaction->transid);
155         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
156
157         level = btrfs_header_level(buf);
158         nritems = btrfs_header_nritems(buf);
159         if (nritems) {
160                 if (level == 0)
161                         btrfs_item_key_to_cpu(buf, &first_key, 0);
162                 else
163                         btrfs_node_key_to_cpu(buf, &first_key, 0);
164         } else {
165                 first_key.objectid = 0;
166         }
167         cow = __btrfs_alloc_free_block(trans, root, buf->len,
168                                      root->root_key.objectid,
169                                      root_gen, first_key.objectid, level,
170                                      search_start, empty_size);
171         if (IS_ERR(cow))
172                 return PTR_ERR(cow);
173
174         copy_extent_buffer(cow, buf, 0, 0, cow->len);
175         btrfs_set_header_bytenr(cow, cow->start);
176         btrfs_set_header_generation(cow, trans->transid);
177         btrfs_set_header_owner(cow, root->root_key.objectid);
178
179         WARN_ON(btrfs_header_generation(buf) > trans->transid);
180         if (btrfs_header_generation(buf) != trans->transid) {
181                 different_trans = 1;
182                 ret = btrfs_inc_ref(trans, root, buf);
183                 if (ret)
184                         return ret;
185         } else {
186                 clean_tree_block(trans, root, buf);
187         }
188
189         if (buf == root->node) {
190                 root_gen = btrfs_header_generation(buf);
191                 root->node = cow;
192                 extent_buffer_get(cow);
193                 if (buf != root->commit_root) {
194                         btrfs_free_extent(trans, root, buf->start,
195                                           buf->len, root->root_key.objectid,
196                                           root_gen, 0, 0, 1);
197                 }
198                 free_extent_buffer(buf);
199         } else {
200                 root_gen = btrfs_header_generation(parent);
201                 btrfs_set_node_blockptr(parent, parent_slot,
202                                         cow->start);
203                 WARN_ON(trans->transid == 0);
204                 btrfs_set_node_ptr_generation(parent, parent_slot,
205                                               trans->transid);
206                 btrfs_mark_buffer_dirty(parent);
207                 WARN_ON(btrfs_header_generation(parent) != trans->transid);
208                 btrfs_free_extent(trans, root, buf->start, buf->len,
209                                   btrfs_header_owner(parent), root_gen,
210                                   0, 0, 1);
211         }
212         free_extent_buffer(buf);
213         btrfs_mark_buffer_dirty(cow);
214         *cow_ret = cow;
215         return 0;
216 }
217
218 int btrfs_cow_block(struct btrfs_trans_handle *trans,
219                     struct btrfs_root *root, struct extent_buffer *buf,
220                     struct extent_buffer *parent, int parent_slot,
221                     struct extent_buffer **cow_ret)
222 {
223         u64 search_start;
224         int ret;
225         if (trans->transaction != root->fs_info->running_transaction) {
226                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
227                        root->fs_info->running_transaction->transid);
228                 WARN_ON(1);
229         }
230         if (trans->transid != root->fs_info->generation) {
231                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
232                        root->fs_info->generation);
233                 WARN_ON(1);
234         }
235         if (btrfs_header_generation(buf) == trans->transid) {
236                 *cow_ret = buf;
237                 return 0;
238         }
239
240         search_start = buf->start & ~((u64)BTRFS_BLOCK_GROUP_SIZE - 1);
241         ret = __btrfs_cow_block(trans, root, buf, parent,
242                                  parent_slot, cow_ret, search_start, 0);
243         return ret;
244 }
245
246 static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
247 {
248         if (blocknr < other && other - (blocknr + blocksize) < 32768)
249                 return 1;
250         if (blocknr > other && blocknr - (other + blocksize) < 32768)
251                 return 1;
252         return 0;
253 }
254
255 /*
256  * compare two keys in a memcmp fashion
257  */
258 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
259 {
260         struct btrfs_key k1;
261
262         btrfs_disk_key_to_cpu(&k1, disk);
263
264         if (k1.objectid > k2->objectid)
265                 return 1;
266         if (k1.objectid < k2->objectid)
267                 return -1;
268         if (k1.type > k2->type)
269                 return 1;
270         if (k1.type < k2->type)
271                 return -1;
272         if (k1.offset > k2->offset)
273                 return 1;
274         if (k1.offset < k2->offset)
275                 return -1;
276         return 0;
277 }
278
279
280 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
281                        struct btrfs_root *root, struct extent_buffer *parent,
282                        int start_slot, int cache_only, u64 *last_ret,
283                        struct btrfs_key *progress)
284 {
285         struct extent_buffer *cur;
286         struct extent_buffer *tmp;
287         u64 blocknr;
288         u64 search_start = *last_ret;
289         u64 last_block = 0;
290         u64 other;
291         u32 parent_nritems;
292         int end_slot;
293         int i;
294         int err = 0;
295         int parent_level;
296         int uptodate;
297         u32 blocksize;
298         int progress_passed = 0;
299         struct btrfs_disk_key disk_key;
300
301         parent_level = btrfs_header_level(parent);
302         if (cache_only && parent_level != 1)
303                 return 0;
304
305         if (trans->transaction != root->fs_info->running_transaction) {
306                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
307                        root->fs_info->running_transaction->transid);
308                 WARN_ON(1);
309         }
310         if (trans->transid != root->fs_info->generation) {
311                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
312                        root->fs_info->generation);
313                 WARN_ON(1);
314         }
315
316         parent_nritems = btrfs_header_nritems(parent);
317         blocksize = btrfs_level_size(root, parent_level - 1);
318         end_slot = parent_nritems;
319
320         if (parent_nritems == 1)
321                 return 0;
322
323         for (i = start_slot; i < end_slot; i++) {
324                 int close = 1;
325
326                 if (!parent->map_token) {
327                         map_extent_buffer(parent,
328                                         btrfs_node_key_ptr_offset(i),
329                                         sizeof(struct btrfs_key_ptr),
330                                         &parent->map_token, &parent->kaddr,
331                                         &parent->map_start, &parent->map_len,
332                                         KM_USER1);
333                 }
334                 btrfs_node_key(parent, &disk_key, i);
335                 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
336                         continue;
337
338                 progress_passed = 1;
339                 blocknr = btrfs_node_blockptr(parent, i);
340                 if (last_block == 0)
341                         last_block = blocknr;
342
343                 if (i > 0) {
344                         other = btrfs_node_blockptr(parent, i - 1);
345                         close = close_blocks(blocknr, other, blocksize);
346                 }
347                 if (close && i < end_slot - 2) {
348                         other = btrfs_node_blockptr(parent, i + 1);
349                         close = close_blocks(blocknr, other, blocksize);
350                 }
351                 if (close) {
352                         last_block = blocknr;
353                         continue;
354                 }
355                 if (parent->map_token) {
356                         unmap_extent_buffer(parent, parent->map_token,
357                                             KM_USER1);
358                         parent->map_token = NULL;
359                 }
360
361                 cur = btrfs_find_tree_block(root, blocknr, blocksize);
362                 if (cur)
363                         uptodate = btrfs_buffer_uptodate(cur);
364                 else
365                         uptodate = 0;
366                 if (!cur || !uptodate) {
367                         if (cache_only) {
368                                 free_extent_buffer(cur);
369                                 continue;
370                         }
371                         if (!cur) {
372                                 cur = read_tree_block(root, blocknr,
373                                                          blocksize);
374                         } else if (!uptodate) {
375                                 btrfs_read_buffer(cur);
376                         }
377                 }
378                 if (search_start == 0)
379                         search_start = last_block;
380
381                 err = __btrfs_cow_block(trans, root, cur, parent, i,
382                                         &tmp, search_start,
383                                         min(16 * blocksize,
384                                             (end_slot - i) * blocksize));
385                 if (err) {
386                         free_extent_buffer(cur);
387                         break;
388                 }
389                 search_start = tmp->start;
390                 last_block = tmp->start;
391                 *last_ret = search_start;
392                 if (parent_level == 1)
393                         btrfs_clear_buffer_defrag(tmp);
394                 free_extent_buffer(tmp);
395         }
396         if (parent->map_token) {
397                 unmap_extent_buffer(parent, parent->map_token,
398                                     KM_USER1);
399                 parent->map_token = NULL;
400         }
401         return err;
402 }
403
404 /*
405  * The leaf data grows from end-to-front in the node.
406  * this returns the address of the start of the last item,
407  * which is the stop of the leaf data stack
408  */
409 static inline unsigned int leaf_data_end(struct btrfs_root *root,
410                                          struct extent_buffer *leaf)
411 {
412         u32 nr = btrfs_header_nritems(leaf);
413         if (nr == 0)
414                 return BTRFS_LEAF_DATA_SIZE(root);
415         return btrfs_item_offset_nr(leaf, nr - 1);
416 }
417
418 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
419                       int level)
420 {
421         struct extent_buffer *parent = NULL;
422         struct extent_buffer *node = path->nodes[level];
423         struct btrfs_disk_key parent_key;
424         struct btrfs_disk_key node_key;
425         int parent_slot;
426         int slot;
427         struct btrfs_key cpukey;
428         u32 nritems = btrfs_header_nritems(node);
429
430         if (path->nodes[level + 1])
431                 parent = path->nodes[level + 1];
432
433         slot = path->slots[level];
434         BUG_ON(nritems == 0);
435         if (parent) {
436                 parent_slot = path->slots[level + 1];
437                 btrfs_node_key(parent, &parent_key, parent_slot);
438                 btrfs_node_key(node, &node_key, 0);
439                 BUG_ON(memcmp(&parent_key, &node_key,
440                               sizeof(struct btrfs_disk_key)));
441                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
442                        btrfs_header_bytenr(node));
443         }
444         BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
445         if (slot != 0) {
446                 btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
447                 btrfs_node_key(node, &node_key, slot);
448                 BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
449         }
450         if (slot < nritems - 1) {
451                 btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
452                 btrfs_node_key(node, &node_key, slot);
453                 BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
454         }
455         return 0;
456 }
457
458 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
459                       int level)
460 {
461         struct extent_buffer *leaf = path->nodes[level];
462         struct extent_buffer *parent = NULL;
463         int parent_slot;
464         struct btrfs_key cpukey;
465         struct btrfs_disk_key parent_key;
466         struct btrfs_disk_key leaf_key;
467         int slot = path->slots[0];
468
469         u32 nritems = btrfs_header_nritems(leaf);
470
471         if (path->nodes[level + 1])
472                 parent = path->nodes[level + 1];
473
474         if (nritems == 0)
475                 return 0;
476
477         if (parent) {
478                 parent_slot = path->slots[level + 1];
479                 btrfs_node_key(parent, &parent_key, parent_slot);
480                 btrfs_item_key(leaf, &leaf_key, 0);
481
482                 BUG_ON(memcmp(&parent_key, &leaf_key,
483                        sizeof(struct btrfs_disk_key)));
484                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
485                        btrfs_header_bytenr(leaf));
486         }
487 #if 0
488         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
489                 btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
490                 btrfs_item_key(leaf, &leaf_key, i);
491                 if (comp_keys(&leaf_key, &cpukey) >= 0) {
492                         btrfs_print_leaf(root, leaf);
493                         printk("slot %d offset bad key\n", i);
494                         BUG_ON(1);
495                 }
496                 if (btrfs_item_offset_nr(leaf, i) !=
497                         btrfs_item_end_nr(leaf, i + 1)) {
498                         btrfs_print_leaf(root, leaf);
499                         printk("slot %d offset bad\n", i);
500                         BUG_ON(1);
501                 }
502                 if (i == 0) {
503                         if (btrfs_item_offset_nr(leaf, i) +
504                                btrfs_item_size_nr(leaf, i) !=
505                                BTRFS_LEAF_DATA_SIZE(root)) {
506                                 btrfs_print_leaf(root, leaf);
507                                 printk("slot %d first offset bad\n", i);
508                                 BUG_ON(1);
509                         }
510                 }
511         }
512         if (nritems > 0) {
513                 if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
514                                 btrfs_print_leaf(root, leaf);
515                                 printk("slot %d bad size \n", nritems - 1);
516                                 BUG_ON(1);
517                 }
518         }
519 #endif
520         if (slot != 0 && slot < nritems - 1) {
521                 btrfs_item_key(leaf, &leaf_key, slot);
522                 btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
523                 if (comp_keys(&leaf_key, &cpukey) <= 0) {
524                         btrfs_print_leaf(root, leaf);
525                         printk("slot %d offset bad key\n", slot);
526                         BUG_ON(1);
527                 }
528                 if (btrfs_item_offset_nr(leaf, slot - 1) !=
529                        btrfs_item_end_nr(leaf, slot)) {
530                         btrfs_print_leaf(root, leaf);
531                         printk("slot %d offset bad\n", slot);
532                         BUG_ON(1);
533                 }
534         }
535         if (slot < nritems - 1) {
536                 btrfs_item_key(leaf, &leaf_key, slot);
537                 btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
538                 BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
539                 if (btrfs_item_offset_nr(leaf, slot) !=
540                         btrfs_item_end_nr(leaf, slot + 1)) {
541                         btrfs_print_leaf(root, leaf);
542                         printk("slot %d offset bad\n", slot);
543                         BUG_ON(1);
544                 }
545         }
546         BUG_ON(btrfs_item_offset_nr(leaf, 0) +
547                btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
548         return 0;
549 }
550
/*
 * Entry point for the per-block sanity checks.
 *
 * The checks are currently switched off: the function returns 0 before
 * doing anything, so the dispatch to check_leaf()/check_node() below is
 * never reached.  The fsid comparison is compiled out as well.
 */
static int check_block(struct btrfs_root *root, struct btrfs_path *path,
                        int level)
{
        /* checking disabled; everything after this line is unreachable */
        return 0;
#if 0
        struct extent_buffer *buf = path->nodes[level];

        if (memcmp_extent_buffer(buf, root->fs_info->fsid,
                                 (unsigned long)btrfs_header_fsid(buf),
                                 BTRFS_FSID_SIZE)) {
                printk("warning bad block %Lu\n", buf->start);
                return 1;
        }
#endif
        if (level != 0)
                return check_node(root, path, level);
        return check_leaf(root, path, level);
}
569
570 /*
571  * search for key in the extent_buffer.  The items start at offset p,
572  * and they are item_size apart.  There are 'max' items in p.
573  *
574  * the slot in the array is returned via slot, and it points to
575  * the place where you would insert key if it is not found in
576  * the array.
577  *
578  * slot may point to max if the key is bigger than all of the keys
579  */
580 static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
581                               int item_size, struct btrfs_key *key,
582                               int max, int *slot)
583 {
584         int low = 0;
585         int high = max;
586         int mid;
587         int ret;
588         struct btrfs_disk_key *tmp = NULL;
589         struct btrfs_disk_key unaligned;
590         unsigned long offset;
591         char *map_token = NULL;
592         char *kaddr = NULL;
593         unsigned long map_start = 0;
594         unsigned long map_len = 0;
595         int err;
596
597         while(low < high) {
598                 mid = (low + high) / 2;
599                 offset = p + mid * item_size;
600
601                 if (!map_token || offset < map_start ||
602                     (offset + sizeof(struct btrfs_disk_key)) >
603                     map_start + map_len) {
604                         if (map_token) {
605                                 unmap_extent_buffer(eb, map_token, KM_USER0);
606                                 map_token = NULL;
607                         }
608                         err = map_extent_buffer(eb, offset,
609                                                 sizeof(struct btrfs_disk_key),
610                                                 &map_token, &kaddr,
611                                                 &map_start, &map_len, KM_USER0);
612
613                         if (!err) {
614                                 tmp = (struct btrfs_disk_key *)(kaddr + offset -
615                                                         map_start);
616                         } else {
617                                 read_extent_buffer(eb, &unaligned,
618                                                    offset, sizeof(unaligned));
619                                 tmp = &unaligned;
620                         }
621
622                 } else {
623                         tmp = (struct btrfs_disk_key *)(kaddr + offset -
624                                                         map_start);
625                 }
626                 ret = comp_keys(tmp, key);
627
628                 if (ret < 0)
629                         low = mid + 1;
630                 else if (ret > 0)
631                         high = mid;
632                 else {
633                         *slot = mid;
634                         if (map_token)
635                                 unmap_extent_buffer(eb, map_token, KM_USER0);
636                         return 0;
637                 }
638         }
639         *slot = low;
640         if (map_token)
641                 unmap_extent_buffer(eb, map_token, KM_USER0);
642         return 1;
643 }
644
645 /*
646  * simple bin_search frontend that does the right thing for
647  * leaves vs nodes
648  */
649 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
650                       int level, int *slot)
651 {
652         if (level == 0) {
653                 return generic_bin_search(eb,
654                                           offsetof(struct btrfs_leaf, items),
655                                           sizeof(struct btrfs_item),
656                                           key, btrfs_header_nritems(eb),
657                                           slot);
658         } else {
659                 return generic_bin_search(eb,
660                                           offsetof(struct btrfs_node, ptrs),
661                                           sizeof(struct btrfs_key_ptr),
662                                           key, btrfs_header_nritems(eb),
663                                           slot);
664         }
665         return -1;
666 }
667
668 static struct extent_buffer *read_node_slot(struct btrfs_root *root,
669                                    struct extent_buffer *parent, int slot)
670 {
671         if (slot < 0)
672                 return NULL;
673         if (slot >= btrfs_header_nritems(parent))
674                 return NULL;
675         return read_tree_block(root, btrfs_node_blockptr(parent, slot),
676                        btrfs_level_size(root, btrfs_header_level(parent) - 1));
677 }
678
679 static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root
680                          *root, struct btrfs_path *path, int level)
681 {
682         struct extent_buffer *right = NULL;
683         struct extent_buffer *mid;
684         struct extent_buffer *left = NULL;
685         struct extent_buffer *parent = NULL;
686         int ret = 0;
687         int wret;
688         int pslot;
689         int orig_slot = path->slots[level];
690         int err_on_enospc = 0;
691         u64 orig_ptr;
692
693         if (level == 0)
694                 return 0;
695
696         mid = path->nodes[level];
697         WARN_ON(btrfs_header_generation(mid) != trans->transid);
698
699         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
700
701         if (level < BTRFS_MAX_LEVEL - 1)
702                 parent = path->nodes[level + 1];
703         pslot = path->slots[level + 1];
704
705         /*
706          * deal with the case where there is only one pointer in the root
707          * by promoting the node below to a root
708          */
709         if (!parent) {
710                 struct extent_buffer *child;
711
712                 if (btrfs_header_nritems(mid) != 1)
713                         return 0;
714
715                 /* promote the child to a root */
716                 child = read_node_slot(root, mid, 0);
717                 BUG_ON(!child);
718                 root->node = child;
719                 path->nodes[level] = NULL;
720                 clean_tree_block(trans, root, mid);
721                 wait_on_tree_block_writeback(root, mid);
722                 /* once for the path */
723                 free_extent_buffer(mid);
724                 ret = btrfs_free_extent(trans, root, mid->start, mid->len,
725                                         root->root_key.objectid,
726                                         btrfs_header_generation(mid), 0, 0, 1);
727                 /* once for the root ptr */
728                 free_extent_buffer(mid);
729                 return ret;
730         }
731         if (btrfs_header_nritems(mid) >
732             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
733                 return 0;
734
735         if (btrfs_header_nritems(mid) < 2)
736                 err_on_enospc = 1;
737
738         left = read_node_slot(root, parent, pslot - 1);
739         if (left) {
740                 wret = btrfs_cow_block(trans, root, left,
741                                        parent, pslot - 1, &left);
742                 if (wret) {
743                         ret = wret;
744                         goto enospc;
745                 }
746         }
747         right = read_node_slot(root, parent, pslot + 1);
748         if (right) {
749                 wret = btrfs_cow_block(trans, root, right,
750                                        parent, pslot + 1, &right);
751                 if (wret) {
752                         ret = wret;
753                         goto enospc;
754                 }
755         }
756
757         /* first, try to make some room in the middle buffer */
758         if (left) {
759                 orig_slot += btrfs_header_nritems(left);
760                 wret = push_node_left(trans, root, left, mid);
761                 if (wret < 0)
762                         ret = wret;
763                 if (btrfs_header_nritems(mid) < 2)
764                         err_on_enospc = 1;
765         }
766
767         /*
768          * then try to empty the right most buffer into the middle
769          */
770         if (right) {
771                 wret = push_node_left(trans, root, mid, right);
772                 if (wret < 0 && wret != -ENOSPC)
773                         ret = wret;
774                 if (btrfs_header_nritems(right) == 0) {
775                         u64 bytenr = right->start;
776                         u64 generation = btrfs_header_generation(parent);
777                         u32 blocksize = right->len;
778
779                         clean_tree_block(trans, root, right);
780                         wait_on_tree_block_writeback(root, right);
781                         free_extent_buffer(right);
782                         right = NULL;
783                         wret = del_ptr(trans, root, path, level + 1, pslot +
784                                        1);
785                         if (wret)
786                                 ret = wret;
787                         wret = btrfs_free_extent(trans, root, bytenr,
788                                                  blocksize,
789                                                  btrfs_header_owner(parent),
790                                                  generation, 0, 0, 1);
791                         if (wret)
792                                 ret = wret;
793                 } else {
794                         struct btrfs_disk_key right_key;
795                         btrfs_node_key(right, &right_key, 0);
796                         btrfs_set_node_key(parent, &right_key, pslot + 1);
797                         btrfs_mark_buffer_dirty(parent);
798                 }
799         }
800         if (btrfs_header_nritems(mid) == 1) {
801                 /*
802                  * we're not allowed to leave a node with one item in the
803                  * tree during a delete.  A deletion from lower in the tree
804                  * could try to delete the only pointer in this node.
805                  * So, pull some keys from the left.
806                  * There has to be a left pointer at this point because
807                  * otherwise we would have pulled some pointers from the
808                  * right
809                  */
810                 BUG_ON(!left);
811                 wret = balance_node_right(trans, root, mid, left);
812                 if (wret < 0) {
813                         ret = wret;
814                         goto enospc;
815                 }
816                 BUG_ON(wret == 1);
817         }
818         if (btrfs_header_nritems(mid) == 0) {
819                 /* we've managed to empty the middle node, drop it */
820                 u64 root_gen = btrfs_header_generation(parent);
821                 u64 bytenr = mid->start;
822                 u32 blocksize = mid->len;
823                 clean_tree_block(trans, root, mid);
824                 wait_on_tree_block_writeback(root, mid);
825                 free_extent_buffer(mid);
826                 mid = NULL;
827                 wret = del_ptr(trans, root, path, level + 1, pslot);
828                 if (wret)
829                         ret = wret;
830                 wret = btrfs_free_extent(trans, root, bytenr, blocksize,
831                                          btrfs_header_owner(parent),
832                                          root_gen, 0, 0, 1);
833                 if (wret)
834                         ret = wret;
835         } else {
836                 /* update the parent key to reflect our changes */
837                 struct btrfs_disk_key mid_key;
838                 btrfs_node_key(mid, &mid_key, 0);
839                 btrfs_set_node_key(parent, &mid_key, pslot);
840                 btrfs_mark_buffer_dirty(parent);
841         }
842
843         /* update the path */
844         if (left) {
845                 if (btrfs_header_nritems(left) > orig_slot) {
846                         extent_buffer_get(left);
847                         path->nodes[level] = left;
848                         path->slots[level + 1] -= 1;
849                         path->slots[level] = orig_slot;
850                         if (mid)
851                                 free_extent_buffer(mid);
852                 } else {
853                         orig_slot -= btrfs_header_nritems(left);
854                         path->slots[level] = orig_slot;
855                 }
856         }
857         /* double check we haven't messed things up */
858         check_block(root, path, level);
859         if (orig_ptr !=
860             btrfs_node_blockptr(path->nodes[level], path->slots[level]))
861                 BUG();
862 enospc:
863         if (right)
864                 free_extent_buffer(right);
865         if (left)
866                 free_extent_buffer(left);
867         return ret;
868 }
869
870 /* returns zero if the push worked, non-zero otherwise */
871 static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
872                                 struct btrfs_root *root,
873                                 struct btrfs_path *path, int level)
874 {
875         struct extent_buffer *right = NULL;
876         struct extent_buffer *mid;
877         struct extent_buffer *left = NULL;
878         struct extent_buffer *parent = NULL;
879         int ret = 0;
880         int wret;
881         int pslot;
882         int orig_slot = path->slots[level];
883         u64 orig_ptr;
884
885         if (level == 0)
886                 return 1;
887
888         mid = path->nodes[level];
889         WARN_ON(btrfs_header_generation(mid) != trans->transid);
890         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
891
892         if (level < BTRFS_MAX_LEVEL - 1)
893                 parent = path->nodes[level + 1];
894         pslot = path->slots[level + 1];
895
896         if (!parent)
897                 return 1;
898
899         left = read_node_slot(root, parent, pslot - 1);
900
901         /* first, try to make some room in the middle buffer */
902         if (left) {
903                 u32 left_nr;
904                 left_nr = btrfs_header_nritems(left);
905                 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
906                         wret = 1;
907                 } else {
908                         ret = btrfs_cow_block(trans, root, left, parent,
909                                               pslot - 1, &left);
910                         if (ret)
911                                 wret = 1;
912                         else {
913                                 wret = push_node_left(trans, root,
914                                                       left, mid);
915                         }
916                 }
917                 if (wret < 0)
918                         ret = wret;
919                 if (wret == 0) {
920                         struct btrfs_disk_key disk_key;
921                         orig_slot += left_nr;
922                         btrfs_node_key(mid, &disk_key, 0);
923                         btrfs_set_node_key(parent, &disk_key, pslot);
924                         btrfs_mark_buffer_dirty(parent);
925                         if (btrfs_header_nritems(left) > orig_slot) {
926                                 path->nodes[level] = left;
927                                 path->slots[level + 1] -= 1;
928                                 path->slots[level] = orig_slot;
929                                 free_extent_buffer(mid);
930                         } else {
931                                 orig_slot -=
932                                         btrfs_header_nritems(left);
933                                 path->slots[level] = orig_slot;
934                                 free_extent_buffer(left);
935                         }
936                         return 0;
937                 }
938                 free_extent_buffer(left);
939         }
940         right= read_node_slot(root, parent, pslot + 1);
941
942         /*
943          * then try to empty the right most buffer into the middle
944          */
945         if (right) {
946                 u32 right_nr;
947                 right_nr = btrfs_header_nritems(right);
948                 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
949                         wret = 1;
950                 } else {
951                         ret = btrfs_cow_block(trans, root, right,
952                                               parent, pslot + 1,
953                                               &right);
954                         if (ret)
955                                 wret = 1;
956                         else {
957                                 wret = balance_node_right(trans, root,
958                                                           right, mid);
959                         }
960                 }
961                 if (wret < 0)
962                         ret = wret;
963                 if (wret == 0) {
964                         struct btrfs_disk_key disk_key;
965
966                         btrfs_node_key(right, &disk_key, 0);
967                         btrfs_set_node_key(parent, &disk_key, pslot + 1);
968                         btrfs_mark_buffer_dirty(parent);
969
970                         if (btrfs_header_nritems(mid) <= orig_slot) {
971                                 path->nodes[level] = right;
972                                 path->slots[level + 1] += 1;
973                                 path->slots[level] = orig_slot -
974                                         btrfs_header_nritems(mid);
975                                 free_extent_buffer(mid);
976                         } else {
977                                 free_extent_buffer(right);
978                         }
979                         return 0;
980                 }
981                 free_extent_buffer(right);
982         }
983         return 1;
984 }
985
986 /*
987  * readahead one full node of leaves
988  */
989 static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
990                              int level, int slot, u64 objectid)
991 {
992         struct extent_buffer *node;
993         struct btrfs_disk_key disk_key;
994         u32 nritems;
995         u64 search;
996         u64 lowest_read;
997         u64 highest_read;
998         u64 nread = 0;
999         int direction = path->reada;
1000         struct extent_buffer *eb;
1001         u32 nr;
1002         u32 blocksize;
1003         u32 nscan = 0;
1004
1005         if (level != 1)
1006                 return;
1007
1008         if (!path->nodes[level])
1009                 return;
1010
1011         node = path->nodes[level];
1012         search = btrfs_node_blockptr(node, slot);
1013         blocksize = btrfs_level_size(root, level - 1);
1014         eb = btrfs_find_tree_block(root, search, blocksize);
1015         if (eb) {
1016                 free_extent_buffer(eb);
1017                 return;
1018         }
1019
1020         highest_read = search;
1021         lowest_read = search;
1022
1023         nritems = btrfs_header_nritems(node);
1024         nr = slot;
1025         while(1) {
1026                 if (direction < 0) {
1027                         if (nr == 0)
1028                                 break;
1029                         nr--;
1030                 } else if (direction > 0) {
1031                         nr++;
1032                         if (nr >= nritems)
1033                                 break;
1034                 }
1035                 if (path->reada < 0 && objectid) {
1036                         btrfs_node_key(node, &disk_key, nr);
1037                         if (btrfs_disk_key_objectid(&disk_key) != objectid)
1038                                 break;
1039                 }
1040                 search = btrfs_node_blockptr(node, nr);
1041                 if ((search >= lowest_read && search <= highest_read) ||
1042                     (search < lowest_read && lowest_read - search <= 32768) ||
1043                     (search > highest_read && search - highest_read <= 32768)) {
1044                         readahead_tree_block(root, search, blocksize);
1045                         nread += blocksize;
1046                 }
1047                 nscan++;
1048                 if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32))
1049                         break;
1050                 if(nread > (1024 * 1024) || nscan > 128)
1051                         break;
1052
1053                 if (search < lowest_read)
1054                         lowest_read = search;
1055                 if (search > highest_read)
1056                         highest_read = search;
1057         }
1058 }
1059 /*
1060  * look for key in the tree.  path is filled in with nodes along the way
1061  * if key is found, we return zero and you can find the item in the leaf
1062  * level of the path (level 0)
1063  *
1064  * If the key isn't found, the path points to the slot where it should
1065  * be inserted, and 1 is returned.  If there are other errors during the
1066  * search a negative error number is returned.
1067  *
1068  * if ins_len > 0, nodes and leaves will be split as we walk down the
1069  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
1070  * possible)
1071  */
1072 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1073                       *root, struct btrfs_key *key, struct btrfs_path *p, int
1074                       ins_len, int cow)
1075 {
1076         struct extent_buffer *b;
1077         u64 bytenr;
1078         u64 ptr_gen;
1079         int slot;
1080         int ret;
1081         int level;
1082         int should_reada = p->reada;
1083         u8 lowest_level = 0;
1084
1085         lowest_level = p->lowest_level;
1086         WARN_ON(lowest_level && ins_len);
1087         WARN_ON(p->nodes[0] != NULL);
1088         WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
1089 again:
1090         b = root->node;
1091         extent_buffer_get(b);
1092         while (b) {
1093                 level = btrfs_header_level(b);
1094                 if (cow) {
1095                         int wret;
1096                         wret = btrfs_cow_block(trans, root, b,
1097                                                p->nodes[level + 1],
1098                                                p->slots[level + 1],
1099                                                &b);
1100                         if (wret) {
1101                                 free_extent_buffer(b);
1102                                 return wret;
1103                         }
1104                 }
1105                 BUG_ON(!cow && ins_len);
1106                 if (level != btrfs_header_level(b))
1107                         WARN_ON(1);
1108                 level = btrfs_header_level(b);
1109                 p->nodes[level] = b;
1110                 ret = check_block(root, p, level);
1111                 if (ret)
1112                         return -1;
1113                 ret = bin_search(b, key, level, &slot);
1114                 if (level != 0) {
1115                         if (ret && slot > 0)
1116                                 slot -= 1;
1117                         p->slots[level] = slot;
1118                         if (ins_len > 0 && btrfs_header_nritems(b) >=
1119                             BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
1120                                 int sret = split_node(trans, root, p, level);
1121                                 BUG_ON(sret > 0);
1122                                 if (sret)
1123                                         return sret;
1124                                 b = p->nodes[level];
1125                                 slot = p->slots[level];
1126                         } else if (ins_len < 0) {
1127                                 int sret = balance_level(trans, root, p,
1128                                                          level);
1129                                 if (sret)
1130                                         return sret;
1131                                 b = p->nodes[level];
1132                                 if (!b) {
1133                                         btrfs_release_path(NULL, p);
1134                                         goto again;
1135                                 }
1136                                 slot = p->slots[level];
1137                                 BUG_ON(btrfs_header_nritems(b) == 1);
1138                         }
1139                         /* this is only true while dropping a snapshot */
1140                         if (level == lowest_level)
1141                                 break;
1142                         bytenr = btrfs_node_blockptr(b, slot);
1143                         ptr_gen = btrfs_node_ptr_generation(b, slot);
1144                         if (should_reada)
1145                                 reada_for_search(root, p, level, slot,
1146                                                  key->objectid);
1147                         b = read_tree_block(root, bytenr,
1148                                             btrfs_level_size(root, level - 1));
1149                         if (ptr_gen != btrfs_header_generation(b)) {
1150                                 printk("block %llu bad gen wanted %llu "
1151                                        "found %llu\n",
1152                                 (unsigned long long)b->start,
1153                                 (unsigned long long)ptr_gen,
1154                                 (unsigned long long)btrfs_header_generation(b));
1155                         }
1156                 } else {
1157                         p->slots[level] = slot;
1158                         if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
1159                             sizeof(struct btrfs_item) + ins_len) {
1160                                 int sret = split_leaf(trans, root, key,
1161                                                       p, ins_len, ret == 0);
1162                                 BUG_ON(sret > 0);
1163                                 if (sret)
1164                                         return sret;
1165                         }
1166                         return ret;
1167                 }
1168         }
1169         return 1;
1170 }
1171
1172 /*
1173  * adjust the pointers going up the tree, starting at level
1174  * making sure the right key of each node is points to 'key'.
1175  * This is used after shifting pointers to the left, so it stops
1176  * fixing up pointers when a given leaf/node is not in slot 0 of the
1177  * higher levels
1178  *
1179  * If this fails to write a tree block, it returns -1, but continues
1180  * fixing up the blocks in ram so the tree is consistent.
1181  */
1182 static int fixup_low_keys(struct btrfs_trans_handle *trans,
1183                           struct btrfs_root *root, struct btrfs_path *path,
1184                           struct btrfs_disk_key *key, int level)
1185 {
1186         int i;
1187         int ret = 0;
1188         struct extent_buffer *t;
1189
1190         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1191                 int tslot = path->slots[i];
1192                 if (!path->nodes[i])
1193                         break;
1194                 t = path->nodes[i];
1195                 btrfs_set_node_key(t, key, tslot);
1196                 btrfs_mark_buffer_dirty(path->nodes[i]);
1197                 if (tslot != 0)
1198                         break;
1199         }
1200         return ret;
1201 }
1202
1203 /*
1204  * try to push data from one node into the next node left in the
1205  * tree.
1206  *
1207  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1208  * error, and > 0 if there was no room in the left hand block.
1209  */
1210 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
1211                           *root, struct extent_buffer *dst,
1212                           struct extent_buffer *src)
1213 {
1214         int push_items = 0;
1215         int src_nritems;
1216         int dst_nritems;
1217         int ret = 0;
1218
1219         src_nritems = btrfs_header_nritems(src);
1220         dst_nritems = btrfs_header_nritems(dst);
1221         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1222         WARN_ON(btrfs_header_generation(src) != trans->transid);
1223         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1224
1225         if (push_items <= 0) {
1226                 return 1;
1227         }
1228
1229         if (src_nritems < push_items)
1230                 push_items = src_nritems;
1231
1232         copy_extent_buffer(dst, src,
1233                            btrfs_node_key_ptr_offset(dst_nritems),
1234                            btrfs_node_key_ptr_offset(0),
1235                            push_items * sizeof(struct btrfs_key_ptr));
1236
1237         if (push_items < src_nritems) {
1238                 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
1239                                       btrfs_node_key_ptr_offset(push_items),
1240                                       (src_nritems - push_items) *
1241                                       sizeof(struct btrfs_key_ptr));
1242         }
1243         btrfs_set_header_nritems(src, src_nritems - push_items);
1244         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1245         btrfs_mark_buffer_dirty(src);
1246         btrfs_mark_buffer_dirty(dst);
1247         return ret;
1248 }
1249
1250 /*
1251  * try to push data from one node into the next node right in the
1252  * tree.
1253  *
1254  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1255  * error, and > 0 if there was no room in the right hand block.
1256  *
1257  * this will  only push up to 1/2 the contents of the left node over
1258  */
1259 static int balance_node_right(struct btrfs_trans_handle *trans,
1260                               struct btrfs_root *root,
1261                               struct extent_buffer *dst,
1262                               struct extent_buffer *src)
1263 {
1264         int push_items = 0;
1265         int max_push;
1266         int src_nritems;
1267         int dst_nritems;
1268         int ret = 0;
1269
1270         WARN_ON(btrfs_header_generation(src) != trans->transid);
1271         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1272
1273         src_nritems = btrfs_header_nritems(src);
1274         dst_nritems = btrfs_header_nritems(dst);
1275         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1276         if (push_items <= 0)
1277                 return 1;
1278
1279         max_push = src_nritems / 2 + 1;
1280         /* don't try to empty the node */
1281         if (max_push >= src_nritems)
1282                 return 1;
1283
1284         if (max_push < push_items)
1285                 push_items = max_push;
1286
1287         memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
1288                                       btrfs_node_key_ptr_offset(0),
1289                                       (dst_nritems) *
1290                                       sizeof(struct btrfs_key_ptr));
1291
1292         copy_extent_buffer(dst, src,
1293                            btrfs_node_key_ptr_offset(0),
1294                            btrfs_node_key_ptr_offset(src_nritems - push_items),
1295                            push_items * sizeof(struct btrfs_key_ptr));
1296
1297         btrfs_set_header_nritems(src, src_nritems - push_items);
1298         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1299
1300         btrfs_mark_buffer_dirty(src);
1301         btrfs_mark_buffer_dirty(dst);
1302         return ret;
1303 }
1304
1305 /*
1306  * helper function to insert a new root level in the tree.
1307  * A new node is allocated, and a single item is inserted to
1308  * point to the existing root
1309  *
1310  * returns zero on success or < 0 on failure.
1311  */
1312 static int insert_new_root(struct btrfs_trans_handle *trans,
1313                            struct btrfs_root *root,
1314                            struct btrfs_path *path, int level)
1315 {
1316         u64 root_gen;
1317         u64 lower_gen;
1318         struct extent_buffer *lower;
1319         struct extent_buffer *c;
1320         struct btrfs_disk_key lower_key;
1321
1322         BUG_ON(path->nodes[level]);
1323         BUG_ON(path->nodes[level-1] != root->node);
1324
1325         if (root->ref_cows)
1326                 root_gen = trans->transid;
1327         else
1328                 root_gen = 0;
1329
1330         lower = path->nodes[level-1];
1331         if (level == 1)
1332                 btrfs_item_key(lower, &lower_key, 0);
1333         else
1334                 btrfs_node_key(lower, &lower_key, 0);
1335
1336         c = __btrfs_alloc_free_block(trans, root, root->nodesize,
1337                                    root->root_key.objectid,
1338                                    root_gen, lower_key.objectid, level,
1339                                    root->node->start, 0);
1340         if (IS_ERR(c))
1341                 return PTR_ERR(c);
1342         memset_extent_buffer(c, 0, 0, root->nodesize);
1343         btrfs_set_header_nritems(c, 1);
1344         btrfs_set_header_level(c, level);
1345         btrfs_set_header_bytenr(c, c->start);
1346         btrfs_set_header_generation(c, trans->transid);
1347         btrfs_set_header_owner(c, root->root_key.objectid);
1348
1349         write_extent_buffer(c, root->fs_info->fsid,
1350                             (unsigned long)btrfs_header_fsid(c),
1351                             BTRFS_FSID_SIZE);
1352         btrfs_set_node_key(c, &lower_key, 0);
1353         btrfs_set_node_blockptr(c, 0, lower->start);
1354         lower_gen = btrfs_header_generation(lower);
1355         WARN_ON(lower_gen == 0);
1356
1357         btrfs_set_node_ptr_generation(c, 0, lower_gen);
1358
1359         btrfs_mark_buffer_dirty(c);
1360
1361         /* the super has an extra ref to root->node */
1362         free_extent_buffer(root->node);
1363         root->node = c;
1364         extent_buffer_get(c);
1365         path->nodes[level] = c;
1366         path->slots[level] = 0;
1367
1368         if (root->ref_cows && lower_gen != trans->transid) {
1369                 struct btrfs_path *back_path = btrfs_alloc_path();
1370                 int ret;
1371                 ret = btrfs_insert_extent_backref(trans,
1372                                                   root->fs_info->extent_root,
1373                                                   path, lower->start,
1374                                                   root->root_key.objectid,
1375                                                   trans->transid, 0, 0);
1376                 BUG_ON(ret);
1377                 btrfs_free_path(back_path);
1378         }
1379         return 0;
1380 }
1381
1382 /*
1383  * worker function to insert a single pointer in a node.
1384  * the node should have enough room for the pointer already
1385  *
1386  * slot and level indicate where you want the key to go, and
1387  * blocknr is the block the key points to.
1388  *
1389  * returns zero on success and < 0 on any error
1390  */
1391 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
1392                       *root, struct btrfs_path *path, struct btrfs_disk_key
1393                       *key, u64 bytenr, int slot, int level)
1394 {
1395         struct extent_buffer *lower;
1396         int nritems;
1397
1398         BUG_ON(!path->nodes[level]);
1399         lower = path->nodes[level];
1400         nritems = btrfs_header_nritems(lower);
1401         if (slot > nritems)
1402                 BUG();
1403         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
1404                 BUG();
1405         if (slot != nritems) {
1406                 memmove_extent_buffer(lower,
1407                               btrfs_node_key_ptr_offset(slot + 1),
1408                               btrfs_node_key_ptr_offset(slot),
1409                               (nritems - slot) * sizeof(struct btrfs_key_ptr));
1410         }
1411         btrfs_set_node_key(lower, key, slot);
1412         btrfs_set_node_blockptr(lower, slot, bytenr);
1413         WARN_ON(trans->transid == 0);
1414         btrfs_set_node_ptr_generation(lower, slot, trans->transid);
1415         btrfs_set_header_nritems(lower, nritems + 1);
1416         btrfs_mark_buffer_dirty(lower);
1417         return 0;
1418 }
1419
1420 /*
1421  * split the node at the specified level in path in two.
1422  * The path is corrected to point to the appropriate node after the split
1423  *
1424  * Before splitting this tries to make some room in the node by pushing
1425  * left and right, if either one works, it returns right away.
1426  *
1427  * returns 0 on success and < 0 on failure
1428  */
1429 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1430                       *root, struct btrfs_path *path, int level)
1431 {
1432         u64 root_gen;
1433         struct extent_buffer *c;
1434         struct extent_buffer *split;
1435         struct btrfs_disk_key disk_key;
1436         int mid;
1437         int ret;
1438         int wret;
1439         u32 c_nritems;
1440
1441         c = path->nodes[level];
1442         WARN_ON(btrfs_header_generation(c) != trans->transid);
1443         if (c == root->node) {
1444                 /* trying to split the root, lets make a new one */
1445                 ret = insert_new_root(trans, root, path, level + 1);
1446                 if (ret)
1447                         return ret;
1448         } else {
1449                 ret = push_nodes_for_insert(trans, root, path, level);
1450                 c = path->nodes[level];
1451                 if (!ret && btrfs_header_nritems(c) <
1452                     BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
1453                         return 0;
1454                 if (ret < 0)
1455                         return ret;
1456         }
1457
1458         c_nritems = btrfs_header_nritems(c);
1459         if (root->ref_cows)
1460                 root_gen = trans->transid;
1461         else
1462                 root_gen = 0;
1463
1464         btrfs_node_key(c, &disk_key, 0);
1465         split = __btrfs_alloc_free_block(trans, root, root->nodesize,
1466                                          root->root_key.objectid,
1467                                          root_gen,
1468                                          btrfs_disk_key_objectid(&disk_key),
1469                                          level, c->start, 0);
1470         if (IS_ERR(split))
1471                 return PTR_ERR(split);
1472
1473         btrfs_set_header_flags(split, btrfs_header_flags(c));
1474         btrfs_set_header_level(split, btrfs_header_level(c));
1475         btrfs_set_header_bytenr(split, split->start);
1476         btrfs_set_header_generation(split, trans->transid);
1477         btrfs_set_header_owner(split, root->root_key.objectid);
1478         write_extent_buffer(split, root->fs_info->fsid,
1479                             (unsigned long)btrfs_header_fsid(split),
1480                             BTRFS_FSID_SIZE);
1481
1482         mid = (c_nritems + 1) / 2;
1483
1484         copy_extent_buffer(split, c,
1485                            btrfs_node_key_ptr_offset(0),
1486                            btrfs_node_key_ptr_offset(mid),
1487                            (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
1488         btrfs_set_header_nritems(split, c_nritems - mid);
1489         btrfs_set_header_nritems(c, mid);
1490         ret = 0;
1491
1492         btrfs_mark_buffer_dirty(c);
1493         btrfs_mark_buffer_dirty(split);
1494
1495         btrfs_node_key(split, &disk_key, 0);
1496         wret = insert_ptr(trans, root, path, &disk_key, split->start,
1497                           path->slots[level + 1] + 1,
1498                           level + 1);
1499         if (wret)
1500                 ret = wret;
1501
1502         if (path->slots[level] >= mid) {
1503                 path->slots[level] -= mid;
1504                 free_extent_buffer(c);
1505                 path->nodes[level] = split;
1506                 path->slots[level + 1] += 1;
1507         } else {
1508                 free_extent_buffer(split);
1509         }
1510         return ret;
1511 }
1512
1513 /*
1514  * how many bytes are required to store the items in a leaf.  start
1515  * and nr indicate which items in the leaf to check.  This totals up the
1516  * space used both by the item structs and the item data
1517  */
1518 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
1519 {
1520         int data_len;
1521         int nritems = btrfs_header_nritems(l);
1522         int end = min(nritems, start + nr) - 1;
1523
1524         if (!nr)
1525                 return 0;
1526         data_len = btrfs_item_end_nr(l, start);
1527         data_len = data_len - btrfs_item_offset_nr(l, end);
1528         data_len += sizeof(struct btrfs_item) * nr;
1529         WARN_ON(data_len < 0);
1530         return data_len;
1531 }
1532
/*
 * Bytes still unused in 'leaf': the leaf data area size minus everything
 * taken up by the existing item headers and their data.
 *
 * The result may be negative if the leaf accounts for more space than the
 * data area holds; that case is logged and the negative value is still
 * returned to the caller.
 */
int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
{
	int nritems = btrfs_header_nritems(leaf);
	int free_bytes = BTRFS_LEAF_DATA_SIZE(root) -
			 leaf_space_used(leaf, 0, nritems);

	if (free_bytes >= 0)
		return free_bytes;

	printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
	       free_bytes, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
	       leaf_space_used(leaf, 0, nritems), nritems);
	return free_bytes;
}
1550
1551 /*
1552  * push some data in the path leaf to the right, trying to free up at
1553  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1554  *
1555  * returns 1 if the push failed because the other node didn't have enough
1556  * room, 0 if everything worked out and < 0 if there were major errors.
1557  */
1558 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1559                            *root, struct btrfs_path *path, int data_size,
1560                            int empty)
1561 {
1562         struct extent_buffer *left = path->nodes[0];
1563         struct extent_buffer *right;
1564         struct extent_buffer *upper;
1565         struct btrfs_disk_key disk_key;
1566         int slot;
1567         u32 i;
1568         int free_space;
1569         int push_space = 0;
1570         int push_items = 0;
1571         struct btrfs_item *item;
1572         u32 left_nritems;
1573         u32 nr;
1574         u32 right_nritems;
1575         u32 data_end;
1576         u32 this_item_size;
1577         int ret;
1578
1579         slot = path->slots[1];
1580         if (!path->nodes[1]) {
1581                 return 1;
1582         }
1583         upper = path->nodes[1];
1584         if (slot >= btrfs_header_nritems(upper) - 1)
1585                 return 1;
1586
1587         right = read_tree_block(root, btrfs_node_blockptr(upper, slot + 1),
1588                                 root->leafsize);
1589         free_space = btrfs_leaf_free_space(root, right);
1590         if (free_space < data_size + sizeof(struct btrfs_item)) {
1591                 free_extent_buffer(right);
1592                 return 1;
1593         }
1594
1595         /* cow and double check */
1596         ret = btrfs_cow_block(trans, root, right, upper,
1597                               slot + 1, &right);
1598         if (ret) {
1599                 free_extent_buffer(right);
1600                 return 1;
1601         }
1602         free_space = btrfs_leaf_free_space(root, right);
1603         if (free_space < data_size + sizeof(struct btrfs_item)) {
1604                 free_extent_buffer(right);
1605                 return 1;
1606         }
1607
1608         left_nritems = btrfs_header_nritems(left);
1609         if (left_nritems == 0) {
1610                 free_extent_buffer(right);
1611                 return 1;
1612         }
1613
1614         if (empty)
1615                 nr = 0;
1616         else
1617                 nr = 1;
1618
1619         i = left_nritems - 1;
1620         while (i >= nr) {
1621                 item = btrfs_item_nr(left, i);
1622
1623                 if (path->slots[0] == i)
1624                         push_space += data_size + sizeof(*item);
1625
1626                 if (!left->map_token) {
1627                         map_extent_buffer(left, (unsigned long)item,
1628                                         sizeof(struct btrfs_item),
1629                                         &left->map_token, &left->kaddr,
1630                                         &left->map_start, &left->map_len,
1631                                         KM_USER1);
1632                 }
1633
1634                 this_item_size = btrfs_item_size(left, item);
1635                 if (this_item_size + sizeof(*item) + push_space > free_space)
1636                         break;
1637                 push_items++;
1638                 push_space += this_item_size + sizeof(*item);
1639                 if (i == 0)
1640                         break;
1641                 i--;
1642         }
1643         if (left->map_token) {
1644                 unmap_extent_buffer(left, left->map_token, KM_USER1);
1645                 left->map_token = NULL;
1646         }
1647
1648         if (push_items == 0) {
1649                 free_extent_buffer(right);
1650                 return 1;
1651         }
1652
1653         if (!empty && push_items == left_nritems)
1654                 WARN_ON(1);
1655
1656         /* push left to right */
1657         right_nritems = btrfs_header_nritems(right);
1658
1659         push_space = btrfs_item_end_nr(left, left_nritems - push_items);
1660         push_space -= leaf_data_end(root, left);
1661
1662         /* make room in the right data area */
1663         data_end = leaf_data_end(root, right);
1664         memmove_extent_buffer(right,
1665                               btrfs_leaf_data(right) + data_end - push_space,
1666                               btrfs_leaf_data(right) + data_end,
1667                               BTRFS_LEAF_DATA_SIZE(root) - data_end);
1668
1669         /* copy from the left data area */
1670         copy_extent_buffer(right, left, btrfs_leaf_data(right) +
1671                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
1672                      btrfs_leaf_data(left) + leaf_data_end(root, left),
1673                      push_space);
1674
1675         memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
1676                               btrfs_item_nr_offset(0),
1677                               right_nritems * sizeof(struct btrfs_item));
1678
1679         /* copy the items from left to right */
1680         copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
1681                    btrfs_item_nr_offset(left_nritems - push_items),
1682                    push_items * sizeof(struct btrfs_item));
1683
1684         /* update the item pointers */
1685         right_nritems += push_items;
1686         btrfs_set_header_nritems(right, right_nritems);
1687         push_space = BTRFS_LEAF_DATA_SIZE(root);
1688         for (i = 0; i < right_nritems; i++) {
1689                 item = btrfs_item_nr(right, i);
1690                 if (!right->map_token) {
1691                         map_extent_buffer(right, (unsigned long)item,
1692                                         sizeof(struct btrfs_item),
1693                                         &right->map_token, &right->kaddr,
1694                                         &right->map_start, &right->map_len,
1695                                         KM_USER1);
1696                 }
1697                 push_space -= btrfs_item_size(right, item);
1698                 btrfs_set_item_offset(right, item, push_space);
1699         }
1700
1701         if (right->map_token) {
1702                 unmap_extent_buffer(right, right->map_token, KM_USER1);
1703                 right->map_token = NULL;
1704         }
1705         left_nritems -= push_items;
1706         btrfs_set_header_nritems(left, left_nritems);
1707
1708         if (left_nritems)
1709                 btrfs_mark_buffer_dirty(left);
1710         btrfs_mark_buffer_dirty(right);
1711
1712         btrfs_item_key(right, &disk_key, 0);
1713         btrfs_set_node_key(upper, &disk_key, slot + 1);
1714         btrfs_mark_buffer_dirty(upper);
1715
1716         /* then fixup the leaf pointer in the path */
1717         if (path->slots[0] >= left_nritems) {
1718                 path->slots[0] -= left_nritems;
1719                 free_extent_buffer(path->nodes[0]);
1720                 path->nodes[0] = right;
1721                 path->slots[1] += 1;
1722         } else {
1723                 free_extent_buffer(right);
1724         }
1725         return 0;
1726 }
1727 /*
1728  * push some data in the path leaf to the left, trying to free up at
1729  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1730  */
1731 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1732                           *root, struct btrfs_path *path, int data_size,
1733                           int empty)
1734 {
1735         struct btrfs_disk_key disk_key;
1736         struct extent_buffer *right = path->nodes[0];
1737         struct extent_buffer *left;
1738         int slot;
1739         int i;
1740         int free_space;
1741         int push_space = 0;
1742         int push_items = 0;
1743         struct btrfs_item *item;
1744         u32 old_left_nritems;
1745         u32 right_nritems;
1746         u32 nr;
1747         int ret = 0;
1748         int wret;
1749         u32 this_item_size;
1750         u32 old_left_item_size;
1751
1752         slot = path->slots[1];
1753         if (slot == 0)
1754                 return 1;
1755         if (!path->nodes[1])
1756                 return 1;
1757
1758         right_nritems = btrfs_header_nritems(right);
1759         if (right_nritems == 0) {
1760                 return 1;
1761         }
1762
1763         left = read_tree_block(root, btrfs_node_blockptr(path->nodes[1],
1764                                slot - 1), root->leafsize);
1765         free_space = btrfs_leaf_free_space(root, left);
1766         if (free_space < data_size + sizeof(struct btrfs_item)) {
1767                 free_extent_buffer(left);
1768                 return 1;
1769         }
1770
1771         /* cow and double check */
1772         ret = btrfs_cow_block(trans, root, left,
1773                               path->nodes[1], slot - 1, &left);
1774         if (ret) {
1775                 /* we hit -ENOSPC, but it isn't fatal here */
1776                 free_extent_buffer(left);
1777                 return 1;
1778         }
1779
1780         free_space = btrfs_leaf_free_space(root, left);
1781         if (free_space < data_size + sizeof(struct btrfs_item)) {
1782                 free_extent_buffer(left);
1783                 return 1;
1784         }
1785
1786         if (empty)
1787                 nr = right_nritems;
1788         else
1789                 nr = right_nritems - 1;
1790
1791         for (i = 0; i < nr; i++) {
1792                 item = btrfs_item_nr(right, i);
1793                 if (!right->map_token) {
1794                         map_extent_buffer(right, (unsigned long)item,
1795                                         sizeof(struct btrfs_item),
1796                                         &right->map_token, &right->kaddr,
1797                                         &right->map_start, &right->map_len,
1798                                         KM_USER1);
1799                 }
1800
1801                 if (path->slots[0] == i)
1802                         push_space += data_size + sizeof(*item);
1803
1804                 this_item_size = btrfs_item_size(right, item);
1805                 if (this_item_size + sizeof(*item) + push_space > free_space)
1806                         break;
1807
1808                 push_items++;
1809                 push_space += this_item_size + sizeof(*item);
1810         }
1811
1812         if (right->map_token) {
1813                 unmap_extent_buffer(right, right->map_token, KM_USER1);
1814                 right->map_token = NULL;
1815         }
1816
1817         if (push_items == 0) {
1818                 free_extent_buffer(left);
1819                 return 1;
1820         }
1821         if (!empty && push_items == btrfs_header_nritems(right))
1822                 WARN_ON(1);
1823
1824         /* push data from right to left */
1825         copy_extent_buffer(left, right,
1826                            btrfs_item_nr_offset(btrfs_header_nritems(left)),
1827                            btrfs_item_nr_offset(0),
1828                            push_items * sizeof(struct btrfs_item));
1829
1830         push_space = BTRFS_LEAF_DATA_SIZE(root) -
1831                      btrfs_item_offset_nr(right, push_items -1);
1832
1833         copy_extent_buffer(left, right, btrfs_leaf_data(left) +
1834                      leaf_data_end(root, left) - push_space,
1835                      btrfs_leaf_data(right) +
1836                      btrfs_item_offset_nr(right, push_items - 1),
1837                      push_space);
1838         old_left_nritems = btrfs_header_nritems(left);
1839         BUG_ON(old_left_nritems < 0);
1840
1841         old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
1842         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1843                 u32 ioff;
1844
1845                 item = btrfs_item_nr(left, i);
1846                 if (!left->map_token) {
1847                         map_extent_buffer(left, (unsigned long)item,
1848                                         sizeof(struct btrfs_item),
1849                                         &left->map_token, &left->kaddr,
1850                                         &left->map_start, &left->map_len,
1851                                         KM_USER1);
1852                 }
1853
1854                 ioff = btrfs_item_offset(left, item);
1855                 btrfs_set_item_offset(left, item,
1856                       ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
1857         }
1858         btrfs_set_header_nritems(left, old_left_nritems + push_items);
1859         if (left->map_token) {
1860                 unmap_extent_buffer(left, left->map_token, KM_USER1);
1861                 left->map_token = NULL;
1862         }
1863
1864         /* fixup right node */
1865         if (push_items > right_nritems) {
1866                 printk("push items %d nr %u\n", push_items, right_nritems);
1867                 WARN_ON(1);
1868         }
1869
1870         if (push_items < right_nritems) {
1871                 push_space = btrfs_item_offset_nr(right, push_items - 1) -
1872                                                   leaf_data_end(root, right);
1873                 memmove_extent_buffer(right, btrfs_leaf_data(right) +
1874                                       BTRFS_LEAF_DATA_SIZE(root) - push_space,
1875                                       btrfs_leaf_data(right) +
1876                                       leaf_data_end(root, right), push_space);
1877
1878                 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
1879                               btrfs_item_nr_offset(push_items),
1880                              (btrfs_header_nritems(right) - push_items) *
1881                              sizeof(struct btrfs_item));
1882         }
1883         right_nritems -= push_items;
1884         btrfs_set_header_nritems(right, right_nritems);
1885         push_space = BTRFS_LEAF_DATA_SIZE(root);
1886         for (i = 0; i < right_nritems; i++) {
1887                 item = btrfs_item_nr(right, i);
1888
1889                 if (!right->map_token) {
1890                         map_extent_buffer(right, (unsigned long)item,
1891                                         sizeof(struct btrfs_item),
1892                                         &right->map_token, &right->kaddr,
1893                                         &right->map_start, &right->map_len,
1894                                         KM_USER1);
1895                 }
1896
1897                 push_space = push_space - btrfs_item_size(right, item);
1898                 btrfs_set_item_offset(right, item, push_space);
1899         }
1900         if (right->map_token) {
1901                 unmap_extent_buffer(right, right->map_token, KM_USER1);
1902                 right->map_token = NULL;
1903         }
1904
1905         btrfs_mark_buffer_dirty(left);
1906         if (right_nritems)
1907                 btrfs_mark_buffer_dirty(right);
1908
1909         btrfs_item_key(right, &disk_key, 0);
1910         wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1911         if (wret)
1912                 ret = wret;
1913
1914         /* then fixup the leaf pointer in the path */
1915         if (path->slots[0] < push_items) {
1916                 path->slots[0] += old_left_nritems;
1917                 free_extent_buffer(path->nodes[0]);
1918                 path->nodes[0] = left;
1919                 path->slots[1] -= 1;
1920         } else {
1921                 free_extent_buffer(left);
1922                 path->slots[0] -= push_items;
1923         }
1924         BUG_ON(path->slots[0] < 0);
1925         return ret;
1926 }
1927
1928 /*
1929  * split the path's leaf in two, making sure there is at least data_size
1930  * available for the resulting leaf level of the path.
1931  *
1932  * returns 0 if all went well and < 0 on failure.
1933  */
1934 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1935                       *root, struct btrfs_key *ins_key,
1936                       struct btrfs_path *path, int data_size, int extend)
1937 {
1938         u64 root_gen;
1939         struct extent_buffer *l;
1940         u32 nritems;
1941         int mid;
1942         int slot;
1943         struct extent_buffer *right;
1944         int space_needed = data_size + sizeof(struct btrfs_item);
1945         int data_copy_size;
1946         int rt_data_off;
1947         int i;
1948         int ret = 0;
1949         int wret;
1950         int double_split;
1951         int num_doubles = 0;
1952         struct btrfs_disk_key disk_key;
1953
1954         if (extend)
1955                 space_needed = data_size;
1956
1957         if (root->ref_cows)
1958                 root_gen = trans->transid;
1959         else
1960                 root_gen = 0;
1961
1962         /* first try to make some room by pushing left and right */
1963         if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
1964                 wret = push_leaf_right(trans, root, path, data_size, 0);
1965                 if (wret < 0) {
1966                         return wret;
1967                 }
1968                 if (wret) {
1969                         wret = push_leaf_left(trans, root, path, data_size, 0);
1970                         if (wret < 0)
1971                                 return wret;
1972                 }
1973                 l = path->nodes[0];
1974
1975                 /* did the pushes work? */
1976                 if (btrfs_leaf_free_space(root, l) >= space_needed)
1977                         return 0;
1978         }
1979
1980         if (!path->nodes[1]) {
1981                 ret = insert_new_root(trans, root, path, 1);
1982                 if (ret)
1983                         return ret;
1984         }
1985 again:
1986         double_split = 0;
1987         l = path->nodes[0];
1988         slot = path->slots[0];
1989         nritems = btrfs_header_nritems(l);
1990         mid = (nritems + 1)/ 2;
1991
1992         btrfs_item_key(l, &disk_key, 0);
1993
1994         right = __btrfs_alloc_free_block(trans, root, root->leafsize,
1995                                          root->root_key.objectid,
1996                                          root_gen, disk_key.objectid, 0,
1997                                          l->start, 0);
1998         if (IS_ERR(right))
1999                 return PTR_ERR(right);
2000
2001         memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
2002         btrfs_set_header_bytenr(right, right->start);
2003         btrfs_set_header_generation(right, trans->transid);
2004         btrfs_set_header_owner(right, root->root_key.objectid);
2005         btrfs_set_header_level(right, 0);
2006         write_extent_buffer(right, root->fs_info->fsid,
2007                             (unsigned long)btrfs_header_fsid(right),
2008                             BTRFS_FSID_SIZE);
2009         if (mid <= slot) {
2010                 if (nritems == 1 ||
2011                     leaf_space_used(l, mid, nritems - mid) + space_needed >
2012                         BTRFS_LEAF_DATA_SIZE(root)) {
2013                         if (slot >= nritems) {
2014                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2015                                 btrfs_set_header_nritems(right, 0);
2016                                 wret = insert_ptr(trans, root, path,
2017                                                   &disk_key, right->start,
2018                                                   path->slots[1] + 1, 1);
2019                                 if (wret)
2020                                         ret = wret;
2021                                 free_extent_buffer(path->nodes[0]);
2022                                 path->nodes[0] = right;
2023                                 path->slots[0] = 0;
2024                                 path->slots[1] += 1;
2025                                 return ret;
2026                         }
2027                         mid = slot;
2028                         if (mid != nritems &&
2029                             leaf_space_used(l, mid, nritems - mid) +
2030                             space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
2031                                 double_split = 1;
2032                         }
2033                 }
2034         } else {
2035                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
2036                         BTRFS_LEAF_DATA_SIZE(root)) {
2037                         if (!extend && slot == 0) {
2038                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2039                                 btrfs_set_header_nritems(right, 0);
2040                                 wret = insert_ptr(trans, root, path,
2041                                                   &disk_key,
2042                                                   right->start,
2043                                                   path->slots[1], 1);
2044                                 if (wret)
2045                                         ret = wret;
2046                                 free_extent_buffer(path->nodes[0]);
2047                                 path->nodes[0] = right;
2048                                 path->slots[0] = 0;
2049                                 if (path->slots[1] == 0) {
2050                                         wret = fixup_low_keys(trans, root,
2051                                                    path, &disk_key, 1);
2052                                         if (wret)
2053                                                 ret = wret;
2054                                 }
2055                                 return ret;
2056                         } else if (extend && slot == 0) {
2057                                 mid = 1;
2058                         } else {
2059                                 mid = slot;
2060                                 if (mid != nritems &&
2061                                     leaf_space_used(l, mid, nritems - mid) +
2062                                     space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
2063                                         double_split = 1;
2064                                 }
2065                         }
2066                 }
2067         }
2068         nritems = nritems - mid;
2069         btrfs_set_header_nritems(right, nritems);
2070         data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
2071
2072         copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
2073                            btrfs_item_nr_offset(mid),
2074                            nritems * sizeof(struct btrfs_item));
2075
2076         copy_extent_buffer(right, l,
2077                      btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
2078                      data_copy_size, btrfs_leaf_data(l) +
2079                      leaf_data_end(root, l), data_copy_size);
2080
2081         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
2082                       btrfs_item_end_nr(l, mid);
2083
2084         for (i = 0; i < nritems; i++) {
2085                 struct btrfs_item *item = btrfs_item_nr(right, i);
2086                 u32 ioff;
2087
2088                 if (!right->map_token) {
2089                         map_extent_buffer(right, (unsigned long)item,
2090                                         sizeof(struct btrfs_item),
2091                                         &right->map_token, &right->kaddr,
2092                                         &right->map_start, &right->map_len,
2093                                         KM_USER1);
2094                 }
2095
2096                 ioff = btrfs_item_offset(right, item);
2097                 btrfs_set_item_offset(right, item, ioff + rt_data_off);
2098         }
2099
2100         if (right->map_token) {
2101                 unmap_extent_buffer(right, right->map_token, KM_USER1);
2102                 right->map_token = NULL;
2103         }
2104
2105         btrfs_set_header_nritems(l, mid);
2106         ret = 0;
2107         btrfs_item_key(right, &disk_key, 0);
2108         wret = insert_ptr(trans, root, path, &disk_key, right->start,
2109                           path->slots[1] + 1, 1);
2110         if (wret)
2111                 ret = wret;
2112
2113         btrfs_mark_buffer_dirty(right);
2114         btrfs_mark_buffer_dirty(l);
2115         BUG_ON(path->slots[0] != slot);
2116
2117         if (mid <= slot) {
2118                 free_extent_buffer(path->nodes[0]);
2119                 path->nodes[0] = right;
2120                 path->slots[0] -= mid;
2121                 path->slots[1] += 1;
2122         } else
2123                 free_extent_buffer(right);
2124
2125         BUG_ON(path->slots[0] < 0);
2126
2127         if (double_split) {
2128                 BUG_ON(num_doubles != 0);
2129                 num_doubles++;
2130                 goto again;
2131         }
2132         return ret;
2133 }
2134
2135 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
2136                         struct btrfs_root *root,
2137                         struct btrfs_path *path,
2138                         u32 new_size, int from_end)
2139 {
2140         int ret = 0;
2141         int slot;
2142         int slot_orig;
2143         struct extent_buffer *leaf;
2144         struct btrfs_item *item;
2145         u32 nritems;
2146         unsigned int data_end;
2147         unsigned int old_data_start;
2148         unsigned int old_size;
2149         unsigned int size_diff;
2150         int i;
2151
2152         slot_orig = path->slots[0];
2153         leaf = path->nodes[0];
2154         slot = path->slots[0];
2155
2156         old_size = btrfs_item_size_nr(leaf, slot);
2157         if (old_size == new_size)
2158                 return 0;
2159
2160         nritems = btrfs_header_nritems(leaf);
2161         data_end = leaf_data_end(root, leaf);
2162
2163         old_data_start = btrfs_item_offset_nr(leaf, slot);
2164
2165         size_diff = old_size - new_size;
2166
2167         BUG_ON(slot < 0);
2168         BUG_ON(slot >= nritems);
2169
2170         /*
2171          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2172          */
2173         /* first correct the data pointers */
2174         for (i = slot; i < nritems; i++) {
2175                 u32 ioff;
2176                 item = btrfs_item_nr(leaf, i);
2177
2178                 if (!leaf->map_token) {
2179                         map_extent_buffer(leaf, (unsigned long)item,
2180                                         sizeof(struct btrfs_item),
2181                                         &leaf->map_token, &leaf->kaddr,
2182                                         &leaf->map_start, &leaf->map_len,
2183                                         KM_USER1);
2184                 }
2185
2186                 ioff = btrfs_item_offset(leaf, item);
2187                 btrfs_set_item_offset(leaf, item, ioff + size_diff);
2188         }
2189
2190         if (leaf->map_token) {
2191                 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2192                 leaf->map_token = NULL;
2193         }
2194
2195         /* shift the data */
2196         if (from_end) {
2197                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2198                               data_end + size_diff, btrfs_leaf_data(leaf) +
2199                               data_end, old_data_start + new_size - data_end);
2200         } else {
2201                 struct btrfs_disk_key disk_key;
2202                 u64 offset;
2203
2204                 btrfs_item_key(leaf, &disk_key, slot);
2205
2206                 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
2207                         unsigned long ptr;
2208                         struct btrfs_file_extent_item *fi;
2209
2210                         fi = btrfs_item_ptr(leaf, slot,
2211                                             struct btrfs_file_extent_item);
2212                         fi = (struct btrfs_file_extent_item *)(
2213                              (unsigned long)fi - size_diff);
2214
2215                         if (btrfs_file_extent_type(leaf, fi) ==
2216                             BTRFS_FILE_EXTENT_INLINE) {
2217                                 ptr = btrfs_item_ptr_offset(leaf, slot);
2218                                 memmove_extent_buffer(leaf, ptr,
2219                                         (unsigned long)fi,
2220                                         offsetof(struct btrfs_file_extent_item,
2221                                                  disk_bytenr));
2222                         }
2223                 }
2224
2225                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2226                               data_end + size_diff, btrfs_leaf_data(leaf) +
2227                               data_end, old_data_start - data_end);
2228
2229                 offset = btrfs_disk_key_offset(&disk_key);
2230                 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
2231                 btrfs_set_item_key(leaf, &disk_key, slot);
2232                 if (slot == 0)
2233                         fixup_low_keys(trans, root, path, &disk_key, 1);
2234         }
2235
2236         item = btrfs_item_nr(leaf, slot);
2237         btrfs_set_item_size(leaf, item, new_size);
2238         btrfs_mark_buffer_dirty(leaf);
2239
2240         ret = 0;
2241         if (btrfs_leaf_free_space(root, leaf) < 0) {
2242                 btrfs_print_leaf(root, leaf);
2243                 BUG();
2244         }
2245         return ret;
2246 }
2247
2248 int btrfs_extend_item(struct btrfs_trans_handle *trans,
2249                       struct btrfs_root *root, struct btrfs_path *path,
2250                       u32 data_size)
2251 {
2252         int ret = 0;
2253         int slot;
2254         int slot_orig;
2255         struct extent_buffer *leaf;
2256         struct btrfs_item *item;
2257         u32 nritems;
2258         unsigned int data_end;
2259         unsigned int old_data;
2260         unsigned int old_size;
2261         int i;
2262
2263         slot_orig = path->slots[0];
2264         leaf = path->nodes[0];
2265
2266         nritems = btrfs_header_nritems(leaf);
2267         data_end = leaf_data_end(root, leaf);
2268
2269         if (btrfs_leaf_free_space(root, leaf) < data_size) {
2270                 btrfs_print_leaf(root, leaf);
2271                 BUG();
2272         }
2273         slot = path->slots[0];
2274         old_data = btrfs_item_end_nr(leaf, slot);
2275
2276         BUG_ON(slot < 0);
2277         if (slot >= nritems) {
2278                 btrfs_print_leaf(root, leaf);
2279                 printk("slot %d too large, nritems %d\n", slot, nritems);
2280                 BUG_ON(1);
2281         }
2282
2283         /*
2284          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2285          */
2286         /* first correct the data pointers */
2287         for (i = slot; i < nritems; i++) {
2288                 u32 ioff;
2289                 item = btrfs_item_nr(leaf, i);
2290
2291                 if (!leaf->map_token) {
2292                         map_extent_buffer(leaf, (unsigned long)item,
2293                                         sizeof(struct btrfs_item),
2294                                         &leaf->map_token, &leaf->kaddr,
2295                                         &leaf->map_start, &leaf->map_len,
2296                                         KM_USER1);
2297                 }
2298                 ioff = btrfs_item_offset(leaf, item);
2299                 btrfs_set_item_offset(leaf, item, ioff - data_size);
2300         }
2301
2302         if (leaf->map_token) {
2303                 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2304                 leaf->map_token = NULL;
2305         }
2306
2307         /* shift the data */
2308         memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2309                       data_end - data_size, btrfs_leaf_data(leaf) +
2310                       data_end, old_data - data_end);
2311
2312         data_end = old_data;
2313         old_size = btrfs_item_size_nr(leaf, slot);
2314         item = btrfs_item_nr(leaf, slot);
2315         btrfs_set_item_size(leaf, item, old_size + data_size);
2316         btrfs_mark_buffer_dirty(leaf);
2317
2318         ret = 0;
2319         if (btrfs_leaf_free_space(root, leaf) < 0) {
2320                 btrfs_print_leaf(root, leaf);
2321                 BUG();
2322         }
2323         return ret;
2324 }
2325
2326 /*
2327  * Given a key and some data, insert an item into the tree.
2328  * This does all the path init required, making room in the tree if needed.
2329  */
2330 int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
2331                             struct btrfs_root *root,
2332                             struct btrfs_path *path,
2333                             struct btrfs_key *cpu_key, u32 data_size)
2334 {
2335         struct extent_buffer *leaf;
2336         struct btrfs_item *item;
2337         int ret = 0;
2338         int slot;
2339         int slot_orig;
2340         u32 nritems;
2341         unsigned int data_end;
2342         struct btrfs_disk_key disk_key;
2343
2344         btrfs_cpu_key_to_disk(&disk_key, cpu_key);
2345
2346         /* create a root if there isn't one */
2347         if (!root->node)
2348                 BUG();
2349
2350         ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
2351         if (ret == 0) {
2352                 return -EEXIST;
2353         }
2354         if (ret < 0)
2355                 goto out;
2356
2357         slot_orig = path->slots[0];
2358         leaf = path->nodes[0];
2359
2360         nritems = btrfs_header_nritems(leaf);
2361         data_end = leaf_data_end(root, leaf);
2362
2363         if (btrfs_leaf_free_space(root, leaf) <
2364             sizeof(struct btrfs_item) + data_size) {
2365                 btrfs_print_leaf(root, leaf);
2366                 printk("not enough freespace need %u have %d\n",
2367                        data_size, btrfs_leaf_free_space(root, leaf));
2368                 BUG();
2369         }
2370
2371         slot = path->slots[0];
2372         BUG_ON(slot < 0);
2373
2374         if (slot != nritems) {
2375                 int i;
2376                 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
2377
2378                 if (old_data < data_end) {
2379                         btrfs_print_leaf(root, leaf);
2380                         printk("slot %d old_data %d data_end %d\n",
2381                                slot, old_data, data_end);
2382                         BUG_ON(1);
2383                 }
2384                 /*
2385                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
2386                  */
2387                 /* first correct the data pointers */
2388                 WARN_ON(leaf->map_token);
2389                 for (i = slot; i < nritems; i++) {
2390                         u32 ioff;
2391
2392                         item = btrfs_item_nr(leaf, i);
2393                         if (!leaf->map_token) {
2394                                 map_extent_buffer(leaf, (unsigned long)item,
2395                                         sizeof(struct btrfs_item),
2396                                         &leaf->map_token, &leaf->kaddr,
2397                                         &leaf->map_start, &leaf->map_len,
2398                                         KM_USER1);
2399                         }
2400
2401                         ioff = btrfs_item_offset(leaf, item);
2402                         btrfs_set_item_offset(leaf, item, ioff - data_size);
2403                 }
2404                 if (leaf->map_token) {
2405                         unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2406                         leaf->map_token = NULL;
2407                 }
2408
2409                 /* shift the items */
2410                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
2411                               btrfs_item_nr_offset(slot),
2412                               (nritems - slot) * sizeof(struct btrfs_item));
2413
2414                 /* shift the data */
2415                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2416                               data_end - data_size, btrfs_leaf_data(leaf) +
2417                               data_end, old_data - data_end);
2418                 data_end = old_data;
2419         }
2420
2421         /* setup the item for the new data */
2422         btrfs_set_item_key(leaf, &disk_key, slot);
2423         item = btrfs_item_nr(leaf, slot);
2424         btrfs_set_item_offset(leaf, item, data_end - data_size);
2425         btrfs_set_item_size(leaf, item, data_size);
2426         btrfs_set_header_nritems(leaf, nritems + 1);
2427         btrfs_mark_buffer_dirty(leaf);
2428
2429         ret = 0;
2430         if (slot == 0)
2431                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
2432
2433         if (btrfs_leaf_free_space(root, leaf) < 0) {
2434                 btrfs_print_leaf(root, leaf);
2435                 BUG();
2436         }
2437 out:
2438         return ret;
2439 }
2440
2441 /*
2442  * Given a key and some data, insert an item into the tree.
2443  * This does all the path init required, making room in the tree if needed.
2444  */
2445 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
2446                       *root, struct btrfs_key *cpu_key, void *data, u32
2447                       data_size)
2448 {
2449         int ret = 0;
2450         struct btrfs_path *path;
2451         struct extent_buffer *leaf;
2452         unsigned long ptr;
2453
2454         path = btrfs_alloc_path();
2455         BUG_ON(!path);
2456         ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
2457         if (!ret) {
2458                 leaf = path->nodes[0];
2459                 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
2460                 write_extent_buffer(leaf, data, ptr, data_size);
2461                 btrfs_mark_buffer_dirty(leaf);
2462         }
2463         btrfs_free_path(path);
2464         return ret;
2465 }
2466
2467 /*
2468  * delete the pointer from a given node.
2469  *
2470  * If the delete empties a node, the node is removed from the tree,
2471  * continuing all the way the root if required.  The root is converted into
2472  * a leaf if all the nodes are emptied.
2473  */
2474 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2475                    struct btrfs_path *path, int level, int slot)
2476 {
2477         struct extent_buffer *parent = path->nodes[level];
2478         u32 nritems;
2479         int ret = 0;
2480         int wret;
2481
2482         nritems = btrfs_header_nritems(parent);
2483         if (slot != nritems -1) {
2484                 memmove_extent_buffer(parent,
2485                               btrfs_node_key_ptr_offset(slot),
2486                               btrfs_node_key_ptr_offset(slot + 1),
2487                               sizeof(struct btrfs_key_ptr) *
2488                               (nritems - slot - 1));
2489         }
2490         nritems--;
2491         btrfs_set_header_nritems(parent, nritems);
2492         if (nritems == 0 && parent == root->node) {
2493                 BUG_ON(btrfs_header_level(root->node) != 1);
2494                 /* just turn the root into a leaf and break */
2495                 btrfs_set_header_level(root->node, 0);
2496         } else if (slot == 0) {
2497                 struct btrfs_disk_key disk_key;
2498
2499                 btrfs_node_key(parent, &disk_key, 0);
2500                 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
2501                 if (wret)
2502                         ret = wret;
2503         }
2504         btrfs_mark_buffer_dirty(parent);
2505         return ret;
2506 }
2507
2508 /*
2509  * delete the item at the leaf level in path.  If that empties
2510  * the leaf, remove it from the tree
2511  */
2512 int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2513                    struct btrfs_path *path)
2514 {
2515         int slot;
2516         struct extent_buffer *leaf;
2517         struct btrfs_item *item;
2518         int doff;
2519         int dsize;
2520         int ret = 0;
2521         int wret;
2522         u32 nritems;
2523
2524         leaf = path->nodes[0];
2525         slot = path->slots[0];
2526         doff = btrfs_item_offset_nr(leaf, slot);
2527         dsize = btrfs_item_size_nr(leaf, slot);
2528         nritems = btrfs_header_nritems(leaf);
2529
2530         if (slot != nritems - 1) {
2531                 int i;
2532                 int data_end = leaf_data_end(root, leaf);
2533
2534                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2535                               data_end + dsize,
2536                               btrfs_leaf_data(leaf) + data_end,
2537                               doff - data_end);
2538
2539                 for (i = slot + 1; i < nritems; i++) {
2540                         u32 ioff;
2541
2542                         item = btrfs_item_nr(leaf, i);
2543                         if (!leaf->map_token) {
2544                                 map_extent_buffer(leaf, (unsigned long)item,
2545                                         sizeof(struct btrfs_item),
2546                                         &leaf->map_token, &leaf->kaddr,
2547                                         &leaf->map_start, &leaf->map_len,
2548                                         KM_USER1);
2549                         }
2550                         ioff = btrfs_item_offset(leaf, item);
2551                         btrfs_set_item_offset(leaf, item, ioff + dsize);
2552                 }
2553
2554                 if (leaf->map_token) {
2555                         unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2556                         leaf->map_token = NULL;
2557                 }
2558
2559                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
2560                               btrfs_item_nr_offset(slot + 1),
2561                               sizeof(struct btrfs_item) *
2562                               (nritems - slot - 1));
2563         }
2564         btrfs_set_header_nritems(leaf, nritems - 1);
2565         nritems--;
2566
2567         /* delete the leaf if we've emptied it */
2568         if (nritems == 0) {
2569                 if (leaf == root->node) {
2570                         btrfs_set_header_level(leaf, 0);
2571                 } else {
2572                         u64 root_gen = btrfs_header_generation(path->nodes[1]);
2573                         clean_tree_block(trans, root, leaf);
2574                         wait_on_tree_block_writeback(root, leaf);
2575                         wret = del_ptr(trans, root, path, 1, path->slots[1]);
2576                         if (wret)
2577                                 ret = wret;
2578                         wret = btrfs_free_extent(trans, root,
2579                                          leaf->start, leaf->len,
2580                                          btrfs_header_owner(path->nodes[1]),
2581                                          root_gen, 0, 0, 1);
2582                         if (wret)
2583                                 ret = wret;
2584                 }
2585         } else {
2586                 int used = leaf_space_used(leaf, 0, nritems);
2587                 if (slot == 0) {
2588                         struct btrfs_disk_key disk_key;
2589
2590                         btrfs_item_key(leaf, &disk_key, 0);
2591                         wret = fixup_low_keys(trans, root, path,
2592                                               &disk_key, 1);
2593                         if (wret)
2594                                 ret = wret;
2595                 }
2596
2597                 /* delete the leaf if it is mostly empty */
2598                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
2599                         /* push_leaf_left fixes the path.
2600                          * make sure the path still points to our leaf
2601                          * for possible call to del_ptr below
2602                          */
2603                         slot = path->slots[1];
2604                         extent_buffer_get(leaf);
2605
2606                         wret = push_leaf_right(trans, root, path, 1, 1);
2607                         if (wret < 0 && wret != -ENOSPC)
2608                                 ret = wret;
2609
2610                         if (path->nodes[0] == leaf &&
2611                             btrfs_header_nritems(leaf)) {
2612                                 wret = push_leaf_left(trans, root, path, 1, 1);
2613                                 if (wret < 0 && wret != -ENOSPC)
2614                                         ret = wret;
2615                         }
2616
2617                         if (btrfs_header_nritems(leaf) == 0) {
2618                                 u64 root_gen;
2619                                 u64 bytenr = leaf->start;
2620                                 u32 blocksize = leaf->len;
2621
2622                                 root_gen = btrfs_header_generation(
2623                                                            path->nodes[1]);
2624
2625                                 clean_tree_block(trans, root, leaf);
2626                                 wait_on_tree_block_writeback(root, leaf);
2627
2628                                 wret = del_ptr(trans, root, path, 1, slot);
2629                                 if (wret)
2630                                         ret = wret;
2631
2632                                 free_extent_buffer(leaf);
2633                                 wret = btrfs_free_extent(trans, root, bytenr,
2634                                              blocksize,
2635                                              btrfs_header_owner(path->nodes[1]),
2636                                              root_gen, 0, 0, 1);
2637                                 if (wret)
2638                                         ret = wret;
2639                         } else {
2640                                 btrfs_mark_buffer_dirty(leaf);
2641                                 free_extent_buffer(leaf);
2642                         }
2643                 } else {
2644                         btrfs_mark_buffer_dirty(leaf);
2645                 }
2646         }
2647         return ret;
2648 }
2649
2650 /*
2651  * walk up the tree as far as required to find the previous leaf.
2652  * returns 0 if it found something or 1 if there are no lesser leaves.
2653  * returns < 0 on io errors.
2654  */
2655 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
2656 {
2657         u64 bytenr;
2658         int slot;
2659         int level = 1;
2660         struct extent_buffer *c;
2661         struct extent_buffer *next = NULL;
2662
2663         while(level < BTRFS_MAX_LEVEL) {
2664                 if (!path->nodes[level])
2665                         return 1;
2666
2667                 slot = path->slots[level];
2668                 c = path->nodes[level];
2669                 if (slot == 0) {
2670                         level++;
2671                         if (level == BTRFS_MAX_LEVEL)
2672                                 return 1;
2673                         continue;
2674                 }
2675                 slot--;
2676
2677                 bytenr = btrfs_node_blockptr(c, slot);
2678                 if (next)
2679                         free_extent_buffer(next);
2680
2681                 next = read_tree_block(root, bytenr,
2682                                        btrfs_level_size(root, level - 1));
2683                 break;
2684         }
2685         path->slots[level] = slot;
2686         while(1) {
2687                 level--;
2688                 c = path->nodes[level];
2689                 free_extent_buffer(c);
2690                 slot = btrfs_header_nritems(next);
2691                 if (slot != 0)
2692                         slot--;
2693                 path->nodes[level] = next;
2694                 path->slots[level] = slot;
2695                 if (!level)
2696                         break;
2697                 next = read_tree_block(root, btrfs_node_blockptr(next, slot),
2698                                        btrfs_level_size(root, level - 1));
2699         }
2700         return 0;
2701 }
2702
2703 /*
2704  * walk up the tree as far as required to find the next leaf.
2705  * returns 0 if it found something or 1 if there are no greater leaves.
2706  * returns < 0 on io errors.
2707  */
2708 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2709 {
2710         int slot;
2711         int level = 1;
2712         u64 bytenr;
2713         struct extent_buffer *c;
2714         struct extent_buffer *next = NULL;
2715
2716         while(level < BTRFS_MAX_LEVEL) {
2717                 if (!path->nodes[level])
2718                         return 1;
2719
2720                 slot = path->slots[level] + 1;
2721                 c = path->nodes[level];
2722                 if (slot >= btrfs_header_nritems(c)) {
2723                         level++;
2724                         if (level == BTRFS_MAX_LEVEL)
2725                                 return 1;
2726                         continue;
2727                 }
2728
2729                 bytenr = btrfs_node_blockptr(c, slot);
2730                 if (next)
2731                         free_extent_buffer(next);
2732
2733                 if (path->reada)
2734                         reada_for_search(root, path, level, slot, 0);
2735
2736                 next = read_tree_block(root, bytenr,
2737                                        btrfs_level_size(root, level -1));
2738                 break;
2739         }
2740         path->slots[level] = slot;
2741         while(1) {
2742                 level--;
2743                 c = path->nodes[level];
2744                 free_extent_buffer(c);
2745                 path->nodes[level] = next;
2746                 path->slots[level] = 0;
2747                 if (!level)
2748                         break;
2749                 if (path->reada)
2750                         reada_for_search(root, path, level, 0, 0);
2751                 next = read_tree_block(root, btrfs_node_blockptr(next, 0),
2752                                        btrfs_level_size(root, level - 1));
2753         }
2754         return 0;
2755 }