Btrfs: Handle write errors on raid1 and raid10
[linux-2.6] / fs / btrfs / ctree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "transaction.h"
23 #include "print-tree.h"
24
25 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
26                       *root, struct btrfs_path *path, int level);
27 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
28                       *root, struct btrfs_key *ins_key,
29                       struct btrfs_path *path, int data_size, int extend);
30 static int push_node_left(struct btrfs_trans_handle *trans,
31                           struct btrfs_root *root, struct extent_buffer *dst,
32                           struct extent_buffer *src, int empty);
33 static int balance_node_right(struct btrfs_trans_handle *trans,
34                               struct btrfs_root *root,
35                               struct extent_buffer *dst_buf,
36                               struct extent_buffer *src_buf);
37 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
38                    struct btrfs_path *path, int level, int slot);
39
40 inline void btrfs_init_path(struct btrfs_path *p)
41 {
42         memset(p, 0, sizeof(*p));
43 }
44
45 struct btrfs_path *btrfs_alloc_path(void)
46 {
47         struct btrfs_path *path;
48         path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
49         if (path) {
50                 btrfs_init_path(path);
51                 path->reada = 1;
52         }
53         return path;
54 }
55
56 void btrfs_free_path(struct btrfs_path *p)
57 {
58         btrfs_release_path(NULL, p);
59         kmem_cache_free(btrfs_path_cachep, p);
60 }
61
62 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
63 {
64         int i;
65         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
66                 if (!p->nodes[i])
67                         break;
68                 free_extent_buffer(p->nodes[i]);
69         }
70         memset(p, 0, sizeof(*p));
71 }
72
73 static void add_root_to_dirty_list(struct btrfs_root *root)
74 {
75         if (root->track_dirty && list_empty(&root->dirty_list)) {
76                 list_add(&root->dirty_list,
77                          &root->fs_info->dirty_cowonly_roots);
78         }
79 }
80
81 int btrfs_copy_root(struct btrfs_trans_handle *trans,
82                       struct btrfs_root *root,
83                       struct extent_buffer *buf,
84                       struct extent_buffer **cow_ret, u64 new_root_objectid)
85 {
86         struct extent_buffer *cow;
87         u32 nritems;
88         int ret = 0;
89         int level;
90         struct btrfs_key first_key;
91         struct btrfs_root *new_root;
92
93         new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
94         if (!new_root)
95                 return -ENOMEM;
96
97         memcpy(new_root, root, sizeof(*new_root));
98         new_root->root_key.objectid = new_root_objectid;
99
100         WARN_ON(root->ref_cows && trans->transid !=
101                 root->fs_info->running_transaction->transid);
102         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
103
104         level = btrfs_header_level(buf);
105         nritems = btrfs_header_nritems(buf);
106         if (nritems) {
107                 if (level == 0)
108                         btrfs_item_key_to_cpu(buf, &first_key, 0);
109                 else
110                         btrfs_node_key_to_cpu(buf, &first_key, 0);
111         } else {
112                 first_key.objectid = 0;
113         }
114         cow = __btrfs_alloc_free_block(trans, new_root, buf->len,
115                                        new_root_objectid,
116                                        trans->transid, first_key.objectid,
117                                        level, buf->start, 0);
118         if (IS_ERR(cow)) {
119                 kfree(new_root);
120                 return PTR_ERR(cow);
121         }
122
123         copy_extent_buffer(cow, buf, 0, 0, cow->len);
124         btrfs_set_header_bytenr(cow, cow->start);
125         btrfs_set_header_generation(cow, trans->transid);
126         btrfs_set_header_owner(cow, new_root_objectid);
127         btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
128
129         WARN_ON(btrfs_header_generation(buf) > trans->transid);
130         ret = btrfs_inc_ref(trans, new_root, buf);
131         kfree(new_root);
132
133         if (ret)
134                 return ret;
135
136         btrfs_mark_buffer_dirty(cow);
137         *cow_ret = cow;
138         return 0;
139 }
140
141 int __btrfs_cow_block(struct btrfs_trans_handle *trans,
142                              struct btrfs_root *root,
143                              struct extent_buffer *buf,
144                              struct extent_buffer *parent, int parent_slot,
145                              struct extent_buffer **cow_ret,
146                              u64 search_start, u64 empty_size)
147 {
148         u64 root_gen;
149         struct extent_buffer *cow;
150         u32 nritems;
151         int ret = 0;
152         int different_trans = 0;
153         int level;
154         struct btrfs_key first_key;
155
156         if (root->ref_cows) {
157                 root_gen = trans->transid;
158         } else {
159                 root_gen = 0;
160         }
161         WARN_ON(root->ref_cows && trans->transid !=
162                 root->fs_info->running_transaction->transid);
163         WARN_ON(root->ref_cows && trans->transid != root->last_trans);
164
165         level = btrfs_header_level(buf);
166         nritems = btrfs_header_nritems(buf);
167         if (nritems) {
168                 if (level == 0)
169                         btrfs_item_key_to_cpu(buf, &first_key, 0);
170                 else
171                         btrfs_node_key_to_cpu(buf, &first_key, 0);
172         } else {
173                 first_key.objectid = 0;
174         }
175         cow = __btrfs_alloc_free_block(trans, root, buf->len,
176                                      root->root_key.objectid,
177                                      root_gen, first_key.objectid, level,
178                                      search_start, empty_size);
179         if (IS_ERR(cow))
180                 return PTR_ERR(cow);
181
182         copy_extent_buffer(cow, buf, 0, 0, cow->len);
183         btrfs_set_header_bytenr(cow, cow->start);
184         btrfs_set_header_generation(cow, trans->transid);
185         btrfs_set_header_owner(cow, root->root_key.objectid);
186         btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
187
188         WARN_ON(btrfs_header_generation(buf) > trans->transid);
189         if (btrfs_header_generation(buf) != trans->transid) {
190                 different_trans = 1;
191                 ret = btrfs_inc_ref(trans, root, buf);
192                 if (ret)
193                         return ret;
194         } else {
195                 clean_tree_block(trans, root, buf);
196         }
197
198         if (buf == root->node) {
199                 root_gen = btrfs_header_generation(buf);
200                 root->node = cow;
201                 extent_buffer_get(cow);
202                 if (buf != root->commit_root) {
203                         btrfs_free_extent(trans, root, buf->start,
204                                           buf->len, root->root_key.objectid,
205                                           root_gen, 0, 0, 1);
206                 }
207                 free_extent_buffer(buf);
208                 add_root_to_dirty_list(root);
209         } else {
210                 root_gen = btrfs_header_generation(parent);
211                 btrfs_set_node_blockptr(parent, parent_slot,
212                                         cow->start);
213                 WARN_ON(trans->transid == 0);
214                 btrfs_set_node_ptr_generation(parent, parent_slot,
215                                               trans->transid);
216                 btrfs_mark_buffer_dirty(parent);
217                 WARN_ON(btrfs_header_generation(parent) != trans->transid);
218                 btrfs_free_extent(trans, root, buf->start, buf->len,
219                                   btrfs_header_owner(parent), root_gen,
220                                   0, 0, 1);
221         }
222         free_extent_buffer(buf);
223         btrfs_mark_buffer_dirty(cow);
224         *cow_ret = cow;
225         return 0;
226 }
227
228 int btrfs_cow_block(struct btrfs_trans_handle *trans,
229                     struct btrfs_root *root, struct extent_buffer *buf,
230                     struct extent_buffer *parent, int parent_slot,
231                     struct extent_buffer **cow_ret)
232 {
233         u64 search_start;
234         u64 header_trans;
235         int ret;
236
237         if (trans->transaction != root->fs_info->running_transaction) {
238                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
239                        root->fs_info->running_transaction->transid);
240                 WARN_ON(1);
241         }
242         if (trans->transid != root->fs_info->generation) {
243                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
244                        root->fs_info->generation);
245                 WARN_ON(1);
246         }
247
248         header_trans = btrfs_header_generation(buf);
249         spin_lock(&root->fs_info->hash_lock);
250         if (header_trans == trans->transid &&
251             !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
252                 *cow_ret = buf;
253                 spin_unlock(&root->fs_info->hash_lock);
254                 return 0;
255         }
256         spin_unlock(&root->fs_info->hash_lock);
257         search_start = buf->start & ~((u64)(1024 * 1024 * 1024) - 1);
258         ret = __btrfs_cow_block(trans, root, buf, parent,
259                                  parent_slot, cow_ret, search_start, 0);
260         return ret;
261 }
262
263 static int close_blocks(u64 blocknr, u64 other, u32 blocksize)
264 {
265         if (blocknr < other && other - (blocknr + blocksize) < 32768)
266                 return 1;
267         if (blocknr > other && blocknr - (other + blocksize) < 32768)
268                 return 1;
269         return 0;
270 }
271
272 /*
273  * compare two keys in a memcmp fashion
274  */
275 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
276 {
277         struct btrfs_key k1;
278
279         btrfs_disk_key_to_cpu(&k1, disk);
280
281         if (k1.objectid > k2->objectid)
282                 return 1;
283         if (k1.objectid < k2->objectid)
284                 return -1;
285         if (k1.type > k2->type)
286                 return 1;
287         if (k1.type < k2->type)
288                 return -1;
289         if (k1.offset > k2->offset)
290                 return 1;
291         if (k1.offset < k2->offset)
292                 return -1;
293         return 0;
294 }
295
296
297 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
298                        struct btrfs_root *root, struct extent_buffer *parent,
299                        int start_slot, int cache_only, u64 *last_ret,
300                        struct btrfs_key *progress)
301 {
302         struct extent_buffer *cur;
303         struct extent_buffer *tmp;
304         u64 blocknr;
305         u64 gen;
306         u64 search_start = *last_ret;
307         u64 last_block = 0;
308         u64 other;
309         u32 parent_nritems;
310         int end_slot;
311         int i;
312         int err = 0;
313         int parent_level;
314         int uptodate;
315         u32 blocksize;
316         int progress_passed = 0;
317         struct btrfs_disk_key disk_key;
318
319         parent_level = btrfs_header_level(parent);
320         if (cache_only && parent_level != 1)
321                 return 0;
322
323         if (trans->transaction != root->fs_info->running_transaction) {
324                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
325                        root->fs_info->running_transaction->transid);
326                 WARN_ON(1);
327         }
328         if (trans->transid != root->fs_info->generation) {
329                 printk(KERN_CRIT "trans %Lu running %Lu\n", trans->transid,
330                        root->fs_info->generation);
331                 WARN_ON(1);
332         }
333
334         parent_nritems = btrfs_header_nritems(parent);
335         blocksize = btrfs_level_size(root, parent_level - 1);
336         end_slot = parent_nritems;
337
338         if (parent_nritems == 1)
339                 return 0;
340
341         for (i = start_slot; i < end_slot; i++) {
342                 int close = 1;
343
344                 if (!parent->map_token) {
345                         map_extent_buffer(parent,
346                                         btrfs_node_key_ptr_offset(i),
347                                         sizeof(struct btrfs_key_ptr),
348                                         &parent->map_token, &parent->kaddr,
349                                         &parent->map_start, &parent->map_len,
350                                         KM_USER1);
351                 }
352                 btrfs_node_key(parent, &disk_key, i);
353                 if (!progress_passed && comp_keys(&disk_key, progress) < 0)
354                         continue;
355
356                 progress_passed = 1;
357                 blocknr = btrfs_node_blockptr(parent, i);
358                 gen = btrfs_node_ptr_generation(parent, i);
359                 if (last_block == 0)
360                         last_block = blocknr;
361
362                 if (i > 0) {
363                         other = btrfs_node_blockptr(parent, i - 1);
364                         close = close_blocks(blocknr, other, blocksize);
365                 }
366                 if (close && i < end_slot - 2) {
367                         other = btrfs_node_blockptr(parent, i + 1);
368                         close = close_blocks(blocknr, other, blocksize);
369                 }
370                 if (close) {
371                         last_block = blocknr;
372                         continue;
373                 }
374                 if (parent->map_token) {
375                         unmap_extent_buffer(parent, parent->map_token,
376                                             KM_USER1);
377                         parent->map_token = NULL;
378                 }
379
380                 cur = btrfs_find_tree_block(root, blocknr, blocksize);
381                 if (cur)
382                         uptodate = btrfs_buffer_uptodate(cur, gen);
383                 else
384                         uptodate = 0;
385                 if (!cur || !uptodate) {
386                         if (cache_only) {
387                                 free_extent_buffer(cur);
388                                 continue;
389                         }
390                         if (!cur) {
391                                 cur = read_tree_block(root, blocknr,
392                                                          blocksize, gen);
393                         } else if (!uptodate) {
394                                 btrfs_read_buffer(cur, gen);
395                         }
396                 }
397                 if (search_start == 0)
398                         search_start = last_block;
399
400                 err = __btrfs_cow_block(trans, root, cur, parent, i,
401                                         &tmp, search_start,
402                                         min(16 * blocksize,
403                                             (end_slot - i) * blocksize));
404                 if (err) {
405                         free_extent_buffer(cur);
406                         break;
407                 }
408                 search_start = tmp->start;
409                 last_block = tmp->start;
410                 *last_ret = search_start;
411                 if (parent_level == 1)
412                         btrfs_clear_buffer_defrag(tmp);
413                 free_extent_buffer(tmp);
414         }
415         if (parent->map_token) {
416                 unmap_extent_buffer(parent, parent->map_token,
417                                     KM_USER1);
418                 parent->map_token = NULL;
419         }
420         return err;
421 }
422
423 /*
424  * The leaf data grows from end-to-front in the node.
425  * this returns the address of the start of the last item,
426  * which is the stop of the leaf data stack
427  */
428 static inline unsigned int leaf_data_end(struct btrfs_root *root,
429                                          struct extent_buffer *leaf)
430 {
431         u32 nr = btrfs_header_nritems(leaf);
432         if (nr == 0)
433                 return BTRFS_LEAF_DATA_SIZE(root);
434         return btrfs_item_offset_nr(leaf, nr - 1);
435 }
436
437 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
438                       int level)
439 {
440         struct extent_buffer *parent = NULL;
441         struct extent_buffer *node = path->nodes[level];
442         struct btrfs_disk_key parent_key;
443         struct btrfs_disk_key node_key;
444         int parent_slot;
445         int slot;
446         struct btrfs_key cpukey;
447         u32 nritems = btrfs_header_nritems(node);
448
449         if (path->nodes[level + 1])
450                 parent = path->nodes[level + 1];
451
452         slot = path->slots[level];
453         BUG_ON(nritems == 0);
454         if (parent) {
455                 parent_slot = path->slots[level + 1];
456                 btrfs_node_key(parent, &parent_key, parent_slot);
457                 btrfs_node_key(node, &node_key, 0);
458                 BUG_ON(memcmp(&parent_key, &node_key,
459                               sizeof(struct btrfs_disk_key)));
460                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
461                        btrfs_header_bytenr(node));
462         }
463         BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
464         if (slot != 0) {
465                 btrfs_node_key_to_cpu(node, &cpukey, slot - 1);
466                 btrfs_node_key(node, &node_key, slot);
467                 BUG_ON(comp_keys(&node_key, &cpukey) <= 0);
468         }
469         if (slot < nritems - 1) {
470                 btrfs_node_key_to_cpu(node, &cpukey, slot + 1);
471                 btrfs_node_key(node, &node_key, slot);
472                 BUG_ON(comp_keys(&node_key, &cpukey) >= 0);
473         }
474         return 0;
475 }
476
477 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
478                       int level)
479 {
480         struct extent_buffer *leaf = path->nodes[level];
481         struct extent_buffer *parent = NULL;
482         int parent_slot;
483         struct btrfs_key cpukey;
484         struct btrfs_disk_key parent_key;
485         struct btrfs_disk_key leaf_key;
486         int slot = path->slots[0];
487
488         u32 nritems = btrfs_header_nritems(leaf);
489
490         if (path->nodes[level + 1])
491                 parent = path->nodes[level + 1];
492
493         if (nritems == 0)
494                 return 0;
495
496         if (parent) {
497                 parent_slot = path->slots[level + 1];
498                 btrfs_node_key(parent, &parent_key, parent_slot);
499                 btrfs_item_key(leaf, &leaf_key, 0);
500
501                 BUG_ON(memcmp(&parent_key, &leaf_key,
502                        sizeof(struct btrfs_disk_key)));
503                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
504                        btrfs_header_bytenr(leaf));
505         }
506 #if 0
507         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
508                 btrfs_item_key_to_cpu(leaf, &cpukey, i + 1);
509                 btrfs_item_key(leaf, &leaf_key, i);
510                 if (comp_keys(&leaf_key, &cpukey) >= 0) {
511                         btrfs_print_leaf(root, leaf);
512                         printk("slot %d offset bad key\n", i);
513                         BUG_ON(1);
514                 }
515                 if (btrfs_item_offset_nr(leaf, i) !=
516                         btrfs_item_end_nr(leaf, i + 1)) {
517                         btrfs_print_leaf(root, leaf);
518                         printk("slot %d offset bad\n", i);
519                         BUG_ON(1);
520                 }
521                 if (i == 0) {
522                         if (btrfs_item_offset_nr(leaf, i) +
523                                btrfs_item_size_nr(leaf, i) !=
524                                BTRFS_LEAF_DATA_SIZE(root)) {
525                                 btrfs_print_leaf(root, leaf);
526                                 printk("slot %d first offset bad\n", i);
527                                 BUG_ON(1);
528                         }
529                 }
530         }
531         if (nritems > 0) {
532                 if (btrfs_item_size_nr(leaf, nritems - 1) > 4096) {
533                                 btrfs_print_leaf(root, leaf);
534                                 printk("slot %d bad size \n", nritems - 1);
535                                 BUG_ON(1);
536                 }
537         }
538 #endif
539         if (slot != 0 && slot < nritems - 1) {
540                 btrfs_item_key(leaf, &leaf_key, slot);
541                 btrfs_item_key_to_cpu(leaf, &cpukey, slot - 1);
542                 if (comp_keys(&leaf_key, &cpukey) <= 0) {
543                         btrfs_print_leaf(root, leaf);
544                         printk("slot %d offset bad key\n", slot);
545                         BUG_ON(1);
546                 }
547                 if (btrfs_item_offset_nr(leaf, slot - 1) !=
548                        btrfs_item_end_nr(leaf, slot)) {
549                         btrfs_print_leaf(root, leaf);
550                         printk("slot %d offset bad\n", slot);
551                         BUG_ON(1);
552                 }
553         }
554         if (slot < nritems - 1) {
555                 btrfs_item_key(leaf, &leaf_key, slot);
556                 btrfs_item_key_to_cpu(leaf, &cpukey, slot + 1);
557                 BUG_ON(comp_keys(&leaf_key, &cpukey) >= 0);
558                 if (btrfs_item_offset_nr(leaf, slot) !=
559                         btrfs_item_end_nr(leaf, slot + 1)) {
560                         btrfs_print_leaf(root, leaf);
561                         printk("slot %d offset bad\n", slot);
562                         BUG_ON(1);
563                 }
564         }
565         BUG_ON(btrfs_item_offset_nr(leaf, 0) +
566                btrfs_item_size_nr(leaf, 0) != BTRFS_LEAF_DATA_SIZE(root));
567         return 0;
568 }
569
570 static int noinline check_block(struct btrfs_root *root,
571                                 struct btrfs_path *path, int level)
572 {
573         u64 found_start;
574         return 0;
575         if (btrfs_header_level(path->nodes[level]) != level)
576             printk("warning: bad level %Lu wanted %d found %d\n",
577                    path->nodes[level]->start, level,
578                    btrfs_header_level(path->nodes[level]));
579         found_start = btrfs_header_bytenr(path->nodes[level]);
580         if (found_start != path->nodes[level]->start) {
581             printk("warning: bad bytentr %Lu found %Lu\n",
582                    path->nodes[level]->start, found_start);
583         }
584 #if 0
585         struct extent_buffer *buf = path->nodes[level];
586
587         if (memcmp_extent_buffer(buf, root->fs_info->fsid,
588                                  (unsigned long)btrfs_header_fsid(buf),
589                                  BTRFS_FSID_SIZE)) {
590                 printk("warning bad block %Lu\n", buf->start);
591                 return 1;
592         }
593 #endif
594         if (level == 0)
595                 return check_leaf(root, path, level);
596         return check_node(root, path, level);
597 }
598
599 /*
600  * search for key in the extent_buffer.  The items start at offset p,
601  * and they are item_size apart.  There are 'max' items in p.
602  *
603  * the slot in the array is returned via slot, and it points to
604  * the place where you would insert key if it is not found in
605  * the array.
606  *
607  * slot may point to max if the key is bigger than all of the keys
608  */
609 static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
610                               int item_size, struct btrfs_key *key,
611                               int max, int *slot)
612 {
613         int low = 0;
614         int high = max;
615         int mid;
616         int ret;
617         struct btrfs_disk_key *tmp = NULL;
618         struct btrfs_disk_key unaligned;
619         unsigned long offset;
620         char *map_token = NULL;
621         char *kaddr = NULL;
622         unsigned long map_start = 0;
623         unsigned long map_len = 0;
624         int err;
625
626         while(low < high) {
627                 mid = (low + high) / 2;
628                 offset = p + mid * item_size;
629
630                 if (!map_token || offset < map_start ||
631                     (offset + sizeof(struct btrfs_disk_key)) >
632                     map_start + map_len) {
633                         if (map_token) {
634                                 unmap_extent_buffer(eb, map_token, KM_USER0);
635                                 map_token = NULL;
636                         }
637                         err = map_extent_buffer(eb, offset,
638                                                 sizeof(struct btrfs_disk_key),
639                                                 &map_token, &kaddr,
640                                                 &map_start, &map_len, KM_USER0);
641
642                         if (!err) {
643                                 tmp = (struct btrfs_disk_key *)(kaddr + offset -
644                                                         map_start);
645                         } else {
646                                 read_extent_buffer(eb, &unaligned,
647                                                    offset, sizeof(unaligned));
648                                 tmp = &unaligned;
649                         }
650
651                 } else {
652                         tmp = (struct btrfs_disk_key *)(kaddr + offset -
653                                                         map_start);
654                 }
655                 ret = comp_keys(tmp, key);
656
657                 if (ret < 0)
658                         low = mid + 1;
659                 else if (ret > 0)
660                         high = mid;
661                 else {
662                         *slot = mid;
663                         if (map_token)
664                                 unmap_extent_buffer(eb, map_token, KM_USER0);
665                         return 0;
666                 }
667         }
668         *slot = low;
669         if (map_token)
670                 unmap_extent_buffer(eb, map_token, KM_USER0);
671         return 1;
672 }
673
674 /*
675  * simple bin_search frontend that does the right thing for
676  * leaves vs nodes
677  */
678 static int bin_search(struct extent_buffer *eb, struct btrfs_key *key,
679                       int level, int *slot)
680 {
681         if (level == 0) {
682                 return generic_bin_search(eb,
683                                           offsetof(struct btrfs_leaf, items),
684                                           sizeof(struct btrfs_item),
685                                           key, btrfs_header_nritems(eb),
686                                           slot);
687         } else {
688                 return generic_bin_search(eb,
689                                           offsetof(struct btrfs_node, ptrs),
690                                           sizeof(struct btrfs_key_ptr),
691                                           key, btrfs_header_nritems(eb),
692                                           slot);
693         }
694         return -1;
695 }
696
697 static struct extent_buffer *read_node_slot(struct btrfs_root *root,
698                                    struct extent_buffer *parent, int slot)
699 {
700         int level = btrfs_header_level(parent);
701         if (slot < 0)
702                 return NULL;
703         if (slot >= btrfs_header_nritems(parent))
704                 return NULL;
705
706         BUG_ON(level == 0);
707
708         return read_tree_block(root, btrfs_node_blockptr(parent, slot),
709                        btrfs_level_size(root, level - 1),
710                        btrfs_node_ptr_generation(parent, slot));
711 }
712
713 static int balance_level(struct btrfs_trans_handle *trans,
714                          struct btrfs_root *root,
715                          struct btrfs_path *path, int level)
716 {
717         struct extent_buffer *right = NULL;
718         struct extent_buffer *mid;
719         struct extent_buffer *left = NULL;
720         struct extent_buffer *parent = NULL;
721         int ret = 0;
722         int wret;
723         int pslot;
724         int orig_slot = path->slots[level];
725         int err_on_enospc = 0;
726         u64 orig_ptr;
727
728         if (level == 0)
729                 return 0;
730
731         mid = path->nodes[level];
732         WARN_ON(btrfs_header_generation(mid) != trans->transid);
733
734         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
735
736         if (level < BTRFS_MAX_LEVEL - 1)
737                 parent = path->nodes[level + 1];
738         pslot = path->slots[level + 1];
739
740         /*
741          * deal with the case where there is only one pointer in the root
742          * by promoting the node below to a root
743          */
744         if (!parent) {
745                 struct extent_buffer *child;
746
747                 if (btrfs_header_nritems(mid) != 1)
748                         return 0;
749
750                 /* promote the child to a root */
751                 child = read_node_slot(root, mid, 0);
752                 BUG_ON(!child);
753                 ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
754                 BUG_ON(ret);
755
756                 root->node = child;
757                 add_root_to_dirty_list(root);
758                 path->nodes[level] = NULL;
759                 clean_tree_block(trans, root, mid);
760                 /* once for the path */
761                 free_extent_buffer(mid);
762                 ret = btrfs_free_extent(trans, root, mid->start, mid->len,
763                                         root->root_key.objectid,
764                                         btrfs_header_generation(mid), 0, 0, 1);
765                 /* once for the root ptr */
766                 free_extent_buffer(mid);
767                 return ret;
768         }
769         if (btrfs_header_nritems(mid) >
770             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
771                 return 0;
772
773         if (btrfs_header_nritems(mid) < 2)
774                 err_on_enospc = 1;
775
776         left = read_node_slot(root, parent, pslot - 1);
777         if (left) {
778                 wret = btrfs_cow_block(trans, root, left,
779                                        parent, pslot - 1, &left);
780                 if (wret) {
781                         ret = wret;
782                         goto enospc;
783                 }
784         }
785         right = read_node_slot(root, parent, pslot + 1);
786         if (right) {
787                 wret = btrfs_cow_block(trans, root, right,
788                                        parent, pslot + 1, &right);
789                 if (wret) {
790                         ret = wret;
791                         goto enospc;
792                 }
793         }
794
795         /* first, try to make some room in the middle buffer */
796         if (left) {
797                 orig_slot += btrfs_header_nritems(left);
798                 wret = push_node_left(trans, root, left, mid, 1);
799                 if (wret < 0)
800                         ret = wret;
801                 if (btrfs_header_nritems(mid) < 2)
802                         err_on_enospc = 1;
803         }
804
805         /*
806          * then try to empty the right most buffer into the middle
807          */
808         if (right) {
809                 wret = push_node_left(trans, root, mid, right, 1);
810                 if (wret < 0 && wret != -ENOSPC)
811                         ret = wret;
812                 if (btrfs_header_nritems(right) == 0) {
813                         u64 bytenr = right->start;
814                         u64 generation = btrfs_header_generation(parent);
815                         u32 blocksize = right->len;
816
817                         clean_tree_block(trans, root, right);
818                         free_extent_buffer(right);
819                         right = NULL;
820                         wret = del_ptr(trans, root, path, level + 1, pslot +
821                                        1);
822                         if (wret)
823                                 ret = wret;
824                         wret = btrfs_free_extent(trans, root, bytenr,
825                                                  blocksize,
826                                                  btrfs_header_owner(parent),
827                                                  generation, 0, 0, 1);
828                         if (wret)
829                                 ret = wret;
830                 } else {
831                         struct btrfs_disk_key right_key;
832                         btrfs_node_key(right, &right_key, 0);
833                         btrfs_set_node_key(parent, &right_key, pslot + 1);
834                         btrfs_mark_buffer_dirty(parent);
835                 }
836         }
837         if (btrfs_header_nritems(mid) == 1) {
838                 /*
839                  * we're not allowed to leave a node with one item in the
840                  * tree during a delete.  A deletion from lower in the tree
841                  * could try to delete the only pointer in this node.
842                  * So, pull some keys from the left.
843                  * There has to be a left pointer at this point because
844                  * otherwise we would have pulled some pointers from the
845                  * right
846                  */
847                 BUG_ON(!left);
848                 wret = balance_node_right(trans, root, mid, left);
849                 if (wret < 0) {
850                         ret = wret;
851                         goto enospc;
852                 }
853                 if (wret == 1) {
854                         wret = push_node_left(trans, root, left, mid, 1);
855                         if (wret < 0)
856                                 ret = wret;
857                 }
858                 BUG_ON(wret == 1);
859         }
860         if (btrfs_header_nritems(mid) == 0) {
861                 /* we've managed to empty the middle node, drop it */
862                 u64 root_gen = btrfs_header_generation(parent);
863                 u64 bytenr = mid->start;
864                 u32 blocksize = mid->len;
865                 clean_tree_block(trans, root, mid);
866                 free_extent_buffer(mid);
867                 mid = NULL;
868                 wret = del_ptr(trans, root, path, level + 1, pslot);
869                 if (wret)
870                         ret = wret;
871                 wret = btrfs_free_extent(trans, root, bytenr, blocksize,
872                                          btrfs_header_owner(parent),
873                                          root_gen, 0, 0, 1);
874                 if (wret)
875                         ret = wret;
876         } else {
877                 /* update the parent key to reflect our changes */
878                 struct btrfs_disk_key mid_key;
879                 btrfs_node_key(mid, &mid_key, 0);
880                 btrfs_set_node_key(parent, &mid_key, pslot);
881                 btrfs_mark_buffer_dirty(parent);
882         }
883
884         /* update the path */
885         if (left) {
886                 if (btrfs_header_nritems(left) > orig_slot) {
887                         extent_buffer_get(left);
888                         path->nodes[level] = left;
889                         path->slots[level + 1] -= 1;
890                         path->slots[level] = orig_slot;
891                         if (mid)
892                                 free_extent_buffer(mid);
893                 } else {
894                         orig_slot -= btrfs_header_nritems(left);
895                         path->slots[level] = orig_slot;
896                 }
897         }
898         /* double check we haven't messed things up */
899         check_block(root, path, level);
900         if (orig_ptr !=
901             btrfs_node_blockptr(path->nodes[level], path->slots[level]))
902                 BUG();
903 enospc:
904         if (right)
905                 free_extent_buffer(right);
906         if (left)
907                 free_extent_buffer(left);
908         return ret;
909 }
910
911 /* returns zero if the push worked, non-zero otherwise */
912 static int noinline push_nodes_for_insert(struct btrfs_trans_handle *trans,
913                                           struct btrfs_root *root,
914                                           struct btrfs_path *path, int level)
915 {
916         struct extent_buffer *right = NULL;
917         struct extent_buffer *mid;
918         struct extent_buffer *left = NULL;
919         struct extent_buffer *parent = NULL;
920         int ret = 0;
921         int wret;
922         int pslot;
923         int orig_slot = path->slots[level];
924         u64 orig_ptr;
925
926         if (level == 0)
927                 return 1;
928
929         mid = path->nodes[level];
930         WARN_ON(btrfs_header_generation(mid) != trans->transid);
931         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
932
933         if (level < BTRFS_MAX_LEVEL - 1)
934                 parent = path->nodes[level + 1];
935         pslot = path->slots[level + 1];
936
937         if (!parent)
938                 return 1;
939
940         left = read_node_slot(root, parent, pslot - 1);
941
942         /* first, try to make some room in the middle buffer */
943         if (left) {
944                 u32 left_nr;
945                 left_nr = btrfs_header_nritems(left);
946                 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
947                         wret = 1;
948                 } else {
949                         ret = btrfs_cow_block(trans, root, left, parent,
950                                               pslot - 1, &left);
951                         if (ret)
952                                 wret = 1;
953                         else {
954                                 wret = push_node_left(trans, root,
955                                                       left, mid, 0);
956                         }
957                 }
958                 if (wret < 0)
959                         ret = wret;
960                 if (wret == 0) {
961                         struct btrfs_disk_key disk_key;
962                         orig_slot += left_nr;
963                         btrfs_node_key(mid, &disk_key, 0);
964                         btrfs_set_node_key(parent, &disk_key, pslot);
965                         btrfs_mark_buffer_dirty(parent);
966                         if (btrfs_header_nritems(left) > orig_slot) {
967                                 path->nodes[level] = left;
968                                 path->slots[level + 1] -= 1;
969                                 path->slots[level] = orig_slot;
970                                 free_extent_buffer(mid);
971                         } else {
972                                 orig_slot -=
973                                         btrfs_header_nritems(left);
974                                 path->slots[level] = orig_slot;
975                                 free_extent_buffer(left);
976                         }
977                         return 0;
978                 }
979                 free_extent_buffer(left);
980         }
981         right= read_node_slot(root, parent, pslot + 1);
982
983         /*
984          * then try to empty the right most buffer into the middle
985          */
986         if (right) {
987                 u32 right_nr;
988                 right_nr = btrfs_header_nritems(right);
989                 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
990                         wret = 1;
991                 } else {
992                         ret = btrfs_cow_block(trans, root, right,
993                                               parent, pslot + 1,
994                                               &right);
995                         if (ret)
996                                 wret = 1;
997                         else {
998                                 wret = balance_node_right(trans, root,
999                                                           right, mid);
1000                         }
1001                 }
1002                 if (wret < 0)
1003                         ret = wret;
1004                 if (wret == 0) {
1005                         struct btrfs_disk_key disk_key;
1006
1007                         btrfs_node_key(right, &disk_key, 0);
1008                         btrfs_set_node_key(parent, &disk_key, pslot + 1);
1009                         btrfs_mark_buffer_dirty(parent);
1010
1011                         if (btrfs_header_nritems(mid) <= orig_slot) {
1012                                 path->nodes[level] = right;
1013                                 path->slots[level + 1] += 1;
1014                                 path->slots[level] = orig_slot -
1015                                         btrfs_header_nritems(mid);
1016                                 free_extent_buffer(mid);
1017                         } else {
1018                                 free_extent_buffer(right);
1019                         }
1020                         return 0;
1021                 }
1022                 free_extent_buffer(right);
1023         }
1024         return 1;
1025 }
1026
1027 /*
1028  * readahead one full node of leaves
1029  */
1030 static void reada_for_search(struct btrfs_root *root, struct btrfs_path *path,
1031                              int level, int slot, u64 objectid)
1032 {
1033         struct extent_buffer *node;
1034         struct btrfs_disk_key disk_key;
1035         u32 nritems;
1036         u64 search;
1037         u64 lowest_read;
1038         u64 highest_read;
1039         u64 nread = 0;
1040         int direction = path->reada;
1041         struct extent_buffer *eb;
1042         u32 nr;
1043         u32 blocksize;
1044         u32 nscan = 0;
1045
1046         if (level != 1)
1047                 return;
1048
1049         if (!path->nodes[level])
1050                 return;
1051
1052         node = path->nodes[level];
1053         search = btrfs_node_blockptr(node, slot);
1054         blocksize = btrfs_level_size(root, level - 1);
1055         eb = btrfs_find_tree_block(root, search, blocksize);
1056         if (eb) {
1057                 free_extent_buffer(eb);
1058                 return;
1059         }
1060
1061         highest_read = search;
1062         lowest_read = search;
1063
1064         nritems = btrfs_header_nritems(node);
1065         nr = slot;
1066         while(1) {
1067                 if (direction < 0) {
1068                         if (nr == 0)
1069                                 break;
1070                         nr--;
1071                 } else if (direction > 0) {
1072                         nr++;
1073                         if (nr >= nritems)
1074                                 break;
1075                 }
1076                 if (path->reada < 0 && objectid) {
1077                         btrfs_node_key(node, &disk_key, nr);
1078                         if (btrfs_disk_key_objectid(&disk_key) != objectid)
1079                                 break;
1080                 }
1081                 search = btrfs_node_blockptr(node, nr);
1082                 if ((search >= lowest_read && search <= highest_read) ||
1083                     (search < lowest_read && lowest_read - search <= 32768) ||
1084                     (search > highest_read && search - highest_read <= 32768)) {
1085                         readahead_tree_block(root, search, blocksize,
1086                                      btrfs_node_ptr_generation(node, nr));
1087                         nread += blocksize;
1088                 }
1089                 nscan++;
1090                 if (path->reada < 2 && (nread > (256 * 1024) || nscan > 32))
1091                         break;
1092                 if(nread > (1024 * 1024) || nscan > 128)
1093                         break;
1094
1095                 if (search < lowest_read)
1096                         lowest_read = search;
1097                 if (search > highest_read)
1098                         highest_read = search;
1099         }
1100 }
1101 /*
1102  * look for key in the tree.  path is filled in with nodes along the way
1103  * if key is found, we return zero and you can find the item in the leaf
1104  * level of the path (level 0)
1105  *
1106  * If the key isn't found, the path points to the slot where it should
1107  * be inserted, and 1 is returned.  If there are other errors during the
1108  * search a negative error number is returned.
1109  *
1110  * if ins_len > 0, nodes and leaves will be split as we walk down the
1111  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
1112  * possible)
1113  */
1114 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
1115                       *root, struct btrfs_key *key, struct btrfs_path *p, int
1116                       ins_len, int cow)
1117 {
1118         struct extent_buffer *b;
1119         int slot;
1120         int ret;
1121         int level;
1122         int should_reada = p->reada;
1123         u8 lowest_level = 0;
1124
1125         lowest_level = p->lowest_level;
1126         WARN_ON(lowest_level && ins_len);
1127         WARN_ON(p->nodes[0] != NULL);
1128         WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
1129 again:
1130         b = root->node;
1131         extent_buffer_get(b);
1132         while (b) {
1133                 level = btrfs_header_level(b);
1134                 if (cow) {
1135                         int wret;
1136                         wret = btrfs_cow_block(trans, root, b,
1137                                                p->nodes[level + 1],
1138                                                p->slots[level + 1],
1139                                                &b);
1140                         if (wret) {
1141                                 free_extent_buffer(b);
1142                                 return wret;
1143                         }
1144                 }
1145                 BUG_ON(!cow && ins_len);
1146                 if (level != btrfs_header_level(b))
1147                         WARN_ON(1);
1148                 level = btrfs_header_level(b);
1149                 p->nodes[level] = b;
1150                 ret = check_block(root, p, level);
1151                 if (ret)
1152                         return -1;
1153                 ret = bin_search(b, key, level, &slot);
1154                 if (level != 0) {
1155                         if (ret && slot > 0)
1156                                 slot -= 1;
1157                         p->slots[level] = slot;
1158                         if (ins_len > 0 && btrfs_header_nritems(b) >=
1159                             BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
1160                                 int sret = split_node(trans, root, p, level);
1161                                 BUG_ON(sret > 0);
1162                                 if (sret)
1163                                         return sret;
1164                                 b = p->nodes[level];
1165                                 slot = p->slots[level];
1166                         } else if (ins_len < 0) {
1167                                 int sret = balance_level(trans, root, p,
1168                                                          level);
1169                                 if (sret)
1170                                         return sret;
1171                                 b = p->nodes[level];
1172                                 if (!b) {
1173                                         btrfs_release_path(NULL, p);
1174                                         goto again;
1175                                 }
1176                                 slot = p->slots[level];
1177                                 BUG_ON(btrfs_header_nritems(b) == 1);
1178                         }
1179                         /* this is only true while dropping a snapshot */
1180                         if (level == lowest_level)
1181                                 break;
1182
1183                         if (should_reada)
1184                                 reada_for_search(root, p, level, slot,
1185                                                  key->objectid);
1186
1187                         b = read_node_slot(root, b, slot);
1188                 } else {
1189                         p->slots[level] = slot;
1190                         if (ins_len > 0 && btrfs_leaf_free_space(root, b) <
1191                             sizeof(struct btrfs_item) + ins_len) {
1192                                 int sret = split_leaf(trans, root, key,
1193                                                       p, ins_len, ret == 0);
1194                                 BUG_ON(sret > 0);
1195                                 if (sret)
1196                                         return sret;
1197                         }
1198                         return ret;
1199                 }
1200         }
1201         return 1;
1202 }
1203
1204 /*
1205  * adjust the pointers going up the tree, starting at level
1206  * making sure the right key of each node is points to 'key'.
1207  * This is used after shifting pointers to the left, so it stops
1208  * fixing up pointers when a given leaf/node is not in slot 0 of the
1209  * higher levels
1210  *
1211  * If this fails to write a tree block, it returns -1, but continues
1212  * fixing up the blocks in ram so the tree is consistent.
1213  */
1214 static int fixup_low_keys(struct btrfs_trans_handle *trans,
1215                           struct btrfs_root *root, struct btrfs_path *path,
1216                           struct btrfs_disk_key *key, int level)
1217 {
1218         int i;
1219         int ret = 0;
1220         struct extent_buffer *t;
1221
1222         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
1223                 int tslot = path->slots[i];
1224                 if (!path->nodes[i])
1225                         break;
1226                 t = path->nodes[i];
1227                 btrfs_set_node_key(t, key, tslot);
1228                 btrfs_mark_buffer_dirty(path->nodes[i]);
1229                 if (tslot != 0)
1230                         break;
1231         }
1232         return ret;
1233 }
1234
1235 /*
1236  * try to push data from one node into the next node left in the
1237  * tree.
1238  *
1239  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
1240  * error, and > 0 if there was no room in the left hand block.
1241  */
1242 static int push_node_left(struct btrfs_trans_handle *trans,
1243                           struct btrfs_root *root, struct extent_buffer *dst,
1244                           struct extent_buffer *src, int empty)
1245 {
1246         int push_items = 0;
1247         int src_nritems;
1248         int dst_nritems;
1249         int ret = 0;
1250
1251         src_nritems = btrfs_header_nritems(src);
1252         dst_nritems = btrfs_header_nritems(dst);
1253         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1254         WARN_ON(btrfs_header_generation(src) != trans->transid);
1255         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1256
1257         if (!empty && src_nritems <= 8)
1258                 return 1;
1259
1260         if (push_items <= 0) {
1261                 return 1;
1262         }
1263
1264         if (empty) {
1265                 push_items = min(src_nritems, push_items);
1266                 if (push_items < src_nritems) {
1267                         /* leave at least 8 pointers in the node if
1268                          * we aren't going to empty it
1269                          */
1270                         if (src_nritems - push_items < 8) {
1271                                 if (push_items <= 8)
1272                                         return 1;
1273                                 push_items -= 8;
1274                         }
1275                 }
1276         } else
1277                 push_items = min(src_nritems - 8, push_items);
1278
1279         copy_extent_buffer(dst, src,
1280                            btrfs_node_key_ptr_offset(dst_nritems),
1281                            btrfs_node_key_ptr_offset(0),
1282                            push_items * sizeof(struct btrfs_key_ptr));
1283
1284         if (push_items < src_nritems) {
1285                 memmove_extent_buffer(src, btrfs_node_key_ptr_offset(0),
1286                                       btrfs_node_key_ptr_offset(push_items),
1287                                       (src_nritems - push_items) *
1288                                       sizeof(struct btrfs_key_ptr));
1289         }
1290         btrfs_set_header_nritems(src, src_nritems - push_items);
1291         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1292         btrfs_mark_buffer_dirty(src);
1293         btrfs_mark_buffer_dirty(dst);
1294         return ret;
1295 }
1296
1297 /*
1298  * try to push data from one node into the next node right in the
1299  * tree.
1300  *
1301  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
1302  * error, and > 0 if there was no room in the right hand block.
1303  *
1304  * this will  only push up to 1/2 the contents of the left node over
1305  */
1306 static int balance_node_right(struct btrfs_trans_handle *trans,
1307                               struct btrfs_root *root,
1308                               struct extent_buffer *dst,
1309                               struct extent_buffer *src)
1310 {
1311         int push_items = 0;
1312         int max_push;
1313         int src_nritems;
1314         int dst_nritems;
1315         int ret = 0;
1316
1317         WARN_ON(btrfs_header_generation(src) != trans->transid);
1318         WARN_ON(btrfs_header_generation(dst) != trans->transid);
1319
1320         src_nritems = btrfs_header_nritems(src);
1321         dst_nritems = btrfs_header_nritems(dst);
1322         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
1323         if (push_items <= 0) {
1324                 return 1;
1325         }
1326
1327         if (src_nritems < 4) {
1328                 return 1;
1329         }
1330
1331         max_push = src_nritems / 2 + 1;
1332         /* don't try to empty the node */
1333         if (max_push >= src_nritems) {
1334                 return 1;
1335         }
1336
1337         if (max_push < push_items)
1338                 push_items = max_push;
1339
1340         memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(push_items),
1341                                       btrfs_node_key_ptr_offset(0),
1342                                       (dst_nritems) *
1343                                       sizeof(struct btrfs_key_ptr));
1344
1345         copy_extent_buffer(dst, src,
1346                            btrfs_node_key_ptr_offset(0),
1347                            btrfs_node_key_ptr_offset(src_nritems - push_items),
1348                            push_items * sizeof(struct btrfs_key_ptr));
1349
1350         btrfs_set_header_nritems(src, src_nritems - push_items);
1351         btrfs_set_header_nritems(dst, dst_nritems + push_items);
1352
1353         btrfs_mark_buffer_dirty(src);
1354         btrfs_mark_buffer_dirty(dst);
1355         return ret;
1356 }
1357
1358 /*
1359  * helper function to insert a new root level in the tree.
1360  * A new node is allocated, and a single item is inserted to
1361  * point to the existing root
1362  *
1363  * returns zero on success or < 0 on failure.
1364  */
1365 static int noinline insert_new_root(struct btrfs_trans_handle *trans,
1366                            struct btrfs_root *root,
1367                            struct btrfs_path *path, int level)
1368 {
1369         u64 root_gen;
1370         u64 lower_gen;
1371         struct extent_buffer *lower;
1372         struct extent_buffer *c;
1373         struct btrfs_disk_key lower_key;
1374
1375         BUG_ON(path->nodes[level]);
1376         BUG_ON(path->nodes[level-1] != root->node);
1377
1378         if (root->ref_cows)
1379                 root_gen = trans->transid;
1380         else
1381                 root_gen = 0;
1382
1383         lower = path->nodes[level-1];
1384         if (level == 1)
1385                 btrfs_item_key(lower, &lower_key, 0);
1386         else
1387                 btrfs_node_key(lower, &lower_key, 0);
1388
1389         c = __btrfs_alloc_free_block(trans, root, root->nodesize,
1390                                    root->root_key.objectid,
1391                                    root_gen, lower_key.objectid, level,
1392                                    root->node->start, 0);
1393         if (IS_ERR(c))
1394                 return PTR_ERR(c);
1395         memset_extent_buffer(c, 0, 0, root->nodesize);
1396         btrfs_set_header_nritems(c, 1);
1397         btrfs_set_header_level(c, level);
1398         btrfs_set_header_bytenr(c, c->start);
1399         btrfs_set_header_generation(c, trans->transid);
1400         btrfs_set_header_owner(c, root->root_key.objectid);
1401
1402         write_extent_buffer(c, root->fs_info->fsid,
1403                             (unsigned long)btrfs_header_fsid(c),
1404                             BTRFS_FSID_SIZE);
1405
1406         write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
1407                             (unsigned long)btrfs_header_chunk_tree_uuid(c),
1408                             BTRFS_UUID_SIZE);
1409
1410         btrfs_set_node_key(c, &lower_key, 0);
1411         btrfs_set_node_blockptr(c, 0, lower->start);
1412         lower_gen = btrfs_header_generation(lower);
1413         WARN_ON(lower_gen == 0);
1414
1415         btrfs_set_node_ptr_generation(c, 0, lower_gen);
1416
1417         btrfs_mark_buffer_dirty(c);
1418
1419         /* the super has an extra ref to root->node */
1420         free_extent_buffer(root->node);
1421         root->node = c;
1422         add_root_to_dirty_list(root);
1423         extent_buffer_get(c);
1424         path->nodes[level] = c;
1425         path->slots[level] = 0;
1426
1427         if (root->ref_cows && lower_gen != trans->transid) {
1428                 struct btrfs_path *back_path = btrfs_alloc_path();
1429                 int ret;
1430                 ret = btrfs_insert_extent_backref(trans,
1431                                                   root->fs_info->extent_root,
1432                                                   path, lower->start,
1433                                                   root->root_key.objectid,
1434                                                   trans->transid, 0, 0);
1435                 BUG_ON(ret);
1436                 btrfs_free_path(back_path);
1437         }
1438         return 0;
1439 }
1440
1441 /*
1442  * worker function to insert a single pointer in a node.
1443  * the node should have enough room for the pointer already
1444  *
1445  * slot and level indicate where you want the key to go, and
1446  * blocknr is the block the key points to.
1447  *
1448  * returns zero on success and < 0 on any error
1449  */
1450 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
1451                       *root, struct btrfs_path *path, struct btrfs_disk_key
1452                       *key, u64 bytenr, int slot, int level)
1453 {
1454         struct extent_buffer *lower;
1455         int nritems;
1456
1457         BUG_ON(!path->nodes[level]);
1458         lower = path->nodes[level];
1459         nritems = btrfs_header_nritems(lower);
1460         if (slot > nritems)
1461                 BUG();
1462         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
1463                 BUG();
1464         if (slot != nritems) {
1465                 memmove_extent_buffer(lower,
1466                               btrfs_node_key_ptr_offset(slot + 1),
1467                               btrfs_node_key_ptr_offset(slot),
1468                               (nritems - slot) * sizeof(struct btrfs_key_ptr));
1469         }
1470         btrfs_set_node_key(lower, key, slot);
1471         btrfs_set_node_blockptr(lower, slot, bytenr);
1472         WARN_ON(trans->transid == 0);
1473         btrfs_set_node_ptr_generation(lower, slot, trans->transid);
1474         btrfs_set_header_nritems(lower, nritems + 1);
1475         btrfs_mark_buffer_dirty(lower);
1476         return 0;
1477 }
1478
1479 /*
1480  * split the node at the specified level in path in two.
1481  * The path is corrected to point to the appropriate node after the split
1482  *
1483  * Before splitting this tries to make some room in the node by pushing
1484  * left and right, if either one works, it returns right away.
1485  *
1486  * returns 0 on success and < 0 on failure
1487  */
1488 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
1489                       *root, struct btrfs_path *path, int level)
1490 {
1491         u64 root_gen;
1492         struct extent_buffer *c;
1493         struct extent_buffer *split;
1494         struct btrfs_disk_key disk_key;
1495         int mid;
1496         int ret;
1497         int wret;
1498         u32 c_nritems;
1499
1500         c = path->nodes[level];
1501         WARN_ON(btrfs_header_generation(c) != trans->transid);
1502         if (c == root->node) {
1503                 /* trying to split the root, lets make a new one */
1504                 ret = insert_new_root(trans, root, path, level + 1);
1505                 if (ret)
1506                         return ret;
1507         } else {
1508                 ret = push_nodes_for_insert(trans, root, path, level);
1509                 c = path->nodes[level];
1510                 if (!ret && btrfs_header_nritems(c) <
1511                     BTRFS_NODEPTRS_PER_BLOCK(root) - 3)
1512                         return 0;
1513                 if (ret < 0)
1514                         return ret;
1515         }
1516
1517         c_nritems = btrfs_header_nritems(c);
1518         if (root->ref_cows)
1519                 root_gen = trans->transid;
1520         else
1521                 root_gen = 0;
1522
1523         btrfs_node_key(c, &disk_key, 0);
1524         split = __btrfs_alloc_free_block(trans, root, root->nodesize,
1525                                          root->root_key.objectid,
1526                                          root_gen,
1527                                          btrfs_disk_key_objectid(&disk_key),
1528                                          level, c->start, 0);
1529         if (IS_ERR(split))
1530                 return PTR_ERR(split);
1531
1532         btrfs_set_header_flags(split, btrfs_header_flags(c));
1533         btrfs_set_header_level(split, btrfs_header_level(c));
1534         btrfs_set_header_bytenr(split, split->start);
1535         btrfs_set_header_generation(split, trans->transid);
1536         btrfs_set_header_owner(split, root->root_key.objectid);
1537         btrfs_set_header_flags(split, 0);
1538         write_extent_buffer(split, root->fs_info->fsid,
1539                             (unsigned long)btrfs_header_fsid(split),
1540                             BTRFS_FSID_SIZE);
1541         write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
1542                             (unsigned long)btrfs_header_chunk_tree_uuid(split),
1543                             BTRFS_UUID_SIZE);
1544
1545         mid = (c_nritems + 1) / 2;
1546
1547         copy_extent_buffer(split, c,
1548                            btrfs_node_key_ptr_offset(0),
1549                            btrfs_node_key_ptr_offset(mid),
1550                            (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
1551         btrfs_set_header_nritems(split, c_nritems - mid);
1552         btrfs_set_header_nritems(c, mid);
1553         ret = 0;
1554
1555         btrfs_mark_buffer_dirty(c);
1556         btrfs_mark_buffer_dirty(split);
1557
1558         btrfs_node_key(split, &disk_key, 0);
1559         wret = insert_ptr(trans, root, path, &disk_key, split->start,
1560                           path->slots[level + 1] + 1,
1561                           level + 1);
1562         if (wret)
1563                 ret = wret;
1564
1565         if (path->slots[level] >= mid) {
1566                 path->slots[level] -= mid;
1567                 free_extent_buffer(c);
1568                 path->nodes[level] = split;
1569                 path->slots[level + 1] += 1;
1570         } else {
1571                 free_extent_buffer(split);
1572         }
1573         return ret;
1574 }
1575
1576 /*
1577  * how many bytes are required to store the items in a leaf.  start
1578  * and nr indicate which items in the leaf to check.  This totals up the
1579  * space used both by the item structs and the item data
1580  */
1581 static int leaf_space_used(struct extent_buffer *l, int start, int nr)
1582 {
1583         int data_len;
1584         int nritems = btrfs_header_nritems(l);
1585         int end = min(nritems, start + nr) - 1;
1586
1587         if (!nr)
1588                 return 0;
1589         data_len = btrfs_item_end_nr(l, start);
1590         data_len = data_len - btrfs_item_offset_nr(l, end);
1591         data_len += sizeof(struct btrfs_item) * nr;
1592         WARN_ON(data_len < 0);
1593         return data_len;
1594 }
1595
1596 /*
1597  * The space between the end of the leaf items and
1598  * the start of the leaf data.  IOW, how much room
1599  * the leaf has left for both items and data
1600  */
1601 int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf)
1602 {
1603         int nritems = btrfs_header_nritems(leaf);
1604         int ret;
1605         ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
1606         if (ret < 0) {
1607                 printk("leaf free space ret %d, leaf data size %lu, used %d nritems %d\n",
1608                        ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
1609                        leaf_space_used(leaf, 0, nritems), nritems);
1610         }
1611         return ret;
1612 }
1613
1614 /*
1615  * push some data in the path leaf to the right, trying to free up at
1616  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1617  *
1618  * returns 1 if the push failed because the other node didn't have enough
1619  * room, 0 if everything worked out and < 0 if there were major errors.
1620  */
1621 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
1622                            *root, struct btrfs_path *path, int data_size,
1623                            int empty)
1624 {
1625         struct extent_buffer *left = path->nodes[0];
1626         struct extent_buffer *right;
1627         struct extent_buffer *upper;
1628         struct btrfs_disk_key disk_key;
1629         int slot;
1630         u32 i;
1631         int free_space;
1632         int push_space = 0;
1633         int push_items = 0;
1634         struct btrfs_item *item;
1635         u32 left_nritems;
1636         u32 nr;
1637         u32 right_nritems;
1638         u32 data_end;
1639         u32 this_item_size;
1640         int ret;
1641
1642         slot = path->slots[1];
1643         if (!path->nodes[1]) {
1644                 return 1;
1645         }
1646         upper = path->nodes[1];
1647         if (slot >= btrfs_header_nritems(upper) - 1)
1648                 return 1;
1649
1650         right = read_node_slot(root, upper, slot + 1);
1651         free_space = btrfs_leaf_free_space(root, right);
1652         if (free_space < data_size + sizeof(struct btrfs_item)) {
1653                 free_extent_buffer(right);
1654                 return 1;
1655         }
1656
1657         /* cow and double check */
1658         ret = btrfs_cow_block(trans, root, right, upper,
1659                               slot + 1, &right);
1660         if (ret) {
1661                 free_extent_buffer(right);
1662                 return 1;
1663         }
1664         free_space = btrfs_leaf_free_space(root, right);
1665         if (free_space < data_size + sizeof(struct btrfs_item)) {
1666                 free_extent_buffer(right);
1667                 return 1;
1668         }
1669
1670         left_nritems = btrfs_header_nritems(left);
1671         if (left_nritems == 0) {
1672                 free_extent_buffer(right);
1673                 return 1;
1674         }
1675
1676         if (empty)
1677                 nr = 0;
1678         else
1679                 nr = 1;
1680
1681         i = left_nritems - 1;
1682         while (i >= nr) {
1683                 item = btrfs_item_nr(left, i);
1684
1685                 if (path->slots[0] == i)
1686                         push_space += data_size + sizeof(*item);
1687
1688                 if (!left->map_token) {
1689                         map_extent_buffer(left, (unsigned long)item,
1690                                         sizeof(struct btrfs_item),
1691                                         &left->map_token, &left->kaddr,
1692                                         &left->map_start, &left->map_len,
1693                                         KM_USER1);
1694                 }
1695
1696                 this_item_size = btrfs_item_size(left, item);
1697                 if (this_item_size + sizeof(*item) + push_space > free_space)
1698                         break;
1699                 push_items++;
1700                 push_space += this_item_size + sizeof(*item);
1701                 if (i == 0)
1702                         break;
1703                 i--;
1704         }
1705         if (left->map_token) {
1706                 unmap_extent_buffer(left, left->map_token, KM_USER1);
1707                 left->map_token = NULL;
1708         }
1709
1710         if (push_items == 0) {
1711                 free_extent_buffer(right);
1712                 return 1;
1713         }
1714
1715         if (!empty && push_items == left_nritems)
1716                 WARN_ON(1);
1717
1718         /* push left to right */
1719         right_nritems = btrfs_header_nritems(right);
1720
1721         push_space = btrfs_item_end_nr(left, left_nritems - push_items);
1722         push_space -= leaf_data_end(root, left);
1723
1724         /* make room in the right data area */
1725         data_end = leaf_data_end(root, right);
1726         memmove_extent_buffer(right,
1727                               btrfs_leaf_data(right) + data_end - push_space,
1728                               btrfs_leaf_data(right) + data_end,
1729                               BTRFS_LEAF_DATA_SIZE(root) - data_end);
1730
1731         /* copy from the left data area */
1732         copy_extent_buffer(right, left, btrfs_leaf_data(right) +
1733                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
1734                      btrfs_leaf_data(left) + leaf_data_end(root, left),
1735                      push_space);
1736
1737         memmove_extent_buffer(right, btrfs_item_nr_offset(push_items),
1738                               btrfs_item_nr_offset(0),
1739                               right_nritems * sizeof(struct btrfs_item));
1740
1741         /* copy the items from left to right */
1742         copy_extent_buffer(right, left, btrfs_item_nr_offset(0),
1743                    btrfs_item_nr_offset(left_nritems - push_items),
1744                    push_items * sizeof(struct btrfs_item));
1745
1746         /* update the item pointers */
1747         right_nritems += push_items;
1748         btrfs_set_header_nritems(right, right_nritems);
1749         push_space = BTRFS_LEAF_DATA_SIZE(root);
1750         for (i = 0; i < right_nritems; i++) {
1751                 item = btrfs_item_nr(right, i);
1752                 if (!right->map_token) {
1753                         map_extent_buffer(right, (unsigned long)item,
1754                                         sizeof(struct btrfs_item),
1755                                         &right->map_token, &right->kaddr,
1756                                         &right->map_start, &right->map_len,
1757                                         KM_USER1);
1758                 }
1759                 push_space -= btrfs_item_size(right, item);
1760                 btrfs_set_item_offset(right, item, push_space);
1761         }
1762
1763         if (right->map_token) {
1764                 unmap_extent_buffer(right, right->map_token, KM_USER1);
1765                 right->map_token = NULL;
1766         }
1767         left_nritems -= push_items;
1768         btrfs_set_header_nritems(left, left_nritems);
1769
1770         if (left_nritems)
1771                 btrfs_mark_buffer_dirty(left);
1772         btrfs_mark_buffer_dirty(right);
1773
1774         btrfs_item_key(right, &disk_key, 0);
1775         btrfs_set_node_key(upper, &disk_key, slot + 1);
1776         btrfs_mark_buffer_dirty(upper);
1777
1778         /* then fixup the leaf pointer in the path */
1779         if (path->slots[0] >= left_nritems) {
1780                 path->slots[0] -= left_nritems;
1781                 free_extent_buffer(path->nodes[0]);
1782                 path->nodes[0] = right;
1783                 path->slots[1] += 1;
1784         } else {
1785                 free_extent_buffer(right);
1786         }
1787         return 0;
1788 }
1789 /*
1790  * push some data in the path leaf to the left, trying to free up at
1791  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1792  */
1793 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1794                           *root, struct btrfs_path *path, int data_size,
1795                           int empty)
1796 {
1797         struct btrfs_disk_key disk_key;
1798         struct extent_buffer *right = path->nodes[0];
1799         struct extent_buffer *left;
1800         int slot;
1801         int i;
1802         int free_space;
1803         int push_space = 0;
1804         int push_items = 0;
1805         struct btrfs_item *item;
1806         u32 old_left_nritems;
1807         u32 right_nritems;
1808         u32 nr;
1809         int ret = 0;
1810         int wret;
1811         u32 this_item_size;
1812         u32 old_left_item_size;
1813
1814         slot = path->slots[1];
1815         if (slot == 0)
1816                 return 1;
1817         if (!path->nodes[1])
1818                 return 1;
1819
1820         right_nritems = btrfs_header_nritems(right);
1821         if (right_nritems == 0) {
1822                 return 1;
1823         }
1824
1825         left = read_node_slot(root, path->nodes[1], slot - 1);
1826         free_space = btrfs_leaf_free_space(root, left);
1827         if (free_space < data_size + sizeof(struct btrfs_item)) {
1828                 free_extent_buffer(left);
1829                 return 1;
1830         }
1831
1832         /* cow and double check */
1833         ret = btrfs_cow_block(trans, root, left,
1834                               path->nodes[1], slot - 1, &left);
1835         if (ret) {
1836                 /* we hit -ENOSPC, but it isn't fatal here */
1837                 free_extent_buffer(left);
1838                 return 1;
1839         }
1840
1841         free_space = btrfs_leaf_free_space(root, left);
1842         if (free_space < data_size + sizeof(struct btrfs_item)) {
1843                 free_extent_buffer(left);
1844                 return 1;
1845         }
1846
1847         if (empty)
1848                 nr = right_nritems;
1849         else
1850                 nr = right_nritems - 1;
1851
1852         for (i = 0; i < nr; i++) {
1853                 item = btrfs_item_nr(right, i);
1854                 if (!right->map_token) {
1855                         map_extent_buffer(right, (unsigned long)item,
1856                                         sizeof(struct btrfs_item),
1857                                         &right->map_token, &right->kaddr,
1858                                         &right->map_start, &right->map_len,
1859                                         KM_USER1);
1860                 }
1861
1862                 if (path->slots[0] == i)
1863                         push_space += data_size + sizeof(*item);
1864
1865                 this_item_size = btrfs_item_size(right, item);
1866                 if (this_item_size + sizeof(*item) + push_space > free_space)
1867                         break;
1868
1869                 push_items++;
1870                 push_space += this_item_size + sizeof(*item);
1871         }
1872
1873         if (right->map_token) {
1874                 unmap_extent_buffer(right, right->map_token, KM_USER1);
1875                 right->map_token = NULL;
1876         }
1877
1878         if (push_items == 0) {
1879                 free_extent_buffer(left);
1880                 return 1;
1881         }
1882         if (!empty && push_items == btrfs_header_nritems(right))
1883                 WARN_ON(1);
1884
1885         /* push data from right to left */
1886         copy_extent_buffer(left, right,
1887                            btrfs_item_nr_offset(btrfs_header_nritems(left)),
1888                            btrfs_item_nr_offset(0),
1889                            push_items * sizeof(struct btrfs_item));
1890
1891         push_space = BTRFS_LEAF_DATA_SIZE(root) -
1892                      btrfs_item_offset_nr(right, push_items -1);
1893
1894         copy_extent_buffer(left, right, btrfs_leaf_data(left) +
1895                      leaf_data_end(root, left) - push_space,
1896                      btrfs_leaf_data(right) +
1897                      btrfs_item_offset_nr(right, push_items - 1),
1898                      push_space);
1899         old_left_nritems = btrfs_header_nritems(left);
1900         BUG_ON(old_left_nritems < 0);
1901
1902         old_left_item_size = btrfs_item_offset_nr(left, old_left_nritems - 1);
1903         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1904                 u32 ioff;
1905
1906                 item = btrfs_item_nr(left, i);
1907                 if (!left->map_token) {
1908                         map_extent_buffer(left, (unsigned long)item,
1909                                         sizeof(struct btrfs_item),
1910                                         &left->map_token, &left->kaddr,
1911                                         &left->map_start, &left->map_len,
1912                                         KM_USER1);
1913                 }
1914
1915                 ioff = btrfs_item_offset(left, item);
1916                 btrfs_set_item_offset(left, item,
1917                       ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
1918         }
1919         btrfs_set_header_nritems(left, old_left_nritems + push_items);
1920         if (left->map_token) {
1921                 unmap_extent_buffer(left, left->map_token, KM_USER1);
1922                 left->map_token = NULL;
1923         }
1924
1925         /* fixup right node */
1926         if (push_items > right_nritems) {
1927                 printk("push items %d nr %u\n", push_items, right_nritems);
1928                 WARN_ON(1);
1929         }
1930
1931         if (push_items < right_nritems) {
1932                 push_space = btrfs_item_offset_nr(right, push_items - 1) -
1933                                                   leaf_data_end(root, right);
1934                 memmove_extent_buffer(right, btrfs_leaf_data(right) +
1935                                       BTRFS_LEAF_DATA_SIZE(root) - push_space,
1936                                       btrfs_leaf_data(right) +
1937                                       leaf_data_end(root, right), push_space);
1938
1939                 memmove_extent_buffer(right, btrfs_item_nr_offset(0),
1940                               btrfs_item_nr_offset(push_items),
1941                              (btrfs_header_nritems(right) - push_items) *
1942                              sizeof(struct btrfs_item));
1943         }
1944         right_nritems -= push_items;
1945         btrfs_set_header_nritems(right, right_nritems);
1946         push_space = BTRFS_LEAF_DATA_SIZE(root);
1947         for (i = 0; i < right_nritems; i++) {
1948                 item = btrfs_item_nr(right, i);
1949
1950                 if (!right->map_token) {
1951                         map_extent_buffer(right, (unsigned long)item,
1952                                         sizeof(struct btrfs_item),
1953                                         &right->map_token, &right->kaddr,
1954                                         &right->map_start, &right->map_len,
1955                                         KM_USER1);
1956                 }
1957
1958                 push_space = push_space - btrfs_item_size(right, item);
1959                 btrfs_set_item_offset(right, item, push_space);
1960         }
1961         if (right->map_token) {
1962                 unmap_extent_buffer(right, right->map_token, KM_USER1);
1963                 right->map_token = NULL;
1964         }
1965
1966         btrfs_mark_buffer_dirty(left);
1967         if (right_nritems)
1968                 btrfs_mark_buffer_dirty(right);
1969
1970         btrfs_item_key(right, &disk_key, 0);
1971         wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1972         if (wret)
1973                 ret = wret;
1974
1975         /* then fixup the leaf pointer in the path */
1976         if (path->slots[0] < push_items) {
1977                 path->slots[0] += old_left_nritems;
1978                 free_extent_buffer(path->nodes[0]);
1979                 path->nodes[0] = left;
1980                 path->slots[1] -= 1;
1981         } else {
1982                 free_extent_buffer(left);
1983                 path->slots[0] -= push_items;
1984         }
1985         BUG_ON(path->slots[0] < 0);
1986         return ret;
1987 }
1988
1989 /*
1990  * split the path's leaf in two, making sure there is at least data_size
1991  * available for the resulting leaf level of the path.
1992  *
1993  * returns 0 if all went well and < 0 on failure.
1994  */
1995 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1996                       *root, struct btrfs_key *ins_key,
1997                       struct btrfs_path *path, int data_size, int extend)
1998 {
1999         u64 root_gen;
2000         struct extent_buffer *l;
2001         u32 nritems;
2002         int mid;
2003         int slot;
2004         struct extent_buffer *right;
2005         int space_needed = data_size + sizeof(struct btrfs_item);
2006         int data_copy_size;
2007         int rt_data_off;
2008         int i;
2009         int ret = 0;
2010         int wret;
2011         int double_split;
2012         int num_doubles = 0;
2013         struct btrfs_disk_key disk_key;
2014
2015         if (extend)
2016                 space_needed = data_size;
2017
2018         if (root->ref_cows)
2019                 root_gen = trans->transid;
2020         else
2021                 root_gen = 0;
2022
2023         /* first try to make some room by pushing left and right */
2024         if (ins_key->type != BTRFS_DIR_ITEM_KEY) {
2025                 wret = push_leaf_right(trans, root, path, data_size, 0);
2026                 if (wret < 0) {
2027                         return wret;
2028                 }
2029                 if (wret) {
2030                         wret = push_leaf_left(trans, root, path, data_size, 0);
2031                         if (wret < 0)
2032                                 return wret;
2033                 }
2034                 l = path->nodes[0];
2035
2036                 /* did the pushes work? */
2037                 if (btrfs_leaf_free_space(root, l) >= space_needed)
2038                         return 0;
2039         }
2040
2041         if (!path->nodes[1]) {
2042                 ret = insert_new_root(trans, root, path, 1);
2043                 if (ret)
2044                         return ret;
2045         }
2046 again:
2047         double_split = 0;
2048         l = path->nodes[0];
2049         slot = path->slots[0];
2050         nritems = btrfs_header_nritems(l);
2051         mid = (nritems + 1)/ 2;
2052
2053         btrfs_item_key(l, &disk_key, 0);
2054
2055         right = __btrfs_alloc_free_block(trans, root, root->leafsize,
2056                                          root->root_key.objectid,
2057                                          root_gen, disk_key.objectid, 0,
2058                                          l->start, 0);
2059         if (IS_ERR(right)) {
2060                 BUG_ON(1);
2061                 return PTR_ERR(right);
2062         }
2063
2064         memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header));
2065         btrfs_set_header_bytenr(right, right->start);
2066         btrfs_set_header_generation(right, trans->transid);
2067         btrfs_set_header_owner(right, root->root_key.objectid);
2068         btrfs_set_header_level(right, 0);
2069         write_extent_buffer(right, root->fs_info->fsid,
2070                             (unsigned long)btrfs_header_fsid(right),
2071                             BTRFS_FSID_SIZE);
2072
2073         write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
2074                             (unsigned long)btrfs_header_chunk_tree_uuid(right),
2075                             BTRFS_UUID_SIZE);
2076         if (mid <= slot) {
2077                 if (nritems == 1 ||
2078                     leaf_space_used(l, mid, nritems - mid) + space_needed >
2079                         BTRFS_LEAF_DATA_SIZE(root)) {
2080                         if (slot >= nritems) {
2081                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2082                                 btrfs_set_header_nritems(right, 0);
2083                                 wret = insert_ptr(trans, root, path,
2084                                                   &disk_key, right->start,
2085                                                   path->slots[1] + 1, 1);
2086                                 if (wret)
2087                                         ret = wret;
2088                                 free_extent_buffer(path->nodes[0]);
2089                                 path->nodes[0] = right;
2090                                 path->slots[0] = 0;
2091                                 path->slots[1] += 1;
2092                                 btrfs_mark_buffer_dirty(right);
2093                                 return ret;
2094                         }
2095                         mid = slot;
2096                         if (mid != nritems &&
2097                             leaf_space_used(l, mid, nritems - mid) +
2098                             space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
2099                                 double_split = 1;
2100                         }
2101                 }
2102         } else {
2103                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
2104                         BTRFS_LEAF_DATA_SIZE(root)) {
2105                         if (!extend && slot == 0) {
2106                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
2107                                 btrfs_set_header_nritems(right, 0);
2108                                 wret = insert_ptr(trans, root, path,
2109                                                   &disk_key,
2110                                                   right->start,
2111                                                   path->slots[1], 1);
2112                                 if (wret)
2113                                         ret = wret;
2114                                 free_extent_buffer(path->nodes[0]);
2115                                 path->nodes[0] = right;
2116                                 path->slots[0] = 0;
2117                                 if (path->slots[1] == 0) {
2118                                         wret = fixup_low_keys(trans, root,
2119                                                    path, &disk_key, 1);
2120                                         if (wret)
2121                                                 ret = wret;
2122                                 }
2123                                 btrfs_mark_buffer_dirty(right);
2124                                 return ret;
2125                         } else if (extend && slot == 0) {
2126                                 mid = 1;
2127                         } else {
2128                                 mid = slot;
2129                                 if (mid != nritems &&
2130                                     leaf_space_used(l, mid, nritems - mid) +
2131                                     space_needed > BTRFS_LEAF_DATA_SIZE(root)) {
2132                                         double_split = 1;
2133                                 }
2134                         }
2135                 }
2136         }
2137         nritems = nritems - mid;
2138         btrfs_set_header_nritems(right, nritems);
2139         data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l);
2140
2141         copy_extent_buffer(right, l, btrfs_item_nr_offset(0),
2142                            btrfs_item_nr_offset(mid),
2143                            nritems * sizeof(struct btrfs_item));
2144
2145         copy_extent_buffer(right, l,
2146                      btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
2147                      data_copy_size, btrfs_leaf_data(l) +
2148                      leaf_data_end(root, l), data_copy_size);
2149
2150         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
2151                       btrfs_item_end_nr(l, mid);
2152
2153         for (i = 0; i < nritems; i++) {
2154                 struct btrfs_item *item = btrfs_item_nr(right, i);
2155                 u32 ioff;
2156
2157                 if (!right->map_token) {
2158                         map_extent_buffer(right, (unsigned long)item,
2159                                         sizeof(struct btrfs_item),
2160                                         &right->map_token, &right->kaddr,
2161                                         &right->map_start, &right->map_len,
2162                                         KM_USER1);
2163                 }
2164
2165                 ioff = btrfs_item_offset(right, item);
2166                 btrfs_set_item_offset(right, item, ioff + rt_data_off);
2167         }
2168
2169         if (right->map_token) {
2170                 unmap_extent_buffer(right, right->map_token, KM_USER1);
2171                 right->map_token = NULL;
2172         }
2173
2174         btrfs_set_header_nritems(l, mid);
2175         ret = 0;
2176         btrfs_item_key(right, &disk_key, 0);
2177         wret = insert_ptr(trans, root, path, &disk_key, right->start,
2178                           path->slots[1] + 1, 1);
2179         if (wret)
2180                 ret = wret;
2181
2182         btrfs_mark_buffer_dirty(right);
2183         btrfs_mark_buffer_dirty(l);
2184         BUG_ON(path->slots[0] != slot);
2185
2186         if (mid <= slot) {
2187                 free_extent_buffer(path->nodes[0]);
2188                 path->nodes[0] = right;
2189                 path->slots[0] -= mid;
2190                 path->slots[1] += 1;
2191         } else
2192                 free_extent_buffer(right);
2193
2194         BUG_ON(path->slots[0] < 0);
2195
2196         if (double_split) {
2197                 BUG_ON(num_doubles != 0);
2198                 num_doubles++;
2199                 goto again;
2200         }
2201         return ret;
2202 }
2203
2204 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
2205                         struct btrfs_root *root,
2206                         struct btrfs_path *path,
2207                         u32 new_size, int from_end)
2208 {
2209         int ret = 0;
2210         int slot;
2211         int slot_orig;
2212         struct extent_buffer *leaf;
2213         struct btrfs_item *item;
2214         u32 nritems;
2215         unsigned int data_end;
2216         unsigned int old_data_start;
2217         unsigned int old_size;
2218         unsigned int size_diff;
2219         int i;
2220
2221         slot_orig = path->slots[0];
2222         leaf = path->nodes[0];
2223         slot = path->slots[0];
2224
2225         old_size = btrfs_item_size_nr(leaf, slot);
2226         if (old_size == new_size)
2227                 return 0;
2228
2229         nritems = btrfs_header_nritems(leaf);
2230         data_end = leaf_data_end(root, leaf);
2231
2232         old_data_start = btrfs_item_offset_nr(leaf, slot);
2233
2234         size_diff = old_size - new_size;
2235
2236         BUG_ON(slot < 0);
2237         BUG_ON(slot >= nritems);
2238
2239         /*
2240          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2241          */
2242         /* first correct the data pointers */
2243         for (i = slot; i < nritems; i++) {
2244                 u32 ioff;
2245                 item = btrfs_item_nr(leaf, i);
2246
2247                 if (!leaf->map_token) {
2248                         map_extent_buffer(leaf, (unsigned long)item,
2249                                         sizeof(struct btrfs_item),
2250                                         &leaf->map_token, &leaf->kaddr,
2251                                         &leaf->map_start, &leaf->map_len,
2252                                         KM_USER1);
2253                 }
2254
2255                 ioff = btrfs_item_offset(leaf, item);
2256                 btrfs_set_item_offset(leaf, item, ioff + size_diff);
2257         }
2258
2259         if (leaf->map_token) {
2260                 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2261                 leaf->map_token = NULL;
2262         }
2263
2264         /* shift the data */
2265         if (from_end) {
2266                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2267                               data_end + size_diff, btrfs_leaf_data(leaf) +
2268                               data_end, old_data_start + new_size - data_end);
2269         } else {
2270                 struct btrfs_disk_key disk_key;
2271                 u64 offset;
2272
2273                 btrfs_item_key(leaf, &disk_key, slot);
2274
2275                 if (btrfs_disk_key_type(&disk_key) == BTRFS_EXTENT_DATA_KEY) {
2276                         unsigned long ptr;
2277                         struct btrfs_file_extent_item *fi;
2278
2279                         fi = btrfs_item_ptr(leaf, slot,
2280                                             struct btrfs_file_extent_item);
2281                         fi = (struct btrfs_file_extent_item *)(
2282                              (unsigned long)fi - size_diff);
2283
2284                         if (btrfs_file_extent_type(leaf, fi) ==
2285                             BTRFS_FILE_EXTENT_INLINE) {
2286                                 ptr = btrfs_item_ptr_offset(leaf, slot);
2287                                 memmove_extent_buffer(leaf, ptr,
2288                                         (unsigned long)fi,
2289                                         offsetof(struct btrfs_file_extent_item,
2290                                                  disk_bytenr));
2291                         }
2292                 }
2293
2294                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2295                               data_end + size_diff, btrfs_leaf_data(leaf) +
2296                               data_end, old_data_start - data_end);
2297
2298                 offset = btrfs_disk_key_offset(&disk_key);
2299                 btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
2300                 btrfs_set_item_key(leaf, &disk_key, slot);
2301                 if (slot == 0)
2302                         fixup_low_keys(trans, root, path, &disk_key, 1);
2303         }
2304
2305         item = btrfs_item_nr(leaf, slot);
2306         btrfs_set_item_size(leaf, item, new_size);
2307         btrfs_mark_buffer_dirty(leaf);
2308
2309         ret = 0;
2310         if (btrfs_leaf_free_space(root, leaf) < 0) {
2311                 btrfs_print_leaf(root, leaf);
2312                 BUG();
2313         }
2314         return ret;
2315 }
2316
2317 int btrfs_extend_item(struct btrfs_trans_handle *trans,
2318                       struct btrfs_root *root, struct btrfs_path *path,
2319                       u32 data_size)
2320 {
2321         int ret = 0;
2322         int slot;
2323         int slot_orig;
2324         struct extent_buffer *leaf;
2325         struct btrfs_item *item;
2326         u32 nritems;
2327         unsigned int data_end;
2328         unsigned int old_data;
2329         unsigned int old_size;
2330         int i;
2331
2332         slot_orig = path->slots[0];
2333         leaf = path->nodes[0];
2334
2335         nritems = btrfs_header_nritems(leaf);
2336         data_end = leaf_data_end(root, leaf);
2337
2338         if (btrfs_leaf_free_space(root, leaf) < data_size) {
2339                 btrfs_print_leaf(root, leaf);
2340                 BUG();
2341         }
2342         slot = path->slots[0];
2343         old_data = btrfs_item_end_nr(leaf, slot);
2344
2345         BUG_ON(slot < 0);
2346         if (slot >= nritems) {
2347                 btrfs_print_leaf(root, leaf);
2348                 printk("slot %d too large, nritems %d\n", slot, nritems);
2349                 BUG_ON(1);
2350         }
2351
2352         /*
2353          * item0..itemN ... dataN.offset..dataN.size .. data0.size
2354          */
2355         /* first correct the data pointers */
2356         for (i = slot; i < nritems; i++) {
2357                 u32 ioff;
2358                 item = btrfs_item_nr(leaf, i);
2359
2360                 if (!leaf->map_token) {
2361                         map_extent_buffer(leaf, (unsigned long)item,
2362                                         sizeof(struct btrfs_item),
2363                                         &leaf->map_token, &leaf->kaddr,
2364                                         &leaf->map_start, &leaf->map_len,
2365                                         KM_USER1);
2366                 }
2367                 ioff = btrfs_item_offset(leaf, item);
2368                 btrfs_set_item_offset(leaf, item, ioff - data_size);
2369         }
2370
2371         if (leaf->map_token) {
2372                 unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2373                 leaf->map_token = NULL;
2374         }
2375
2376         /* shift the data */
2377         memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2378                       data_end - data_size, btrfs_leaf_data(leaf) +
2379                       data_end, old_data - data_end);
2380
2381         data_end = old_data;
2382         old_size = btrfs_item_size_nr(leaf, slot);
2383         item = btrfs_item_nr(leaf, slot);
2384         btrfs_set_item_size(leaf, item, old_size + data_size);
2385         btrfs_mark_buffer_dirty(leaf);
2386
2387         ret = 0;
2388         if (btrfs_leaf_free_space(root, leaf) < 0) {
2389                 btrfs_print_leaf(root, leaf);
2390                 BUG();
2391         }
2392         return ret;
2393 }
2394
2395 /*
2396  * Given a key and some data, insert an item into the tree.
2397  * This does all the path init required, making room in the tree if needed.
2398  */
2399 int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
2400                             struct btrfs_root *root,
2401                             struct btrfs_path *path,
2402                             struct btrfs_key *cpu_key, u32 *data_size,
2403                             int nr)
2404 {
2405         struct extent_buffer *leaf;
2406         struct btrfs_item *item;
2407         int ret = 0;
2408         int slot;
2409         int slot_orig;
2410         int i;
2411         u32 nritems;
2412         u32 total_size = 0;
2413         u32 total_data = 0;
2414         unsigned int data_end;
2415         struct btrfs_disk_key disk_key;
2416
2417         for (i = 0; i < nr; i++) {
2418                 total_data += data_size[i];
2419         }
2420
2421         /* create a root if there isn't one */
2422         if (!root->node)
2423                 BUG();
2424
2425         total_size = total_data + (nr - 1) * sizeof(struct btrfs_item);
2426         ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1);
2427         if (ret == 0) {
2428                 return -EEXIST;
2429         }
2430         if (ret < 0)
2431                 goto out;
2432
2433         slot_orig = path->slots[0];
2434         leaf = path->nodes[0];
2435
2436         nritems = btrfs_header_nritems(leaf);
2437         data_end = leaf_data_end(root, leaf);
2438
2439         if (btrfs_leaf_free_space(root, leaf) <
2440             sizeof(struct btrfs_item) + total_size) {
2441                 btrfs_print_leaf(root, leaf);
2442                 printk("not enough freespace need %u have %d\n",
2443                        total_size, btrfs_leaf_free_space(root, leaf));
2444                 BUG();
2445         }
2446
2447         slot = path->slots[0];
2448         BUG_ON(slot < 0);
2449
2450         if (slot != nritems) {
2451                 int i;
2452                 unsigned int old_data = btrfs_item_end_nr(leaf, slot);
2453
2454                 if (old_data < data_end) {
2455                         btrfs_print_leaf(root, leaf);
2456                         printk("slot %d old_data %d data_end %d\n",
2457                                slot, old_data, data_end);
2458                         BUG_ON(1);
2459                 }
2460                 /*
2461                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
2462                  */
2463                 /* first correct the data pointers */
2464                 WARN_ON(leaf->map_token);
2465                 for (i = slot; i < nritems; i++) {
2466                         u32 ioff;
2467
2468                         item = btrfs_item_nr(leaf, i);
2469                         if (!leaf->map_token) {
2470                                 map_extent_buffer(leaf, (unsigned long)item,
2471                                         sizeof(struct btrfs_item),
2472                                         &leaf->map_token, &leaf->kaddr,
2473                                         &leaf->map_start, &leaf->map_len,
2474                                         KM_USER1);
2475                         }
2476
2477                         ioff = btrfs_item_offset(leaf, item);
2478                         btrfs_set_item_offset(leaf, item, ioff - total_data);
2479                 }
2480                 if (leaf->map_token) {
2481                         unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2482                         leaf->map_token = NULL;
2483                 }
2484
2485                 /* shift the items */
2486                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
2487                               btrfs_item_nr_offset(slot),
2488                               (nritems - slot) * sizeof(struct btrfs_item));
2489
2490                 /* shift the data */
2491                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2492                               data_end - total_data, btrfs_leaf_data(leaf) +
2493                               data_end, old_data - data_end);
2494                 data_end = old_data;
2495         }
2496
2497         /* setup the item for the new data */
2498         for (i = 0; i < nr; i++) {
2499                 btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
2500                 btrfs_set_item_key(leaf, &disk_key, slot + i);
2501                 item = btrfs_item_nr(leaf, slot + i);
2502                 btrfs_set_item_offset(leaf, item, data_end - data_size[i]);
2503                 data_end -= data_size[i];
2504                 btrfs_set_item_size(leaf, item, data_size[i]);
2505         }
2506         btrfs_set_header_nritems(leaf, nritems + nr);
2507         btrfs_mark_buffer_dirty(leaf);
2508
2509         ret = 0;
2510         if (slot == 0) {
2511                 btrfs_cpu_key_to_disk(&disk_key, cpu_key);
2512                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
2513         }
2514
2515         if (btrfs_leaf_free_space(root, leaf) < 0) {
2516                 btrfs_print_leaf(root, leaf);
2517                 BUG();
2518         }
2519
2520 out:
2521         return ret;
2522 }
2523
2524 /*
2525  * Given a key and some data, insert an item into the tree.
2526  * This does all the path init required, making room in the tree if needed.
2527  */
2528 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
2529                       *root, struct btrfs_key *cpu_key, void *data, u32
2530                       data_size)
2531 {
2532         int ret = 0;
2533         struct btrfs_path *path;
2534         struct extent_buffer *leaf;
2535         unsigned long ptr;
2536
2537         path = btrfs_alloc_path();
2538         BUG_ON(!path);
2539         ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
2540         if (!ret) {
2541                 leaf = path->nodes[0];
2542                 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
2543                 write_extent_buffer(leaf, data, ptr, data_size);
2544                 btrfs_mark_buffer_dirty(leaf);
2545         }
2546         btrfs_free_path(path);
2547         return ret;
2548 }
2549
2550 /*
2551  * delete the pointer from a given node.
2552  *
2553  * If the delete empties a node, the node is removed from the tree,
2554  * continuing all the way the root if required.  The root is converted into
2555  * a leaf if all the nodes are emptied.
2556  */
2557 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2558                    struct btrfs_path *path, int level, int slot)
2559 {
2560         struct extent_buffer *parent = path->nodes[level];
2561         u32 nritems;
2562         int ret = 0;
2563         int wret;
2564
2565         nritems = btrfs_header_nritems(parent);
2566         if (slot != nritems -1) {
2567                 memmove_extent_buffer(parent,
2568                               btrfs_node_key_ptr_offset(slot),
2569                               btrfs_node_key_ptr_offset(slot + 1),
2570                               sizeof(struct btrfs_key_ptr) *
2571                               (nritems - slot - 1));
2572         }
2573         nritems--;
2574         btrfs_set_header_nritems(parent, nritems);
2575         if (nritems == 0 && parent == root->node) {
2576                 BUG_ON(btrfs_header_level(root->node) != 1);
2577                 /* just turn the root into a leaf and break */
2578                 btrfs_set_header_level(root->node, 0);
2579         } else if (slot == 0) {
2580                 struct btrfs_disk_key disk_key;
2581
2582                 btrfs_node_key(parent, &disk_key, 0);
2583                 wret = fixup_low_keys(trans, root, path, &disk_key, level + 1);
2584                 if (wret)
2585                         ret = wret;
2586         }
2587         btrfs_mark_buffer_dirty(parent);
2588         return ret;
2589 }
2590
2591 /*
2592  * delete the item at the leaf level in path.  If that empties
2593  * the leaf, remove it from the tree
2594  */
2595 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
2596                     struct btrfs_path *path, int slot, int nr)
2597 {
2598         struct extent_buffer *leaf;
2599         struct btrfs_item *item;
2600         int last_off;
2601         int dsize = 0;
2602         int ret = 0;
2603         int wret;
2604         int i;
2605         u32 nritems;
2606
2607         leaf = path->nodes[0];
2608         last_off = btrfs_item_offset_nr(leaf, slot + nr - 1);
2609
2610         for (i = 0; i < nr; i++)
2611                 dsize += btrfs_item_size_nr(leaf, slot + i);
2612
2613         nritems = btrfs_header_nritems(leaf);
2614
2615         if (slot + nr != nritems) {
2616                 int i;
2617                 int data_end = leaf_data_end(root, leaf);
2618
2619                 memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
2620                               data_end + dsize,
2621                               btrfs_leaf_data(leaf) + data_end,
2622                               last_off - data_end);
2623
2624                 for (i = slot + nr; i < nritems; i++) {
2625                         u32 ioff;
2626
2627                         item = btrfs_item_nr(leaf, i);
2628                         if (!leaf->map_token) {
2629                                 map_extent_buffer(leaf, (unsigned long)item,
2630                                         sizeof(struct btrfs_item),
2631                                         &leaf->map_token, &leaf->kaddr,
2632                                         &leaf->map_start, &leaf->map_len,
2633                                         KM_USER1);
2634                         }
2635                         ioff = btrfs_item_offset(leaf, item);
2636                         btrfs_set_item_offset(leaf, item, ioff + dsize);
2637                 }
2638
2639                 if (leaf->map_token) {
2640                         unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
2641                         leaf->map_token = NULL;
2642                 }
2643
2644                 memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
2645                               btrfs_item_nr_offset(slot + nr),
2646                               sizeof(struct btrfs_item) *
2647                               (nritems - slot - nr));
2648         }
2649         btrfs_set_header_nritems(leaf, nritems - nr);
2650         nritems -= nr;
2651
2652         /* delete the leaf if we've emptied it */
2653         if (nritems == 0) {
2654                 if (leaf == root->node) {
2655                         btrfs_set_header_level(leaf, 0);
2656                 } else {
2657                         u64 root_gen = btrfs_header_generation(path->nodes[1]);
2658                         clean_tree_block(trans, root, leaf);
2659                         wret = del_ptr(trans, root, path, 1, path->slots[1]);
2660                         if (wret)
2661                                 ret = wret;
2662                         wret = btrfs_free_extent(trans, root,
2663                                          leaf->start, leaf->len,
2664                                          btrfs_header_owner(path->nodes[1]),
2665                                          root_gen, 0, 0, 1);
2666                         if (wret)
2667                                 ret = wret;
2668                 }
2669         } else {
2670                 int used = leaf_space_used(leaf, 0, nritems);
2671                 if (slot == 0) {
2672                         struct btrfs_disk_key disk_key;
2673
2674                         btrfs_item_key(leaf, &disk_key, 0);
2675                         wret = fixup_low_keys(trans, root, path,
2676                                               &disk_key, 1);
2677                         if (wret)
2678                                 ret = wret;
2679                 }
2680
2681                 /* delete the leaf if it is mostly empty */
2682                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) {
2683                         /* push_leaf_left fixes the path.
2684                          * make sure the path still points to our leaf
2685                          * for possible call to del_ptr below
2686                          */
2687                         slot = path->slots[1];
2688                         extent_buffer_get(leaf);
2689
2690                         wret = push_leaf_left(trans, root, path, 1, 1);
2691                         if (wret < 0 && wret != -ENOSPC)
2692                                 ret = wret;
2693
2694                         if (path->nodes[0] == leaf &&
2695                             btrfs_header_nritems(leaf)) {
2696                                 wret = push_leaf_right(trans, root, path, 1, 1);
2697                                 if (wret < 0 && wret != -ENOSPC)
2698                                         ret = wret;
2699                         }
2700
2701                         if (btrfs_header_nritems(leaf) == 0) {
2702                                 u64 root_gen;
2703                                 u64 bytenr = leaf->start;
2704                                 u32 blocksize = leaf->len;
2705
2706                                 root_gen = btrfs_header_generation(
2707                                                            path->nodes[1]);
2708
2709                                 clean_tree_block(trans, root, leaf);
2710
2711                                 wret = del_ptr(trans, root, path, 1, slot);
2712                                 if (wret)
2713                                         ret = wret;
2714
2715                                 free_extent_buffer(leaf);
2716                                 wret = btrfs_free_extent(trans, root, bytenr,
2717                                              blocksize,
2718                                              btrfs_header_owner(path->nodes[1]),
2719                                              root_gen, 0, 0, 1);
2720                                 if (wret)
2721                                         ret = wret;
2722                         } else {
2723                                 btrfs_mark_buffer_dirty(leaf);
2724                                 free_extent_buffer(leaf);
2725                         }
2726                 } else {
2727                         btrfs_mark_buffer_dirty(leaf);
2728                 }
2729         }
2730         return ret;
2731 }
2732
2733 /*
2734  * walk up the tree as far as required to find the previous leaf.
2735  * returns 0 if it found something or 1 if there are no lesser leaves.
2736  * returns < 0 on io errors.
2737  */
2738 int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
2739 {
2740         int slot;
2741         int level = 1;
2742         struct extent_buffer *c;
2743         struct extent_buffer *next = NULL;
2744
2745         while(level < BTRFS_MAX_LEVEL) {
2746                 if (!path->nodes[level])
2747                         return 1;
2748
2749                 slot = path->slots[level];
2750                 c = path->nodes[level];
2751                 if (slot == 0) {
2752                         level++;
2753                         if (level == BTRFS_MAX_LEVEL)
2754                                 return 1;
2755                         continue;
2756                 }
2757                 slot--;
2758
2759                 if (next)
2760                         free_extent_buffer(next);
2761
2762                 next = read_node_slot(root, c, slot);
2763                 break;
2764         }
2765         path->slots[level] = slot;
2766         while(1) {
2767                 level--;
2768                 c = path->nodes[level];
2769                 free_extent_buffer(c);
2770                 slot = btrfs_header_nritems(next);
2771                 if (slot != 0)
2772                         slot--;
2773                 path->nodes[level] = next;
2774                 path->slots[level] = slot;
2775                 if (!level)
2776                         break;
2777                 next = read_node_slot(root, next, slot);
2778         }
2779         return 0;
2780 }
2781
2782 /*
2783  * walk up the tree as far as required to find the next leaf.
2784  * returns 0 if it found something or 1 if there are no greater leaves.
2785  * returns < 0 on io errors.
2786  */
2787 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
2788 {
2789         int slot;
2790         int level = 1;
2791         struct extent_buffer *c;
2792         struct extent_buffer *next = NULL;
2793
2794         while(level < BTRFS_MAX_LEVEL) {
2795                 if (!path->nodes[level])
2796                         return 1;
2797
2798                 slot = path->slots[level] + 1;
2799                 c = path->nodes[level];
2800                 if (slot >= btrfs_header_nritems(c)) {
2801                         level++;
2802                         if (level == BTRFS_MAX_LEVEL)
2803                                 return 1;
2804                         continue;
2805                 }
2806
2807                 if (next)
2808                         free_extent_buffer(next);
2809
2810                 if (path->reada)
2811                         reada_for_search(root, path, level, slot, 0);
2812
2813                 next = read_node_slot(root, c, slot);
2814                 break;
2815         }
2816         path->slots[level] = slot;
2817         while(1) {
2818                 level--;
2819                 c = path->nodes[level];
2820                 free_extent_buffer(c);
2821                 path->nodes[level] = next;
2822                 path->slots[level] = 0;
2823                 if (!level)
2824                         break;
2825                 if (path->reada)
2826                         reada_for_search(root, path, level, 0, 0);
2827                 next = read_node_slot(root, next, 0);
2828         }
2829         return 0;
2830 }
2831
2832 int btrfs_previous_item(struct btrfs_root *root,
2833                         struct btrfs_path *path, u64 min_objectid,
2834                         int type)
2835 {
2836         struct btrfs_key found_key;
2837         struct extent_buffer *leaf;
2838         int ret;
2839
2840         while(1) {
2841                 if (path->slots[0] == 0) {
2842                         ret = btrfs_prev_leaf(root, path);
2843                         if (ret != 0)
2844                                 return ret;
2845                 } else {
2846                         path->slots[0]--;
2847                 }
2848                 leaf = path->nodes[0];
2849                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2850                 if (found_key.type == type)
2851                         return 0;
2852         }
2853         return 1;
2854 }
2855