Btrfs: allocator improvements, inode block groups
[linux-2.6] / fs / btrfs / ctree.c
1 #include <linux/module.h>
2 #include "ctree.h"
3 #include "disk-io.h"
4 #include "transaction.h"
5
6 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
7                       *root, struct btrfs_path *path, int level);
8 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
9                       *root, struct btrfs_key *ins_key,
10                       struct btrfs_path *path, int data_size);
11 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
12                           *root, struct buffer_head *dst, struct buffer_head
13                           *src);
14 static int balance_node_right(struct btrfs_trans_handle *trans, struct
15                               btrfs_root *root, struct buffer_head *dst_buf,
16                               struct buffer_head *src_buf);
17 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
18                    struct btrfs_path *path, int level, int slot);
19
20 inline void btrfs_init_path(struct btrfs_path *p)
21 {
22         memset(p, 0, sizeof(*p));
23 }
24
25 struct btrfs_path *btrfs_alloc_path(void)
26 {
27         struct btrfs_path *path;
28         path = kmem_cache_alloc(btrfs_path_cachep, GFP_NOFS);
29         if (path)
30                 btrfs_init_path(path);
31         return path;
32 }
33
34 void btrfs_free_path(struct btrfs_path *p)
35 {
36         btrfs_release_path(NULL, p);
37         kmem_cache_free(btrfs_path_cachep, p);
38 }
39
40 void btrfs_release_path(struct btrfs_root *root, struct btrfs_path *p)
41 {
42         int i;
43         for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
44                 if (!p->nodes[i])
45                         break;
46                 btrfs_block_release(root, p->nodes[i]);
47         }
48         memset(p, 0, sizeof(*p));
49 }
50
51 static int btrfs_cow_block(struct btrfs_trans_handle *trans, struct btrfs_root
52                            *root, struct buffer_head *buf, struct buffer_head
53                            *parent, int parent_slot, struct buffer_head
54                            **cow_ret)
55 {
56         struct buffer_head *cow;
57         struct btrfs_node *cow_node;
58
59         if (btrfs_header_generation(btrfs_buffer_header(buf)) ==
60                                     trans->transid) {
61                 *cow_ret = buf;
62                 return 0;
63         }
64         cow = btrfs_alloc_free_block(trans, root, buf->b_blocknr);
65         cow_node = btrfs_buffer_node(cow);
66         if (buf->b_size != root->blocksize || cow->b_size != root->blocksize)
67                 WARN_ON(1);
68         memcpy(cow_node, btrfs_buffer_node(buf), root->blocksize);
69         btrfs_set_header_blocknr(&cow_node->header, bh_blocknr(cow));
70         btrfs_set_header_generation(&cow_node->header, trans->transid);
71         btrfs_set_header_owner(&cow_node->header, root->root_key.objectid);
72         btrfs_inc_ref(trans, root, buf);
73         if (buf == root->node) {
74                 root->node = cow;
75                 get_bh(cow);
76                 if (buf != root->commit_root) {
77                         btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1);
78                 }
79                 btrfs_block_release(root, buf);
80         } else {
81                 btrfs_set_node_blockptr(btrfs_buffer_node(parent), parent_slot,
82                                         bh_blocknr(cow));
83                 btrfs_mark_buffer_dirty(parent);
84                 btrfs_free_extent(trans, root, bh_blocknr(buf), 1, 1);
85         }
86         btrfs_block_release(root, buf);
87         mark_buffer_dirty(cow);
88         *cow_ret = cow;
89         return 0;
90 }
91
92 /*
93  * The leaf data grows from end-to-front in the node.
94  * this returns the address of the start of the last item,
95  * which is the stop of the leaf data stack
96  */
97 static inline unsigned int leaf_data_end(struct btrfs_root *root,
98                                          struct btrfs_leaf *leaf)
99 {
100         u32 nr = btrfs_header_nritems(&leaf->header);
101         if (nr == 0)
102                 return BTRFS_LEAF_DATA_SIZE(root);
103         return btrfs_item_offset(leaf->items + nr - 1);
104 }
105
106 /*
107  * compare two keys in a memcmp fashion
108  */
109 static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2)
110 {
111         struct btrfs_key k1;
112
113         btrfs_disk_key_to_cpu(&k1, disk);
114
115         if (k1.objectid > k2->objectid)
116                 return 1;
117         if (k1.objectid < k2->objectid)
118                 return -1;
119         if (k1.flags > k2->flags)
120                 return 1;
121         if (k1.flags < k2->flags)
122                 return -1;
123         if (k1.offset > k2->offset)
124                 return 1;
125         if (k1.offset < k2->offset)
126                 return -1;
127         return 0;
128 }
129
130 static int check_node(struct btrfs_root *root, struct btrfs_path *path,
131                       int level)
132 {
133         int i;
134         struct btrfs_node *parent = NULL;
135         struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]);
136         int parent_slot;
137         u32 nritems = btrfs_header_nritems(&node->header);
138
139         if (path->nodes[level + 1])
140                 parent = btrfs_buffer_node(path->nodes[level + 1]);
141         parent_slot = path->slots[level + 1];
142         BUG_ON(nritems == 0);
143         if (parent) {
144                 struct btrfs_disk_key *parent_key;
145                 parent_key = &parent->ptrs[parent_slot].key;
146                 BUG_ON(memcmp(parent_key, &node->ptrs[0].key,
147                               sizeof(struct btrfs_disk_key)));
148                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
149                        btrfs_header_blocknr(&node->header));
150         }
151         BUG_ON(nritems > BTRFS_NODEPTRS_PER_BLOCK(root));
152         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
153                 struct btrfs_key cpukey;
154                 btrfs_disk_key_to_cpu(&cpukey, &node->ptrs[i + 1].key);
155                 BUG_ON(comp_keys(&node->ptrs[i].key, &cpukey) >= 0);
156         }
157         return 0;
158 }
159
160 static int check_leaf(struct btrfs_root *root, struct btrfs_path *path,
161                       int level)
162 {
163         int i;
164         struct btrfs_leaf *leaf = btrfs_buffer_leaf(path->nodes[level]);
165         struct btrfs_node *parent = NULL;
166         int parent_slot;
167         u32 nritems = btrfs_header_nritems(&leaf->header);
168
169         if (path->nodes[level + 1])
170                 parent = btrfs_buffer_node(path->nodes[level + 1]);
171         parent_slot = path->slots[level + 1];
172         BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
173
174         if (nritems == 0)
175                 return 0;
176
177         if (parent) {
178                 struct btrfs_disk_key *parent_key;
179                 parent_key = &parent->ptrs[parent_slot].key;
180                 BUG_ON(memcmp(parent_key, &leaf->items[0].key,
181                        sizeof(struct btrfs_disk_key)));
182                 BUG_ON(btrfs_node_blockptr(parent, parent_slot) !=
183                        btrfs_header_blocknr(&leaf->header));
184         }
185         for (i = 0; nritems > 1 && i < nritems - 2; i++) {
186                 struct btrfs_key cpukey;
187                 btrfs_disk_key_to_cpu(&cpukey, &leaf->items[i + 1].key);
188                 BUG_ON(comp_keys(&leaf->items[i].key,
189                                  &cpukey) >= 0);
190                 BUG_ON(btrfs_item_offset(leaf->items + i) !=
191                         btrfs_item_end(leaf->items + i + 1));
192                 if (i == 0) {
193                         BUG_ON(btrfs_item_offset(leaf->items + i) +
194                                btrfs_item_size(leaf->items + i) !=
195                                BTRFS_LEAF_DATA_SIZE(root));
196                 }
197         }
198         return 0;
199 }
200
201 static int check_block(struct btrfs_root *root, struct btrfs_path *path,
202                         int level)
203 {
204         struct btrfs_node *node = btrfs_buffer_node(path->nodes[level]);
205         if (memcmp(node->header.fsid, root->fs_info->disk_super->fsid,
206                    sizeof(node->header.fsid)))
207                 BUG();
208         if (level == 0)
209                 return check_leaf(root, path, level);
210         return check_node(root, path, level);
211 }
212
213 /*
214  * search for key in the array p.  items p are item_size apart
215  * and there are 'max' items in p
216  * the slot in the array is returned via slot, and it points to
217  * the place where you would insert key if it is not found in
218  * the array.
219  *
220  * slot may point to max if the key is bigger than all of the keys
221  */
222 static int generic_bin_search(char *p, int item_size, struct btrfs_key *key,
223                        int max, int *slot)
224 {
225         int low = 0;
226         int high = max;
227         int mid;
228         int ret;
229         struct btrfs_disk_key *tmp;
230
231         while(low < high) {
232                 mid = (low + high) / 2;
233                 tmp = (struct btrfs_disk_key *)(p + mid * item_size);
234                 ret = comp_keys(tmp, key);
235
236                 if (ret < 0)
237                         low = mid + 1;
238                 else if (ret > 0)
239                         high = mid;
240                 else {
241                         *slot = mid;
242                         return 0;
243                 }
244         }
245         *slot = low;
246         return 1;
247 }
248
249 /*
250  * simple bin_search frontend that does the right thing for
251  * leaves vs nodes
252  */
253 static int bin_search(struct btrfs_node *c, struct btrfs_key *key, int *slot)
254 {
255         if (btrfs_is_leaf(c)) {
256                 struct btrfs_leaf *l = (struct btrfs_leaf *)c;
257                 return generic_bin_search((void *)l->items,
258                                           sizeof(struct btrfs_item),
259                                           key, btrfs_header_nritems(&c->header),
260                                           slot);
261         } else {
262                 return generic_bin_search((void *)c->ptrs,
263                                           sizeof(struct btrfs_key_ptr),
264                                           key, btrfs_header_nritems(&c->header),
265                                           slot);
266         }
267         return -1;
268 }
269
270 static struct buffer_head *read_node_slot(struct btrfs_root *root,
271                                    struct buffer_head *parent_buf,
272                                    int slot)
273 {
274         struct btrfs_node *node = btrfs_buffer_node(parent_buf);
275         if (slot < 0)
276                 return NULL;
277         if (slot >= btrfs_header_nritems(&node->header))
278                 return NULL;
279         return read_tree_block(root, btrfs_node_blockptr(node, slot));
280 }
281
282 static int balance_level(struct btrfs_trans_handle *trans, struct btrfs_root
283                          *root, struct btrfs_path *path, int level)
284 {
285         struct buffer_head *right_buf;
286         struct buffer_head *mid_buf;
287         struct buffer_head *left_buf;
288         struct buffer_head *parent_buf = NULL;
289         struct btrfs_node *right = NULL;
290         struct btrfs_node *mid;
291         struct btrfs_node *left = NULL;
292         struct btrfs_node *parent = NULL;
293         int ret = 0;
294         int wret;
295         int pslot;
296         int orig_slot = path->slots[level];
297         u64 orig_ptr;
298
299         if (level == 0)
300                 return 0;
301
302         mid_buf = path->nodes[level];
303         mid = btrfs_buffer_node(mid_buf);
304         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
305
306         if (level < BTRFS_MAX_LEVEL - 1)
307                 parent_buf = path->nodes[level + 1];
308         pslot = path->slots[level + 1];
309
310         /*
311          * deal with the case where there is only one pointer in the root
312          * by promoting the node below to a root
313          */
314         if (!parent_buf) {
315                 struct buffer_head *child;
316                 u64 blocknr = bh_blocknr(mid_buf);
317
318                 if (btrfs_header_nritems(&mid->header) != 1)
319                         return 0;
320
321                 /* promote the child to a root */
322                 child = read_node_slot(root, mid_buf, 0);
323                 BUG_ON(!child);
324                 root->node = child;
325                 path->nodes[level] = NULL;
326                 clean_tree_block(trans, root, mid_buf);
327                 wait_on_buffer(mid_buf);
328                 /* once for the path */
329                 btrfs_block_release(root, mid_buf);
330                 /* once for the root ptr */
331                 btrfs_block_release(root, mid_buf);
332                 return btrfs_free_extent(trans, root, blocknr, 1, 1);
333         }
334         parent = btrfs_buffer_node(parent_buf);
335
336         if (btrfs_header_nritems(&mid->header) >
337             BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
338                 return 0;
339
340         left_buf = read_node_slot(root, parent_buf, pslot - 1);
341         right_buf = read_node_slot(root, parent_buf, pslot + 1);
342
343         /* first, try to make some room in the middle buffer */
344         if (left_buf) {
345                 btrfs_cow_block(trans, root, left_buf, parent_buf, pslot - 1,
346                                 &left_buf);
347                 left = btrfs_buffer_node(left_buf);
348                 orig_slot += btrfs_header_nritems(&left->header);
349                 wret = push_node_left(trans, root, left_buf, mid_buf);
350                 if (wret < 0)
351                         ret = wret;
352         }
353
354         /*
355          * then try to empty the right most buffer into the middle
356          */
357         if (right_buf) {
358                 btrfs_cow_block(trans, root, right_buf, parent_buf, pslot + 1,
359                                 &right_buf);
360                 right = btrfs_buffer_node(right_buf);
361                 wret = push_node_left(trans, root, mid_buf, right_buf);
362                 if (wret < 0)
363                         ret = wret;
364                 if (btrfs_header_nritems(&right->header) == 0) {
365                         u64 blocknr = bh_blocknr(right_buf);
366                         clean_tree_block(trans, root, right_buf);
367                         wait_on_buffer(right_buf);
368                         btrfs_block_release(root, right_buf);
369                         right_buf = NULL;
370                         right = NULL;
371                         wret = del_ptr(trans, root, path, level + 1, pslot +
372                                        1);
373                         if (wret)
374                                 ret = wret;
375                         wret = btrfs_free_extent(trans, root, blocknr, 1, 1);
376                         if (wret)
377                                 ret = wret;
378                 } else {
379                         btrfs_memcpy(root, parent,
380                                      &parent->ptrs[pslot + 1].key,
381                                      &right->ptrs[0].key,
382                                      sizeof(struct btrfs_disk_key));
383                         btrfs_mark_buffer_dirty(parent_buf);
384                 }
385         }
386         if (btrfs_header_nritems(&mid->header) == 1) {
387                 /*
388                  * we're not allowed to leave a node with one item in the
389                  * tree during a delete.  A deletion from lower in the tree
390                  * could try to delete the only pointer in this node.
391                  * So, pull some keys from the left.
392                  * There has to be a left pointer at this point because
393                  * otherwise we would have pulled some pointers from the
394                  * right
395                  */
396                 BUG_ON(!left_buf);
397                 wret = balance_node_right(trans, root, mid_buf, left_buf);
398                 if (wret < 0)
399                         ret = wret;
400                 BUG_ON(wret == 1);
401         }
402         if (btrfs_header_nritems(&mid->header) == 0) {
403                 /* we've managed to empty the middle node, drop it */
404                 u64 blocknr = bh_blocknr(mid_buf);
405                 clean_tree_block(trans, root, mid_buf);
406                 wait_on_buffer(mid_buf);
407                 btrfs_block_release(root, mid_buf);
408                 mid_buf = NULL;
409                 mid = NULL;
410                 wret = del_ptr(trans, root, path, level + 1, pslot);
411                 if (wret)
412                         ret = wret;
413                 wret = btrfs_free_extent(trans, root, blocknr, 1, 1);
414                 if (wret)
415                         ret = wret;
416         } else {
417                 /* update the parent key to reflect our changes */
418                 btrfs_memcpy(root, parent,
419                              &parent->ptrs[pslot].key, &mid->ptrs[0].key,
420                              sizeof(struct btrfs_disk_key));
421                 btrfs_mark_buffer_dirty(parent_buf);
422         }
423
424         /* update the path */
425         if (left_buf) {
426                 if (btrfs_header_nritems(&left->header) > orig_slot) {
427                         get_bh(left_buf);
428                         path->nodes[level] = left_buf;
429                         path->slots[level + 1] -= 1;
430                         path->slots[level] = orig_slot;
431                         if (mid_buf)
432                                 btrfs_block_release(root, mid_buf);
433                 } else {
434                         orig_slot -= btrfs_header_nritems(&left->header);
435                         path->slots[level] = orig_slot;
436                 }
437         }
438         /* double check we haven't messed things up */
439         check_block(root, path, level);
440         if (orig_ptr !=
441             btrfs_node_blockptr(btrfs_buffer_node(path->nodes[level]),
442                                 path->slots[level]))
443                 BUG();
444
445         if (right_buf)
446                 btrfs_block_release(root, right_buf);
447         if (left_buf)
448                 btrfs_block_release(root, left_buf);
449         return ret;
450 }
451
452 /* returns zero if the push worked, non-zero otherwise */
453 static int push_nodes_for_insert(struct btrfs_trans_handle *trans,
454                                 struct btrfs_root *root,
455                                 struct btrfs_path *path, int level)
456 {
457         struct buffer_head *right_buf;
458         struct buffer_head *mid_buf;
459         struct buffer_head *left_buf;
460         struct buffer_head *parent_buf = NULL;
461         struct btrfs_node *right = NULL;
462         struct btrfs_node *mid;
463         struct btrfs_node *left = NULL;
464         struct btrfs_node *parent = NULL;
465         int ret = 0;
466         int wret;
467         int pslot;
468         int orig_slot = path->slots[level];
469         u64 orig_ptr;
470
471         if (level == 0)
472                 return 1;
473
474         mid_buf = path->nodes[level];
475         mid = btrfs_buffer_node(mid_buf);
476         orig_ptr = btrfs_node_blockptr(mid, orig_slot);
477
478         if (level < BTRFS_MAX_LEVEL - 1)
479                 parent_buf = path->nodes[level + 1];
480         pslot = path->slots[level + 1];
481
482         if (!parent_buf)
483                 return 1;
484         parent = btrfs_buffer_node(parent_buf);
485
486         left_buf = read_node_slot(root, parent_buf, pslot - 1);
487
488         /* first, try to make some room in the middle buffer */
489         if (left_buf) {
490                 u32 left_nr;
491                 left = btrfs_buffer_node(left_buf);
492                 left_nr = btrfs_header_nritems(&left->header);
493                 if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
494                         wret = 1;
495                 } else {
496                         btrfs_cow_block(trans, root, left_buf, parent_buf,
497                                         pslot - 1, &left_buf);
498                         left = btrfs_buffer_node(left_buf);
499                         wret = push_node_left(trans, root, left_buf, mid_buf);
500                 }
501                 if (wret < 0)
502                         ret = wret;
503                 if (wret == 0) {
504                         orig_slot += left_nr;
505                         btrfs_memcpy(root, parent,
506                                      &parent->ptrs[pslot].key,
507                                      &mid->ptrs[0].key,
508                                      sizeof(struct btrfs_disk_key));
509                         btrfs_mark_buffer_dirty(parent_buf);
510                         if (btrfs_header_nritems(&left->header) > orig_slot) {
511                                 path->nodes[level] = left_buf;
512                                 path->slots[level + 1] -= 1;
513                                 path->slots[level] = orig_slot;
514                                 btrfs_block_release(root, mid_buf);
515                         } else {
516                                 orig_slot -=
517                                         btrfs_header_nritems(&left->header);
518                                 path->slots[level] = orig_slot;
519                                 btrfs_block_release(root, left_buf);
520                         }
521                         check_node(root, path, level);
522                         return 0;
523                 }
524                 btrfs_block_release(root, left_buf);
525         }
526         right_buf = read_node_slot(root, parent_buf, pslot + 1);
527
528         /*
529          * then try to empty the right most buffer into the middle
530          */
531         if (right_buf) {
532                 u32 right_nr;
533                 right = btrfs_buffer_node(right_buf);
534                 right_nr = btrfs_header_nritems(&right->header);
535                 if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
536                         wret = 1;
537                 } else {
538                         btrfs_cow_block(trans, root, right_buf,
539                                         parent_buf, pslot + 1, &right_buf);
540                         right = btrfs_buffer_node(right_buf);
541                         wret = balance_node_right(trans, root,
542                                                   right_buf, mid_buf);
543                 }
544                 if (wret < 0)
545                         ret = wret;
546                 if (wret == 0) {
547                         btrfs_memcpy(root, parent,
548                                      &parent->ptrs[pslot + 1].key,
549                                      &right->ptrs[0].key,
550                                      sizeof(struct btrfs_disk_key));
551                         btrfs_mark_buffer_dirty(parent_buf);
552                         if (btrfs_header_nritems(&mid->header) <= orig_slot) {
553                                 path->nodes[level] = right_buf;
554                                 path->slots[level + 1] += 1;
555                                 path->slots[level] = orig_slot -
556                                         btrfs_header_nritems(&mid->header);
557                                 btrfs_block_release(root, mid_buf);
558                         } else {
559                                 btrfs_block_release(root, right_buf);
560                         }
561                         check_node(root, path, level);
562                         return 0;
563                 }
564                 btrfs_block_release(root, right_buf);
565         }
566         check_node(root, path, level);
567         return 1;
568 }
569
570 /*
571  * look for key in the tree.  path is filled in with nodes along the way
572  * if key is found, we return zero and you can find the item in the leaf
573  * level of the path (level 0)
574  *
575  * If the key isn't found, the path points to the slot where it should
576  * be inserted, and 1 is returned.  If there are other errors during the
577  * search a negative error number is returned.
578  *
579  * if ins_len > 0, nodes and leaves will be split as we walk down the
580  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
581  * possible)
582  */
583 int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
584                       *root, struct btrfs_key *key, struct btrfs_path *p, int
585                       ins_len, int cow)
586 {
587         struct buffer_head *b;
588         struct buffer_head *cow_buf;
589         struct btrfs_node *c;
590         int slot;
591         int ret;
592         int level;
593
594         WARN_ON(p->nodes[0] != NULL);
595         WARN_ON(!mutex_is_locked(&root->fs_info->fs_mutex));
596 again:
597         b = root->node;
598         get_bh(b);
599         while (b) {
600                 c = btrfs_buffer_node(b);
601                 level = btrfs_header_level(&c->header);
602                 if (cow) {
603                         int wret;
604                         wret = btrfs_cow_block(trans, root, b,
605                                                p->nodes[level + 1],
606                                                p->slots[level + 1],
607                                                &cow_buf);
608                         b = cow_buf;
609                         c = btrfs_buffer_node(b);
610                 }
611                 BUG_ON(!cow && ins_len);
612                 if (level != btrfs_header_level(&c->header))
613                         WARN_ON(1);
614                 level = btrfs_header_level(&c->header);
615                 p->nodes[level] = b;
616                 ret = check_block(root, p, level);
617                 if (ret)
618                         return -1;
619                 ret = bin_search(c, key, &slot);
620                 if (!btrfs_is_leaf(c)) {
621                         if (ret && slot > 0)
622                                 slot -= 1;
623                         p->slots[level] = slot;
624                         if (ins_len > 0 && btrfs_header_nritems(&c->header) >=
625                             BTRFS_NODEPTRS_PER_BLOCK(root) - 1) {
626                                 int sret = split_node(trans, root, p, level);
627                                 BUG_ON(sret > 0);
628                                 if (sret)
629                                         return sret;
630                                 b = p->nodes[level];
631                                 c = btrfs_buffer_node(b);
632                                 slot = p->slots[level];
633                         } else if (ins_len < 0) {
634                                 int sret = balance_level(trans, root, p,
635                                                          level);
636                                 if (sret)
637                                         return sret;
638                                 b = p->nodes[level];
639                                 if (!b)
640                                         goto again;
641                                 c = btrfs_buffer_node(b);
642                                 slot = p->slots[level];
643                                 BUG_ON(btrfs_header_nritems(&c->header) == 1);
644                         }
645                         b = read_tree_block(root, btrfs_node_blockptr(c, slot));
646                 } else {
647                         struct btrfs_leaf *l = (struct btrfs_leaf *)c;
648                         p->slots[level] = slot;
649                         if (ins_len > 0 && btrfs_leaf_free_space(root, l) <
650                             sizeof(struct btrfs_item) + ins_len) {
651                                 int sret = split_leaf(trans, root, key,
652                                                       p, ins_len);
653                                 BUG_ON(sret > 0);
654                                 if (sret)
655                                         return sret;
656                         }
657                         return ret;
658                 }
659         }
660         return 1;
661 }
662
663 /*
664  * adjust the pointers going up the tree, starting at level
665  * making sure the right key of each node is points to 'key'.
666  * This is used after shifting pointers to the left, so it stops
667  * fixing up pointers when a given leaf/node is not in slot 0 of the
668  * higher levels
669  *
670  * If this fails to write a tree block, it returns -1, but continues
671  * fixing up the blocks in ram so the tree is consistent.
672  */
673 static int fixup_low_keys(struct btrfs_trans_handle *trans, struct btrfs_root
674                           *root, struct btrfs_path *path, struct btrfs_disk_key
675                           *key, int level)
676 {
677         int i;
678         int ret = 0;
679         for (i = level; i < BTRFS_MAX_LEVEL; i++) {
680                 struct btrfs_node *t;
681                 int tslot = path->slots[i];
682                 if (!path->nodes[i])
683                         break;
684                 t = btrfs_buffer_node(path->nodes[i]);
685                 btrfs_memcpy(root, t, &t->ptrs[tslot].key, key, sizeof(*key));
686                 btrfs_mark_buffer_dirty(path->nodes[i]);
687                 if (tslot != 0)
688                         break;
689         }
690         return ret;
691 }
692
693 /*
694  * try to push data from one node into the next node left in the
695  * tree.
696  *
697  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
698  * error, and > 0 if there was no room in the left hand block.
699  */
700 static int push_node_left(struct btrfs_trans_handle *trans, struct btrfs_root
701                           *root, struct buffer_head *dst_buf, struct
702                           buffer_head *src_buf)
703 {
704         struct btrfs_node *src = btrfs_buffer_node(src_buf);
705         struct btrfs_node *dst = btrfs_buffer_node(dst_buf);
706         int push_items = 0;
707         int src_nritems;
708         int dst_nritems;
709         int ret = 0;
710
711         src_nritems = btrfs_header_nritems(&src->header);
712         dst_nritems = btrfs_header_nritems(&dst->header);
713         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
714         if (push_items <= 0) {
715                 return 1;
716         }
717
718         if (src_nritems < push_items)
719                 push_items = src_nritems;
720
721         btrfs_memcpy(root, dst, dst->ptrs + dst_nritems, src->ptrs,
722                      push_items * sizeof(struct btrfs_key_ptr));
723         if (push_items < src_nritems) {
724                 btrfs_memmove(root, src, src->ptrs, src->ptrs + push_items,
725                         (src_nritems - push_items) *
726                         sizeof(struct btrfs_key_ptr));
727         }
728         btrfs_set_header_nritems(&src->header, src_nritems - push_items);
729         btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
730         btrfs_mark_buffer_dirty(src_buf);
731         btrfs_mark_buffer_dirty(dst_buf);
732         return ret;
733 }
734
735 /*
736  * try to push data from one node into the next node right in the
737  * tree.
738  *
739  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
740  * error, and > 0 if there was no room in the right hand block.
741  *
742  * this will  only push up to 1/2 the contents of the left node over
743  */
744 static int balance_node_right(struct btrfs_trans_handle *trans, struct
745                               btrfs_root *root, struct buffer_head *dst_buf,
746                               struct buffer_head *src_buf)
747 {
748         struct btrfs_node *src = btrfs_buffer_node(src_buf);
749         struct btrfs_node *dst = btrfs_buffer_node(dst_buf);
750         int push_items = 0;
751         int max_push;
752         int src_nritems;
753         int dst_nritems;
754         int ret = 0;
755
756         src_nritems = btrfs_header_nritems(&src->header);
757         dst_nritems = btrfs_header_nritems(&dst->header);
758         push_items = BTRFS_NODEPTRS_PER_BLOCK(root) - dst_nritems;
759         if (push_items <= 0) {
760                 return 1;
761         }
762
763         max_push = src_nritems / 2 + 1;
764         /* don't try to empty the node */
765         if (max_push > src_nritems)
766                 return 1;
767         if (max_push < push_items)
768                 push_items = max_push;
769
770         btrfs_memmove(root, dst, dst->ptrs + push_items, dst->ptrs,
771                       dst_nritems * sizeof(struct btrfs_key_ptr));
772
773         btrfs_memcpy(root, dst, dst->ptrs,
774                      src->ptrs + src_nritems - push_items,
775                      push_items * sizeof(struct btrfs_key_ptr));
776
777         btrfs_set_header_nritems(&src->header, src_nritems - push_items);
778         btrfs_set_header_nritems(&dst->header, dst_nritems + push_items);
779
780         btrfs_mark_buffer_dirty(src_buf);
781         btrfs_mark_buffer_dirty(dst_buf);
782         return ret;
783 }
784
785 /*
786  * helper function to insert a new root level in the tree.
787  * A new node is allocated, and a single item is inserted to
788  * point to the existing root
789  *
790  * returns zero on success or < 0 on failure.
791  */
792 static int insert_new_root(struct btrfs_trans_handle *trans, struct btrfs_root
793                            *root, struct btrfs_path *path, int level)
794 {
795         struct buffer_head *t;
796         struct btrfs_node *lower;
797         struct btrfs_node *c;
798         struct btrfs_disk_key *lower_key;
799
800         BUG_ON(path->nodes[level]);
801         BUG_ON(path->nodes[level-1] != root->node);
802
803         t = btrfs_alloc_free_block(trans, root, root->node->b_blocknr);
804         c = btrfs_buffer_node(t);
805         memset(c, 0, root->blocksize);
806         btrfs_set_header_nritems(&c->header, 1);
807         btrfs_set_header_level(&c->header, level);
808         btrfs_set_header_blocknr(&c->header, bh_blocknr(t));
809         btrfs_set_header_generation(&c->header, trans->transid);
810         btrfs_set_header_owner(&c->header, root->root_key.objectid);
811         lower = btrfs_buffer_node(path->nodes[level-1]);
812         memcpy(c->header.fsid, root->fs_info->disk_super->fsid,
813                sizeof(c->header.fsid));
814         if (btrfs_is_leaf(lower))
815                 lower_key = &((struct btrfs_leaf *)lower)->items[0].key;
816         else
817                 lower_key = &lower->ptrs[0].key;
818         btrfs_memcpy(root, c, &c->ptrs[0].key, lower_key,
819                      sizeof(struct btrfs_disk_key));
820         btrfs_set_node_blockptr(c, 0, bh_blocknr(path->nodes[level - 1]));
821
822         btrfs_mark_buffer_dirty(t);
823
824         /* the super has an extra ref to root->node */
825         btrfs_block_release(root, root->node);
826         root->node = t;
827         get_bh(t);
828         path->nodes[level] = t;
829         path->slots[level] = 0;
830         return 0;
831 }
832
833 /*
834  * worker function to insert a single pointer in a node.
835  * the node should have enough room for the pointer already
836  *
837  * slot and level indicate where you want the key to go, and
838  * blocknr is the block the key points to.
839  *
840  * returns zero on success and < 0 on any error
841  */
842 static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root
843                       *root, struct btrfs_path *path, struct btrfs_disk_key
844                       *key, u64 blocknr, int slot, int level)
845 {
846         struct btrfs_node *lower;
847         int nritems;
848
849         BUG_ON(!path->nodes[level]);
850         lower = btrfs_buffer_node(path->nodes[level]);
851         nritems = btrfs_header_nritems(&lower->header);
852         if (slot > nritems)
853                 BUG();
854         if (nritems == BTRFS_NODEPTRS_PER_BLOCK(root))
855                 BUG();
856         if (slot != nritems) {
857                 btrfs_memmove(root, lower, lower->ptrs + slot + 1,
858                               lower->ptrs + slot,
859                               (nritems - slot) * sizeof(struct btrfs_key_ptr));
860         }
861         btrfs_memcpy(root, lower, &lower->ptrs[slot].key,
862                      key, sizeof(struct btrfs_disk_key));
863         btrfs_set_node_blockptr(lower, slot, blocknr);
864         btrfs_set_header_nritems(&lower->header, nritems + 1);
865         btrfs_mark_buffer_dirty(path->nodes[level]);
866         return 0;
867 }
868
869 /*
870  * split the node at the specified level in path in two.
871  * The path is corrected to point to the appropriate node after the split
872  *
873  * Before splitting this tries to make some room in the node by pushing
874  * left and right, if either one works, it returns right away.
875  *
876  * returns 0 on success and < 0 on failure
877  */
878 static int split_node(struct btrfs_trans_handle *trans, struct btrfs_root
879                       *root, struct btrfs_path *path, int level)
880 {
881         struct buffer_head *t;
882         struct btrfs_node *c;
883         struct buffer_head *split_buffer;
884         struct btrfs_node *split;
885         int mid;
886         int ret;
887         int wret;
888         u32 c_nritems;
889
890         t = path->nodes[level];
891         c = btrfs_buffer_node(t);
892         if (t == root->node) {
893                 /* trying to split the root, lets make a new one */
894                 ret = insert_new_root(trans, root, path, level + 1);
895                 if (ret)
896                         return ret;
897         } else {
898                 ret = push_nodes_for_insert(trans, root, path, level);
899                 t = path->nodes[level];
900                 c = btrfs_buffer_node(t);
901                 if (!ret &&
902                     btrfs_header_nritems(&c->header) <
903                     BTRFS_NODEPTRS_PER_BLOCK(root) - 1)
904                         return 0;
905         }
906
907         c_nritems = btrfs_header_nritems(&c->header);
908         split_buffer = btrfs_alloc_free_block(trans, root, t->b_blocknr);
909         split = btrfs_buffer_node(split_buffer);
910         btrfs_set_header_flags(&split->header, btrfs_header_flags(&c->header));
911         btrfs_set_header_level(&split->header, btrfs_header_level(&c->header));
912         btrfs_set_header_blocknr(&split->header, bh_blocknr(split_buffer));
913         btrfs_set_header_generation(&split->header, trans->transid);
914         btrfs_set_header_owner(&split->header, root->root_key.objectid);
915         memcpy(split->header.fsid, root->fs_info->disk_super->fsid,
916                sizeof(split->header.fsid));
917         mid = (c_nritems + 1) / 2;
918         btrfs_memcpy(root, split, split->ptrs, c->ptrs + mid,
919                      (c_nritems - mid) * sizeof(struct btrfs_key_ptr));
920         btrfs_set_header_nritems(&split->header, c_nritems - mid);
921         btrfs_set_header_nritems(&c->header, mid);
922         ret = 0;
923
924         btrfs_mark_buffer_dirty(t);
925         btrfs_mark_buffer_dirty(split_buffer);
926         wret = insert_ptr(trans, root, path, &split->ptrs[0].key,
927                           bh_blocknr(split_buffer), path->slots[level + 1] + 1,
928                           level + 1);
929         if (wret)
930                 ret = wret;
931
932         if (path->slots[level] >= mid) {
933                 path->slots[level] -= mid;
934                 btrfs_block_release(root, t);
935                 path->nodes[level] = split_buffer;
936                 path->slots[level + 1] += 1;
937         } else {
938                 btrfs_block_release(root, split_buffer);
939         }
940         return ret;
941 }
942
943 /*
944  * how many bytes are required to store the items in a leaf.  start
945  * and nr indicate which items in the leaf to check.  This totals up the
946  * space used both by the item structs and the item data
947  */
948 static int leaf_space_used(struct btrfs_leaf *l, int start, int nr)
949 {
950         int data_len;
951         int nritems = btrfs_header_nritems(&l->header);
952         int end = min(nritems, start + nr) - 1;
953
954         if (!nr)
955                 return 0;
956         data_len = btrfs_item_end(l->items + start);
957         data_len = data_len - btrfs_item_offset(l->items + end);
958         data_len += sizeof(struct btrfs_item) * nr;
959         WARN_ON(data_len < 0);
960         return data_len;
961 }
962
963 /*
964  * The space between the end of the leaf items and
965  * the start of the leaf data.  IOW, how much room
966  * the leaf has left for both items and data
967  */
968 int btrfs_leaf_free_space(struct btrfs_root *root, struct btrfs_leaf *leaf)
969 {
970         int nritems = btrfs_header_nritems(&leaf->header);
971         return BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
972 }
973
974 /*
975  * push some data in the path leaf to the right, trying to free up at
976  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
977  *
978  * returns 1 if the push failed because the other node didn't have enough
979  * room, 0 if everything worked out and < 0 if there were major errors.
980  */
981 static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
982                            *root, struct btrfs_path *path, int data_size)
983 {
984         struct buffer_head *left_buf = path->nodes[0];
985         struct btrfs_leaf *left = btrfs_buffer_leaf(left_buf);
986         struct btrfs_leaf *right;
987         struct buffer_head *right_buf;
988         struct buffer_head *upper;
989         struct btrfs_node *upper_node;
990         int slot;
991         int i;
992         int free_space;
993         int push_space = 0;
994         int push_items = 0;
995         struct btrfs_item *item;
996         u32 left_nritems;
997         u32 right_nritems;
998
999         slot = path->slots[1];
1000         if (!path->nodes[1]) {
1001                 return 1;
1002         }
1003         upper = path->nodes[1];
1004         upper_node = btrfs_buffer_node(upper);
1005         if (slot >= btrfs_header_nritems(&upper_node->header) - 1) {
1006                 return 1;
1007         }
1008         right_buf = read_tree_block(root,
1009                     btrfs_node_blockptr(btrfs_buffer_node(upper), slot + 1));
1010         right = btrfs_buffer_leaf(right_buf);
1011         free_space = btrfs_leaf_free_space(root, right);
1012         if (free_space < data_size + sizeof(struct btrfs_item)) {
1013                 btrfs_block_release(root, right_buf);
1014                 return 1;
1015         }
1016         /* cow and double check */
1017         btrfs_cow_block(trans, root, right_buf, upper, slot + 1, &right_buf);
1018         right = btrfs_buffer_leaf(right_buf);
1019         free_space = btrfs_leaf_free_space(root, right);
1020         if (free_space < data_size + sizeof(struct btrfs_item)) {
1021                 btrfs_block_release(root, right_buf);
1022                 return 1;
1023         }
1024
1025         left_nritems = btrfs_header_nritems(&left->header);
1026         if (left_nritems == 0) {
1027                 btrfs_block_release(root, right_buf);
1028                 return 1;
1029         }
1030         for (i = left_nritems - 1; i >= 1; i--) {
1031                 item = left->items + i;
1032                 if (path->slots[0] == i)
1033                         push_space += data_size + sizeof(*item);
1034                 if (btrfs_item_size(item) + sizeof(*item) + push_space >
1035                     free_space)
1036                         break;
1037                 push_items++;
1038                 push_space += btrfs_item_size(item) + sizeof(*item);
1039         }
1040         if (push_items == 0) {
1041                 btrfs_block_release(root, right_buf);
1042                 return 1;
1043         }
1044         if (push_items == left_nritems)
1045                 WARN_ON(1);
1046         right_nritems = btrfs_header_nritems(&right->header);
1047         /* push left to right */
1048         push_space = btrfs_item_end(left->items + left_nritems - push_items);
1049         push_space -= leaf_data_end(root, left);
1050         /* make room in the right data area */
1051         btrfs_memmove(root, right, btrfs_leaf_data(right) +
1052                       leaf_data_end(root, right) - push_space,
1053                       btrfs_leaf_data(right) +
1054                       leaf_data_end(root, right), BTRFS_LEAF_DATA_SIZE(root) -
1055                       leaf_data_end(root, right));
1056         /* copy from the left data area */
1057         btrfs_memcpy(root, right, btrfs_leaf_data(right) +
1058                      BTRFS_LEAF_DATA_SIZE(root) - push_space,
1059                      btrfs_leaf_data(left) + leaf_data_end(root, left),
1060                      push_space);
1061         btrfs_memmove(root, right, right->items + push_items, right->items,
1062                 right_nritems * sizeof(struct btrfs_item));
1063         /* copy the items from left to right */
1064         btrfs_memcpy(root, right, right->items, left->items +
1065                      left_nritems - push_items,
1066                      push_items * sizeof(struct btrfs_item));
1067
1068         /* update the item pointers */
1069         right_nritems += push_items;
1070         btrfs_set_header_nritems(&right->header, right_nritems);
1071         push_space = BTRFS_LEAF_DATA_SIZE(root);
1072         for (i = 0; i < right_nritems; i++) {
1073                 btrfs_set_item_offset(right->items + i, push_space -
1074                                       btrfs_item_size(right->items + i));
1075                 push_space = btrfs_item_offset(right->items + i);
1076         }
1077         left_nritems -= push_items;
1078         btrfs_set_header_nritems(&left->header, left_nritems);
1079
1080         btrfs_mark_buffer_dirty(left_buf);
1081         btrfs_mark_buffer_dirty(right_buf);
1082
1083         btrfs_memcpy(root, upper_node, &upper_node->ptrs[slot + 1].key,
1084                 &right->items[0].key, sizeof(struct btrfs_disk_key));
1085         btrfs_mark_buffer_dirty(upper);
1086
1087         /* then fixup the leaf pointer in the path */
1088         if (path->slots[0] >= left_nritems) {
1089                 path->slots[0] -= left_nritems;
1090                 btrfs_block_release(root, path->nodes[0]);
1091                 path->nodes[0] = right_buf;
1092                 path->slots[1] += 1;
1093         } else {
1094                 btrfs_block_release(root, right_buf);
1095         }
1096         return 0;
1097 }
1098 /*
1099  * push some data in the path leaf to the left, trying to free up at
1100  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
1101  */
1102 static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
1103                           *root, struct btrfs_path *path, int data_size)
1104 {
1105         struct buffer_head *right_buf = path->nodes[0];
1106         struct btrfs_leaf *right = btrfs_buffer_leaf(right_buf);
1107         struct buffer_head *t;
1108         struct btrfs_leaf *left;
1109         int slot;
1110         int i;
1111         int free_space;
1112         int push_space = 0;
1113         int push_items = 0;
1114         struct btrfs_item *item;
1115         u32 old_left_nritems;
1116         int ret = 0;
1117         int wret;
1118
1119         slot = path->slots[1];
1120         if (slot == 0) {
1121                 return 1;
1122         }
1123         if (!path->nodes[1]) {
1124                 return 1;
1125         }
1126         t = read_tree_block(root,
1127             btrfs_node_blockptr(btrfs_buffer_node(path->nodes[1]), slot - 1));
1128         left = btrfs_buffer_leaf(t);
1129         free_space = btrfs_leaf_free_space(root, left);
1130         if (free_space < data_size + sizeof(struct btrfs_item)) {
1131                 btrfs_block_release(root, t);
1132                 return 1;
1133         }
1134
1135         /* cow and double check */
1136         btrfs_cow_block(trans, root, t, path->nodes[1], slot - 1, &t);
1137         left = btrfs_buffer_leaf(t);
1138         free_space = btrfs_leaf_free_space(root, left);
1139         if (free_space < data_size + sizeof(struct btrfs_item)) {
1140                 btrfs_block_release(root, t);
1141                 return 1;
1142         }
1143
1144         if (btrfs_header_nritems(&right->header) == 0) {
1145                 btrfs_block_release(root, t);
1146                 return 1;
1147         }
1148
1149         for (i = 0; i < btrfs_header_nritems(&right->header) - 1; i++) {
1150                 item = right->items + i;
1151                 if (path->slots[0] == i)
1152                         push_space += data_size + sizeof(*item);
1153                 if (btrfs_item_size(item) + sizeof(*item) + push_space >
1154                     free_space)
1155                         break;
1156                 push_items++;
1157                 push_space += btrfs_item_size(item) + sizeof(*item);
1158         }
1159         if (push_items == 0) {
1160                 btrfs_block_release(root, t);
1161                 return 1;
1162         }
1163         if (push_items == btrfs_header_nritems(&right->header))
1164                 WARN_ON(1);
1165         /* push data from right to left */
1166         btrfs_memcpy(root, left, left->items +
1167                      btrfs_header_nritems(&left->header),
1168                      right->items, push_items * sizeof(struct btrfs_item));
1169         push_space = BTRFS_LEAF_DATA_SIZE(root) -
1170                      btrfs_item_offset(right->items + push_items -1);
1171         btrfs_memcpy(root, left, btrfs_leaf_data(left) +
1172                      leaf_data_end(root, left) - push_space,
1173                      btrfs_leaf_data(right) +
1174                      btrfs_item_offset(right->items + push_items - 1),
1175                      push_space);
1176         old_left_nritems = btrfs_header_nritems(&left->header);
1177         BUG_ON(old_left_nritems < 0);
1178
1179         for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
1180                 u32 ioff = btrfs_item_offset(left->items + i);
1181                 btrfs_set_item_offset(left->items + i, ioff -
1182                                      (BTRFS_LEAF_DATA_SIZE(root) -
1183                                       btrfs_item_offset(left->items +
1184                                                         old_left_nritems - 1)));
1185         }
1186         btrfs_set_header_nritems(&left->header, old_left_nritems + push_items);
1187
1188         /* fixup right node */
1189         push_space = btrfs_item_offset(right->items + push_items - 1) -
1190                      leaf_data_end(root, right);
1191         btrfs_memmove(root, right, btrfs_leaf_data(right) +
1192                       BTRFS_LEAF_DATA_SIZE(root) - push_space,
1193                       btrfs_leaf_data(right) +
1194                       leaf_data_end(root, right), push_space);
1195         btrfs_memmove(root, right, right->items, right->items + push_items,
1196                 (btrfs_header_nritems(&right->header) - push_items) *
1197                 sizeof(struct btrfs_item));
1198         btrfs_set_header_nritems(&right->header,
1199                                  btrfs_header_nritems(&right->header) -
1200                                  push_items);
1201         push_space = BTRFS_LEAF_DATA_SIZE(root);
1202
1203         for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
1204                 btrfs_set_item_offset(right->items + i, push_space -
1205                                       btrfs_item_size(right->items + i));
1206                 push_space = btrfs_item_offset(right->items + i);
1207         }
1208
1209         btrfs_mark_buffer_dirty(t);
1210         btrfs_mark_buffer_dirty(right_buf);
1211         wret = fixup_low_keys(trans, root, path, &right->items[0].key, 1);
1212         if (wret)
1213                 ret = wret;
1214
1215         /* then fixup the leaf pointer in the path */
1216         if (path->slots[0] < push_items) {
1217                 path->slots[0] += old_left_nritems;
1218                 btrfs_block_release(root, path->nodes[0]);
1219                 path->nodes[0] = t;
1220                 path->slots[1] -= 1;
1221         } else {
1222                 btrfs_block_release(root, t);
1223                 path->slots[0] -= push_items;
1224         }
1225         BUG_ON(path->slots[0] < 0);
1226         return ret;
1227 }
1228
1229 /*
1230  * split the path's leaf in two, making sure there is at least data_size
1231  * available for the resulting leaf level of the path.
1232  *
1233  * returns 0 if all went well and < 0 on failure.
1234  */
1235 static int split_leaf(struct btrfs_trans_handle *trans, struct btrfs_root
1236                       *root, struct btrfs_key *ins_key,
1237                       struct btrfs_path *path, int data_size)
1238 {
1239         struct buffer_head *l_buf;
1240         struct btrfs_leaf *l;
1241         u32 nritems;
1242         int mid;
1243         int slot;
1244         struct btrfs_leaf *right;
1245         struct buffer_head *right_buffer;
1246         int space_needed = data_size + sizeof(struct btrfs_item);
1247         int data_copy_size;
1248         int rt_data_off;
1249         int i;
1250         int ret = 0;
1251         int wret;
1252         int double_split = 0;
1253         struct btrfs_disk_key disk_key;
1254
1255         /* first try to make some room by pushing left and right */
1256         wret = push_leaf_left(trans, root, path, data_size);
1257         if (wret < 0)
1258                 return wret;
1259         if (wret) {
1260                 wret = push_leaf_right(trans, root, path, data_size);
1261                 if (wret < 0)
1262                         return wret;
1263         }
1264         l_buf = path->nodes[0];
1265         l = btrfs_buffer_leaf(l_buf);
1266
1267         /* did the pushes work? */
1268         if (btrfs_leaf_free_space(root, l) >=
1269             sizeof(struct btrfs_item) + data_size)
1270                 return 0;
1271
1272         if (!path->nodes[1]) {
1273                 ret = insert_new_root(trans, root, path, 1);
1274                 if (ret)
1275                         return ret;
1276         }
1277         slot = path->slots[0];
1278         nritems = btrfs_header_nritems(&l->header);
1279         mid = (nritems + 1)/ 2;
1280         right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr);
1281         BUG_ON(!right_buffer);
1282         right = btrfs_buffer_leaf(right_buffer);
1283         memset(&right->header, 0, sizeof(right->header));
1284         btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer));
1285         btrfs_set_header_generation(&right->header, trans->transid);
1286         btrfs_set_header_owner(&right->header, root->root_key.objectid);
1287         btrfs_set_header_level(&right->header, 0);
1288         memcpy(right->header.fsid, root->fs_info->disk_super->fsid,
1289                sizeof(right->header.fsid));
1290         if (mid <= slot) {
1291                 if (nritems == 1 ||
1292                     leaf_space_used(l, mid, nritems - mid) + space_needed >
1293                         BTRFS_LEAF_DATA_SIZE(root)) {
1294                         if (slot >= nritems) {
1295                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1296                                 btrfs_set_header_nritems(&right->header, 0);
1297                                 wret = insert_ptr(trans, root, path,
1298                                                   &disk_key,
1299                                                   bh_blocknr(right_buffer),
1300                                                   path->slots[1] + 1, 1);
1301                                 if (wret)
1302                                         ret = wret;
1303                                 btrfs_block_release(root, path->nodes[0]);
1304                                 path->nodes[0] = right_buffer;
1305                                 path->slots[0] = 0;
1306                                 path->slots[1] += 1;
1307                                 return ret;
1308                         }
1309                         mid = slot;
1310                         double_split = 1;
1311                 }
1312         } else {
1313                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
1314                         BTRFS_LEAF_DATA_SIZE(root)) {
1315                         if (slot == 0) {
1316                                 btrfs_cpu_key_to_disk(&disk_key, ins_key);
1317                                 btrfs_set_header_nritems(&right->header, 0);
1318                                 wret = insert_ptr(trans, root, path,
1319                                                   &disk_key,
1320                                                   bh_blocknr(right_buffer),
1321                                                   path->slots[1] - 1, 1);
1322                                 if (wret)
1323                                         ret = wret;
1324                                 btrfs_block_release(root, path->nodes[0]);
1325                                 path->nodes[0] = right_buffer;
1326                                 path->slots[0] = 0;
1327                                 path->slots[1] -= 1;
1328                                 if (path->slots[1] == 0) {
1329                                         wret = fixup_low_keys(trans, root,
1330                                                    path, &disk_key, 1);
1331                                         if (wret)
1332                                                 ret = wret;
1333                                 }
1334                                 return ret;
1335                         }
1336                         mid = slot;
1337                         double_split = 1;
1338                 }
1339         }
1340         btrfs_set_header_nritems(&right->header, nritems - mid);
1341         data_copy_size = btrfs_item_end(l->items + mid) -
1342                          leaf_data_end(root, l);
1343         btrfs_memcpy(root, right, right->items, l->items + mid,
1344                      (nritems - mid) * sizeof(struct btrfs_item));
1345         btrfs_memcpy(root, right,
1346                      btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) -
1347                      data_copy_size, btrfs_leaf_data(l) +
1348                      leaf_data_end(root, l), data_copy_size);
1349         rt_data_off = BTRFS_LEAF_DATA_SIZE(root) -
1350                       btrfs_item_end(l->items + mid);
1351
1352         for (i = 0; i < btrfs_header_nritems(&right->header); i++) {
1353                 u32 ioff = btrfs_item_offset(right->items + i);
1354                 btrfs_set_item_offset(right->items + i, ioff + rt_data_off);
1355         }
1356
1357         btrfs_set_header_nritems(&l->header, mid);
1358         ret = 0;
1359         wret = insert_ptr(trans, root, path, &right->items[0].key,
1360                           bh_blocknr(right_buffer), path->slots[1] + 1, 1);
1361         if (wret)
1362                 ret = wret;
1363         btrfs_mark_buffer_dirty(right_buffer);
1364         btrfs_mark_buffer_dirty(l_buf);
1365         BUG_ON(path->slots[0] != slot);
1366         if (mid <= slot) {
1367                 btrfs_block_release(root, path->nodes[0]);
1368                 path->nodes[0] = right_buffer;
1369                 path->slots[0] -= mid;
1370                 path->slots[1] += 1;
1371         } else
1372                 btrfs_block_release(root, right_buffer);
1373         BUG_ON(path->slots[0] < 0);
1374
1375         if (!double_split)
1376                 return ret;
1377         right_buffer = btrfs_alloc_free_block(trans, root, l_buf->b_blocknr);
1378         BUG_ON(!right_buffer);
1379         right = btrfs_buffer_leaf(right_buffer);
1380         memset(&right->header, 0, sizeof(right->header));
1381         btrfs_set_header_blocknr(&right->header, bh_blocknr(right_buffer));
1382         btrfs_set_header_generation(&right->header, trans->transid);
1383         btrfs_set_header_owner(&right->header, root->root_key.objectid);
1384         btrfs_set_header_level(&right->header, 0);
1385         memcpy(right->header.fsid, root->fs_info->disk_super->fsid,
1386                sizeof(right->header.fsid));
1387         btrfs_cpu_key_to_disk(&disk_key, ins_key);
1388         btrfs_set_header_nritems(&right->header, 0);
1389         wret = insert_ptr(trans, root, path,
1390                           &disk_key,
1391                           bh_blocknr(right_buffer),
1392                           path->slots[1], 1);
1393         if (wret)
1394                 ret = wret;
1395         if (path->slots[1] == 0) {
1396                 wret = fixup_low_keys(trans, root, path, &disk_key, 1);
1397                 if (wret)
1398                         ret = wret;
1399         }
1400         btrfs_block_release(root, path->nodes[0]);
1401         path->nodes[0] = right_buffer;
1402         path->slots[0] = 0;
1403         check_node(root, path, 1);
1404         check_leaf(root, path, 0);
1405         return ret;
1406 }
1407
1408 int btrfs_truncate_item(struct btrfs_trans_handle *trans,
1409                         struct btrfs_root *root,
1410                         struct btrfs_path *path,
1411                         u32 new_size)
1412 {
1413         int ret = 0;
1414         int slot;
1415         int slot_orig;
1416         struct btrfs_leaf *leaf;
1417         struct buffer_head *leaf_buf;
1418         u32 nritems;
1419         unsigned int data_end;
1420         unsigned int old_data_start;
1421         unsigned int old_size;
1422         unsigned int size_diff;
1423         int i;
1424
1425         slot_orig = path->slots[0];
1426         leaf_buf = path->nodes[0];
1427         leaf = btrfs_buffer_leaf(leaf_buf);
1428
1429         nritems = btrfs_header_nritems(&leaf->header);
1430         data_end = leaf_data_end(root, leaf);
1431
1432         slot = path->slots[0];
1433         old_data_start = btrfs_item_offset(leaf->items + slot);
1434         old_size = btrfs_item_size(leaf->items + slot);
1435         BUG_ON(old_size <= new_size);
1436         size_diff = old_size - new_size;
1437
1438         BUG_ON(slot < 0);
1439         BUG_ON(slot >= nritems);
1440
1441         /*
1442          * item0..itemN ... dataN.offset..dataN.size .. data0.size
1443          */
1444         /* first correct the data pointers */
1445         for (i = slot; i < nritems; i++) {
1446                 u32 ioff = btrfs_item_offset(leaf->items + i);
1447                 btrfs_set_item_offset(leaf->items + i,
1448                                       ioff + size_diff);
1449         }
1450         /* shift the data */
1451         btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) +
1452                       data_end + size_diff, btrfs_leaf_data(leaf) +
1453                       data_end, old_data_start + new_size - data_end);
1454         btrfs_set_item_size(leaf->items + slot, new_size);
1455         btrfs_mark_buffer_dirty(leaf_buf);
1456
1457         ret = 0;
1458         if (btrfs_leaf_free_space(root, leaf) < 0)
1459                 BUG();
1460         check_leaf(root, path, 0);
1461         return ret;
1462 }
1463
1464 int btrfs_extend_item(struct btrfs_trans_handle *trans, struct btrfs_root
1465                       *root, struct btrfs_path *path, u32 data_size)
1466 {
1467         int ret = 0;
1468         int slot;
1469         int slot_orig;
1470         struct btrfs_leaf *leaf;
1471         struct buffer_head *leaf_buf;
1472         u32 nritems;
1473         unsigned int data_end;
1474         unsigned int old_data;
1475         unsigned int old_size;
1476         int i;
1477
1478         slot_orig = path->slots[0];
1479         leaf_buf = path->nodes[0];
1480         leaf = btrfs_buffer_leaf(leaf_buf);
1481
1482         nritems = btrfs_header_nritems(&leaf->header);
1483         data_end = leaf_data_end(root, leaf);
1484
1485         if (btrfs_leaf_free_space(root, leaf) < data_size)
1486                 BUG();
1487         slot = path->slots[0];
1488         old_data = btrfs_item_end(leaf->items + slot);
1489
1490         BUG_ON(slot < 0);
1491         BUG_ON(slot >= nritems);
1492
1493         /*
1494          * item0..itemN ... dataN.offset..dataN.size .. data0.size
1495          */
1496         /* first correct the data pointers */
1497         for (i = slot; i < nritems; i++) {
1498                 u32 ioff = btrfs_item_offset(leaf->items + i);
1499                 btrfs_set_item_offset(leaf->items + i,
1500                                       ioff - data_size);
1501         }
1502         /* shift the data */
1503         btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) +
1504                       data_end - data_size, btrfs_leaf_data(leaf) +
1505                       data_end, old_data - data_end);
1506         data_end = old_data;
1507         old_size = btrfs_item_size(leaf->items + slot);
1508         btrfs_set_item_size(leaf->items + slot, old_size + data_size);
1509         btrfs_mark_buffer_dirty(leaf_buf);
1510
1511         ret = 0;
1512         if (btrfs_leaf_free_space(root, leaf) < 0)
1513                 BUG();
1514         check_leaf(root, path, 0);
1515         return ret;
1516 }
1517
1518 /*
1519  * Given a key and some data, insert an item into the tree.
1520  * This does all the path init required, making room in the tree if needed.
1521  */
1522 int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, struct btrfs_root
1523                             *root, struct btrfs_path *path, struct btrfs_key
1524                             *cpu_key, u32 data_size)
1525 {
1526         int ret = 0;
1527         int slot;
1528         int slot_orig;
1529         struct btrfs_leaf *leaf;
1530         struct buffer_head *leaf_buf;
1531         u32 nritems;
1532         unsigned int data_end;
1533         struct btrfs_disk_key disk_key;
1534
1535         btrfs_cpu_key_to_disk(&disk_key, cpu_key);
1536
1537         /* create a root if there isn't one */
1538         if (!root->node)
1539                 BUG();
1540         ret = btrfs_search_slot(trans, root, cpu_key, path, data_size, 1);
1541         if (ret == 0) {
1542                 return -EEXIST;
1543         }
1544         if (ret < 0)
1545                 goto out;
1546
1547         slot_orig = path->slots[0];
1548         leaf_buf = path->nodes[0];
1549         leaf = btrfs_buffer_leaf(leaf_buf);
1550
1551         nritems = btrfs_header_nritems(&leaf->header);
1552         data_end = leaf_data_end(root, leaf);
1553
1554         if (btrfs_leaf_free_space(root, leaf) <
1555             sizeof(struct btrfs_item) + data_size) {
1556                 BUG();
1557         }
1558         slot = path->slots[0];
1559         BUG_ON(slot < 0);
1560         if (slot != nritems) {
1561                 int i;
1562                 unsigned int old_data = btrfs_item_end(leaf->items + slot);
1563
1564                 /*
1565                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
1566                  */
1567                 /* first correct the data pointers */
1568                 for (i = slot; i < nritems; i++) {
1569                         u32 ioff = btrfs_item_offset(leaf->items + i);
1570                         btrfs_set_item_offset(leaf->items + i,
1571                                               ioff - data_size);
1572                 }
1573
1574                 /* shift the items */
1575                 btrfs_memmove(root, leaf, leaf->items + slot + 1,
1576                               leaf->items + slot,
1577                               (nritems - slot) * sizeof(struct btrfs_item));
1578
1579                 /* shift the data */
1580                 btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) +
1581                               data_end - data_size, btrfs_leaf_data(leaf) +
1582                               data_end, old_data - data_end);
1583                 data_end = old_data;
1584         }
1585         /* setup the item for the new data */
1586         btrfs_memcpy(root, leaf, &leaf->items[slot].key, &disk_key,
1587                      sizeof(struct btrfs_disk_key));
1588         btrfs_set_item_offset(leaf->items + slot, data_end - data_size);
1589         btrfs_set_item_size(leaf->items + slot, data_size);
1590         btrfs_set_header_nritems(&leaf->header, nritems + 1);
1591         btrfs_mark_buffer_dirty(leaf_buf);
1592
1593         ret = 0;
1594         if (slot == 0)
1595                 ret = fixup_low_keys(trans, root, path, &disk_key, 1);
1596
1597         if (btrfs_leaf_free_space(root, leaf) < 0)
1598                 BUG();
1599         check_leaf(root, path, 0);
1600 out:
1601         return ret;
1602 }
1603
1604 /*
1605  * Given a key and some data, insert an item into the tree.
1606  * This does all the path init required, making room in the tree if needed.
1607  */
1608 int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
1609                       *root, struct btrfs_key *cpu_key, void *data, u32
1610                       data_size)
1611 {
1612         int ret = 0;
1613         struct btrfs_path *path;
1614         u8 *ptr;
1615
1616         path = btrfs_alloc_path();
1617         BUG_ON(!path);
1618         btrfs_init_path(path);
1619         ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
1620         if (!ret) {
1621                 ptr = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
1622                                      path->slots[0], u8);
1623                 btrfs_memcpy(root, path->nodes[0]->b_data,
1624                              ptr, data, data_size);
1625                 btrfs_mark_buffer_dirty(path->nodes[0]);
1626         }
1627         btrfs_release_path(root, path);
1628         btrfs_free_path(path);
1629         return ret;
1630 }
1631
1632 /*
1633  * delete the pointer from a given node.
1634  *
1635  * If the delete empties a node, the node is removed from the tree,
1636  * continuing all the way the root if required.  The root is converted into
1637  * a leaf if all the nodes are emptied.
1638  */
1639 static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1640                    struct btrfs_path *path, int level, int slot)
1641 {
1642         struct btrfs_node *node;
1643         struct buffer_head *parent = path->nodes[level];
1644         u32 nritems;
1645         int ret = 0;
1646         int wret;
1647
1648         node = btrfs_buffer_node(parent);
1649         nritems = btrfs_header_nritems(&node->header);
1650         if (slot != nritems -1) {
1651                 btrfs_memmove(root, node, node->ptrs + slot,
1652                               node->ptrs + slot + 1,
1653                               sizeof(struct btrfs_key_ptr) *
1654                               (nritems - slot - 1));
1655         }
1656         nritems--;
1657         btrfs_set_header_nritems(&node->header, nritems);
1658         if (nritems == 0 && parent == root->node) {
1659                 struct btrfs_header *header = btrfs_buffer_header(root->node);
1660                 BUG_ON(btrfs_header_level(header) != 1);
1661                 /* just turn the root into a leaf and break */
1662                 btrfs_set_header_level(header, 0);
1663         } else if (slot == 0) {
1664                 wret = fixup_low_keys(trans, root, path, &node->ptrs[0].key,
1665                                       level + 1);
1666                 if (wret)
1667                         ret = wret;
1668         }
1669         btrfs_mark_buffer_dirty(parent);
1670         return ret;
1671 }
1672
1673 /*
1674  * delete the item at the leaf level in path.  If that empties
1675  * the leaf, remove it from the tree
1676  */
1677 int btrfs_del_item(struct btrfs_trans_handle *trans, struct btrfs_root *root,
1678                    struct btrfs_path *path)
1679 {
1680         int slot;
1681         struct btrfs_leaf *leaf;
1682         struct buffer_head *leaf_buf;
1683         int doff;
1684         int dsize;
1685         int ret = 0;
1686         int wret;
1687         u32 nritems;
1688
1689         leaf_buf = path->nodes[0];
1690         leaf = btrfs_buffer_leaf(leaf_buf);
1691         slot = path->slots[0];
1692         doff = btrfs_item_offset(leaf->items + slot);
1693         dsize = btrfs_item_size(leaf->items + slot);
1694         nritems = btrfs_header_nritems(&leaf->header);
1695
1696         if (slot != nritems - 1) {
1697                 int i;
1698                 int data_end = leaf_data_end(root, leaf);
1699                 btrfs_memmove(root, leaf, btrfs_leaf_data(leaf) +
1700                               data_end + dsize,
1701                               btrfs_leaf_data(leaf) + data_end,
1702                               doff - data_end);
1703                 for (i = slot + 1; i < nritems; i++) {
1704                         u32 ioff = btrfs_item_offset(leaf->items + i);
1705                         btrfs_set_item_offset(leaf->items + i, ioff + dsize);
1706                 }
1707                 btrfs_memmove(root, leaf, leaf->items + slot,
1708                               leaf->items + slot + 1,
1709                               sizeof(struct btrfs_item) *
1710                               (nritems - slot - 1));
1711         }
1712         btrfs_set_header_nritems(&leaf->header, nritems - 1);
1713         nritems--;
1714         /* delete the leaf if we've emptied it */
1715         if (nritems == 0) {
1716                 if (leaf_buf == root->node) {
1717                         btrfs_set_header_level(&leaf->header, 0);
1718                 } else {
1719                         clean_tree_block(trans, root, leaf_buf);
1720                         wait_on_buffer(leaf_buf);
1721                         wret = del_ptr(trans, root, path, 1, path->slots[1]);
1722                         if (wret)
1723                                 ret = wret;
1724                         wret = btrfs_free_extent(trans, root,
1725                                                  bh_blocknr(leaf_buf), 1, 1);
1726                         if (wret)
1727                                 ret = wret;
1728                 }
1729         } else {
1730                 int used = leaf_space_used(leaf, 0, nritems);
1731                 if (slot == 0) {
1732                         wret = fixup_low_keys(trans, root, path,
1733                                               &leaf->items[0].key, 1);
1734                         if (wret)
1735                                 ret = wret;
1736                 }
1737
1738                 /* delete the leaf if it is mostly empty */
1739                 if (used < BTRFS_LEAF_DATA_SIZE(root) / 3) {
1740                         /* push_leaf_left fixes the path.
1741                          * make sure the path still points to our leaf
1742                          * for possible call to del_ptr below
1743                          */
1744                         slot = path->slots[1];
1745                         get_bh(leaf_buf);
1746                         wret = push_leaf_left(trans, root, path, 1);
1747                         if (wret < 0)
1748                                 ret = wret;
1749                         if (path->nodes[0] == leaf_buf &&
1750                             btrfs_header_nritems(&leaf->header)) {
1751                                 wret = push_leaf_right(trans, root, path, 1);
1752                                 if (wret < 0)
1753                                         ret = wret;
1754                         }
1755                         if (btrfs_header_nritems(&leaf->header) == 0) {
1756                                 u64 blocknr = bh_blocknr(leaf_buf);
1757                                 clean_tree_block(trans, root, leaf_buf);
1758                                 wait_on_buffer(leaf_buf);
1759                                 wret = del_ptr(trans, root, path, 1, slot);
1760                                 if (wret)
1761                                         ret = wret;
1762                                 btrfs_block_release(root, leaf_buf);
1763                                 wret = btrfs_free_extent(trans, root, blocknr,
1764                                                          1, 1);
1765                                 if (wret)
1766                                         ret = wret;
1767                         } else {
1768                                 btrfs_mark_buffer_dirty(leaf_buf);
1769                                 btrfs_block_release(root, leaf_buf);
1770                         }
1771                 } else {
1772                         btrfs_mark_buffer_dirty(leaf_buf);
1773                 }
1774         }
1775         return ret;
1776 }
1777
1778 /*
1779  * walk up the tree as far as required to find the next leaf.
1780  * returns 0 if it found something or 1 if there are no greater leaves.
1781  * returns < 0 on io errors.
1782  */
1783 int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
1784 {
1785         int slot;
1786         int level = 1;
1787         u64 blocknr;
1788         struct buffer_head *c;
1789         struct btrfs_node *c_node;
1790         struct buffer_head *next = NULL;
1791
1792         while(level < BTRFS_MAX_LEVEL) {
1793                 if (!path->nodes[level])
1794                         return 1;
1795                 slot = path->slots[level] + 1;
1796                 c = path->nodes[level];
1797                 c_node = btrfs_buffer_node(c);
1798                 if (slot >= btrfs_header_nritems(&c_node->header)) {
1799                         level++;
1800                         continue;
1801                 }
1802                 blocknr = btrfs_node_blockptr(c_node, slot);
1803                 if (next)
1804                         btrfs_block_release(root, next);
1805                 next = read_tree_block(root, blocknr);
1806                 break;
1807         }
1808         path->slots[level] = slot;
1809         while(1) {
1810                 level--;
1811                 c = path->nodes[level];
1812                 btrfs_block_release(root, c);
1813                 path->nodes[level] = next;
1814                 path->slots[level] = 0;
1815                 if (!level)
1816                         break;
1817                 next = read_tree_block(root,
1818                        btrfs_node_blockptr(btrfs_buffer_node(next), 0));
1819         }
1820         return 0;
1821 }