Btrfs: more return code checking
[linux-2.6] / fs / btrfs / ctree.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include "kerncompat.h"
4 #include "radix-tree.h"
5 #include "ctree.h"
6 #include "disk-io.h"
7 #include "print-tree.h"
8
9 static int split_node(struct ctree_root *root, struct ctree_path *path,
10                       int level);
11 static int split_leaf(struct ctree_root *root, struct ctree_path *path,
12                       int data_size);
13 static int push_node_left(struct ctree_root *root, struct ctree_path *path,
14                           int level);
15 static int push_node_right(struct ctree_root *root,
16                     struct ctree_path *path, int level);
17 static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level);
18
19 inline void init_path(struct ctree_path *p)
20 {
21         memset(p, 0, sizeof(*p));
22 }
23
24 void release_path(struct ctree_root *root, struct ctree_path *p)
25 {
26         int i;
27         for (i = 0; i < MAX_LEVEL; i++) {
28                 if (!p->nodes[i])
29                         break;
30                 tree_block_release(root, p->nodes[i]);
31         }
32         memset(p, 0, sizeof(*p));
33 }
34
35 /*
36  * The leaf data grows from end-to-front in the node.
37  * this returns the address of the start of the last item,
38  * which is the stop of the leaf data stack
39  */
40 static inline unsigned int leaf_data_end(struct leaf *leaf)
41 {
42         unsigned int nr = leaf->header.nritems;
43         if (nr == 0)
44                 return sizeof(leaf->data);
45         return leaf->items[nr-1].offset;
46 }
47
48 /*
49  * The space between the end of the leaf items and
50  * the start of the leaf data.  IOW, how much room
51  * the leaf has left for both items and data
52  */
53 int leaf_free_space(struct leaf *leaf)
54 {
55         int data_end = leaf_data_end(leaf);
56         int nritems = leaf->header.nritems;
57         char *items_end = (char *)(leaf->items + nritems + 1);
58         return (char *)(leaf->data + data_end) - (char *)items_end;
59 }
60
61 /*
62  * compare two keys in a memcmp fashion
63  */
64 int comp_keys(struct key *k1, struct key *k2)
65 {
66         if (k1->objectid > k2->objectid)
67                 return 1;
68         if (k1->objectid < k2->objectid)
69                 return -1;
70         if (k1->flags > k2->flags)
71                 return 1;
72         if (k1->flags < k2->flags)
73                 return -1;
74         if (k1->offset > k2->offset)
75                 return 1;
76         if (k1->offset < k2->offset)
77                 return -1;
78         return 0;
79 }
80
81 int check_node(struct ctree_path *path, int level)
82 {
83         int i;
84         struct node *parent = NULL;
85         struct node *node = &path->nodes[level]->node;
86         int parent_slot;
87
88         if (path->nodes[level + 1])
89                 parent = &path->nodes[level + 1]->node;
90         parent_slot = path->slots[level + 1];
91         if (parent && node->header.nritems > 0) {
92                 struct key *parent_key;
93                 parent_key = &parent->keys[parent_slot];
94                 BUG_ON(memcmp(parent_key, node->keys, sizeof(struct key)));
95                 BUG_ON(parent->blockptrs[parent_slot] != node->header.blocknr);
96         }
97         BUG_ON(node->header.nritems > NODEPTRS_PER_BLOCK);
98         for (i = 0; i < node->header.nritems - 2; i++) {
99                 BUG_ON(comp_keys(&node->keys[i], &node->keys[i+1]) >= 0);
100         }
101         return 0;
102 }
103
104 int check_leaf(struct ctree_path *path, int level)
105 {
106         int i;
107         struct leaf *leaf = &path->nodes[level]->leaf;
108         struct node *parent = NULL;
109         int parent_slot;
110
111         if (path->nodes[level + 1])
112                 parent = &path->nodes[level + 1]->node;
113         parent_slot = path->slots[level + 1];
114         if (parent && leaf->header.nritems > 0) {
115                 struct key *parent_key;
116                 parent_key = &parent->keys[parent_slot];
117                 BUG_ON(memcmp(parent_key, &leaf->items[0].key,
118                        sizeof(struct key)));
119                 BUG_ON(parent->blockptrs[parent_slot] != leaf->header.blocknr);
120         }
121         for (i = 0; i < leaf->header.nritems - 2; i++) {
122                 BUG_ON(comp_keys(&leaf->items[i].key,
123                                  &leaf->items[i+1].key) >= 0);
124                 BUG_ON(leaf->items[i].offset != leaf->items[i + 1].offset +
125                     leaf->items[i + 1].size);
126                 if (i == 0) {
127                         BUG_ON(leaf->items[i].offset + leaf->items[i].size !=
128                                 LEAF_DATA_SIZE);
129                 }
130         }
131         BUG_ON(leaf_free_space(leaf) < 0);
132         return 0;
133 }
134
135 int check_block(struct ctree_path *path, int level)
136 {
137         if (level == 0)
138                 return check_leaf(path, level);
139         return check_node(path, level);
140 }
141
142 /*
143  * search for key in the array p.  items p are item_size apart
144  * and there are 'max' items in p
145  * the slot in the array is returned via slot, and it points to
146  * the place where you would insert key if it is not found in
147  * the array.
148  *
149  * slot may point to max if the key is bigger than all of the keys
150  */
151 int generic_bin_search(char *p, int item_size, struct key *key,
152                        int max, int *slot)
153 {
154         int low = 0;
155         int high = max;
156         int mid;
157         int ret;
158         struct key *tmp;
159
160         while(low < high) {
161                 mid = (low + high) / 2;
162                 tmp = (struct key *)(p + mid * item_size);
163                 ret = comp_keys(tmp, key);
164
165                 if (ret < 0)
166                         low = mid + 1;
167                 else if (ret > 0)
168                         high = mid;
169                 else {
170                         *slot = mid;
171                         return 0;
172                 }
173         }
174         *slot = low;
175         return 1;
176 }
177
178 /*
179  * simple bin_search frontend that does the right thing for
180  * leaves vs nodes
181  */
182 int bin_search(struct node *c, struct key *key, int *slot)
183 {
184         if (is_leaf(c->header.flags)) {
185                 struct leaf *l = (struct leaf *)c;
186                 return generic_bin_search((void *)l->items, sizeof(struct item),
187                                           key, c->header.nritems, slot);
188         } else {
189                 return generic_bin_search((void *)c->keys, sizeof(struct key),
190                                           key, c->header.nritems, slot);
191         }
192         return -1;
193 }
194
195 /*
196  * look for key in the tree.  path is filled in with nodes along the way
197  * if key is found, we return zero and you can find the item in the leaf
198  * level of the path (level 0)
199  *
200  * If the key isn't found, the path points to the slot where it should
201  * be inserted, and 1 is returned.  If there are other errors during the
202  * search a negative error number is returned.
203  *
204  * if ins_len > 0, nodes and leaves will be split as we walk down the
205  * tree.  if ins_len < 0, nodes will be merged as we walk down the tree (if
206  * possible)
207  */
208 int search_slot(struct ctree_root *root, struct key *key,
209                 struct ctree_path *p, int ins_len)
210 {
211         struct tree_buffer *b = root->node;
212         struct node *c;
213         int slot;
214         int ret;
215         int level;
216
217         b->count++;
218         while (b) {
219                 c = &b->node;
220                 level = node_level(c->header.flags);
221                 p->nodes[level] = b;
222                 ret = check_block(p, level);
223                 if (ret)
224                         return -1;
225                 ret = bin_search(c, key, &slot);
226                 if (!is_leaf(c->header.flags)) {
227                         if (ret && slot > 0)
228                                 slot -= 1;
229                         p->slots[level] = slot;
230                         if (ins_len > 0 &&
231                             c->header.nritems == NODEPTRS_PER_BLOCK) {
232                                 int sret = split_node(root, p, level);
233                                 BUG_ON(sret > 0);
234                                 if (sret)
235                                         return sret;
236                                 b = p->nodes[level];
237                                 c = &b->node;
238                                 slot = p->slots[level];
239                         }
240                         b = read_tree_block(root, c->blockptrs[slot]);
241                         continue;
242                 } else {
243                         struct leaf *l = (struct leaf *)c;
244                         p->slots[level] = slot;
245                         if (ins_len > 0 && leaf_free_space(l) <
246                             sizeof(struct item) + ins_len) {
247                                 int sret = split_leaf(root, p, ins_len);
248                                 BUG_ON(sret > 0);
249                                 if (sret)
250                                         return sret;
251                         }
252                         return ret;
253                 }
254         }
255         return 1;
256 }
257
258 /*
259  * adjust the pointers going up the tree, starting at level
260  * making sure the right key of each node is points to 'key'.
261  * This is used after shifting pointers to the left, so it stops
262  * fixing up pointers when a given leaf/node is not in slot 0 of the
263  * higher levels
264  *
265  * If this fails to write a tree block, it returns -1, but continues
266  * fixing up the blocks in ram so the tree is consistent.
267  */
268 static int fixup_low_keys(struct ctree_root *root,
269                            struct ctree_path *path, struct key *key,
270                            int level)
271 {
272         int i;
273         int ret = 0;
274         int wret;
275         for (i = level; i < MAX_LEVEL; i++) {
276                 struct node *t;
277                 int tslot = path->slots[i];
278                 if (!path->nodes[i])
279                         break;
280                 t = &path->nodes[i]->node;
281                 memcpy(t->keys + tslot, key, sizeof(*key));
282                 wret = write_tree_block(root, path->nodes[i]);
283                 if (wret)
284                         ret = wret;
285                 if (tslot != 0)
286                         break;
287         }
288         return ret;
289 }
290
291 /*
292  * try to push data from one node into the next node left in the
293  * tree.  The src node is found at specified level in the path.
294  * If some bytes were pushed, return 0, otherwise return 1.
295  *
296  * Lower nodes/leaves in the path are not touched, higher nodes may
297  * be modified to reflect the push.
298  *
299  * The path is altered to reflect the push.
300  *
301  * returns 0 if some ptrs were pushed left, < 0 if there was some horrible
302  * error, and > 0 if there was no room in the left hand block.
303  */
304 static int push_node_left(struct ctree_root *root, struct ctree_path *path,
305                           int level)
306 {
307         int slot;
308         struct node *left;
309         struct node *right;
310         int push_items = 0;
311         int left_nritems;
312         int right_nritems;
313         struct tree_buffer *t;
314         struct tree_buffer *right_buf;
315         int ret = 0;
316         int wret;
317
318         if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
319                 return 1;
320         slot = path->slots[level + 1];
321         if (slot == 0)
322                 return 1;
323
324         t = read_tree_block(root,
325                             path->nodes[level + 1]->node.blockptrs[slot - 1]);
326         left = &t->node;
327         right_buf = path->nodes[level];
328         right = &right_buf->node;
329         left_nritems = left->header.nritems;
330         right_nritems = right->header.nritems;
331         push_items = NODEPTRS_PER_BLOCK - (left_nritems + 1);
332         if (push_items <= 0) {
333                 tree_block_release(root, t);
334                 return 1;
335         }
336
337         if (right_nritems < push_items)
338                 push_items = right_nritems;
339         memcpy(left->keys + left_nritems, right->keys,
340                 push_items * sizeof(struct key));
341         memcpy(left->blockptrs + left_nritems, right->blockptrs,
342                 push_items * sizeof(u64));
343         memmove(right->keys, right->keys + push_items,
344                 (right_nritems - push_items) * sizeof(struct key));
345         memmove(right->blockptrs, right->blockptrs + push_items,
346                 (right_nritems - push_items) * sizeof(u64));
347         right->header.nritems -= push_items;
348         left->header.nritems += push_items;
349
350         /* adjust the pointers going up the tree */
351         wret = fixup_low_keys(root, path, right->keys, level + 1);
352         if (wret < 0)
353                 ret = wret;
354
355         wret = write_tree_block(root, t);
356         if (wret < 0)
357                 ret = wret;
358
359         wret = write_tree_block(root, right_buf);
360         if (wret < 0)
361                 ret = wret;
362
363         /* then fixup the leaf pointer in the path */
364         if (path->slots[level] < push_items) {
365                 path->slots[level] += left_nritems;
366                 tree_block_release(root, path->nodes[level]);
367                 path->nodes[level] = t;
368                 path->slots[level + 1] -= 1;
369         } else {
370                 path->slots[level] -= push_items;
371                 tree_block_release(root, t);
372         }
373         return ret;
374 }
375
376 /*
377  * try to push data from one node into the next node right in the
378  * tree.  The src node is found at specified level in the path.
379  * If some bytes were pushed, return 0, otherwise return 1.
380  *
381  * Lower nodes/leaves in the path are not touched, higher nodes may
382  * be modified to reflect the push.
383  *
384  * The path is altered to reflect the push.
385  *
386  * returns 0 if some ptrs were pushed, < 0 if there was some horrible
387  * error, and > 0 if there was no room in the right hand block.
388  */
389 static int push_node_right(struct ctree_root *root, struct ctree_path *path,
390                            int level)
391 {
392         int slot;
393         struct tree_buffer *t;
394         struct tree_buffer *src_buffer;
395         struct node *dst;
396         struct node *src;
397         int push_items = 0;
398         int dst_nritems;
399         int src_nritems;
400
401         /* can't push from the root */
402         if (level == MAX_LEVEL - 1 || path->nodes[level + 1] == 0)
403                 return 1;
404
405         /* only try to push inside the node higher up */
406         slot = path->slots[level + 1];
407         if (slot == NODEPTRS_PER_BLOCK - 1)
408                 return 1;
409
410         if (slot >= path->nodes[level + 1]->node.header.nritems -1)
411                 return 1;
412
413         t = read_tree_block(root,
414                             path->nodes[level + 1]->node.blockptrs[slot + 1]);
415         dst = &t->node;
416         src_buffer = path->nodes[level];
417         src = &src_buffer->node;
418         dst_nritems = dst->header.nritems;
419         src_nritems = src->header.nritems;
420         push_items = NODEPTRS_PER_BLOCK - (dst_nritems + 1);
421         if (push_items <= 0) {
422                 tree_block_release(root, t);
423                 return 1;
424         }
425
426         if (src_nritems < push_items)
427                 push_items = src_nritems;
428         memmove(dst->keys + push_items, dst->keys,
429                 dst_nritems * sizeof(struct key));
430         memcpy(dst->keys, src->keys + src_nritems - push_items,
431                 push_items * sizeof(struct key));
432
433         memmove(dst->blockptrs + push_items, dst->blockptrs,
434                 dst_nritems * sizeof(u64));
435         memcpy(dst->blockptrs, src->blockptrs + src_nritems - push_items,
436                 push_items * sizeof(u64));
437
438         src->header.nritems -= push_items;
439         dst->header.nritems += push_items;
440
441         /* adjust the pointers going up the tree */
442         memcpy(path->nodes[level + 1]->node.keys + path->slots[level + 1] + 1,
443                 dst->keys, sizeof(struct key));
444
445         write_tree_block(root, path->nodes[level + 1]);
446         write_tree_block(root, t);
447         write_tree_block(root, src_buffer);
448
449         /* then fixup the pointers in the path */
450         if (path->slots[level] >= src->header.nritems) {
451                 path->slots[level] -= src->header.nritems;
452                 tree_block_release(root, path->nodes[level]);
453                 path->nodes[level] = t;
454                 path->slots[level + 1] += 1;
455         } else {
456                 tree_block_release(root, t);
457         }
458         return 0;
459 }
460
461 /*
462  * helper function to insert a new root level in the tree.
463  * A new node is allocated, and a single item is inserted to
464  * point to the existing root
465  *
466  * returns zero on success or < 0 on failure.
467  */
468 static int insert_new_root(struct ctree_root *root,
469                            struct ctree_path *path, int level)
470 {
471         struct tree_buffer *t;
472         struct node *lower;
473         struct node *c;
474         struct key *lower_key;
475
476         BUG_ON(path->nodes[level]);
477         BUG_ON(path->nodes[level-1] != root->node);
478
479         t = alloc_free_block(root);
480         c = &t->node;
481         memset(c, 0, sizeof(c));
482         c->header.nritems = 1;
483         c->header.flags = node_level(level);
484         c->header.blocknr = t->blocknr;
485         c->header.parentid = root->node->node.header.parentid;
486         lower = &path->nodes[level-1]->node;
487         if (is_leaf(lower->header.flags))
488                 lower_key = &((struct leaf *)lower)->items[0].key;
489         else
490                 lower_key = lower->keys;
491         memcpy(c->keys, lower_key, sizeof(struct key));
492         c->blockptrs[0] = path->nodes[level-1]->blocknr;
493         /* the super has an extra ref to root->node */
494         tree_block_release(root, root->node);
495         root->node = t;
496         t->count++;
497         write_tree_block(root, t);
498         path->nodes[level] = t;
499         path->slots[level] = 0;
500         return 0;
501 }
502
503 /*
504  * worker function to insert a single pointer in a node.
505  * the node should have enough room for the pointer already
506  *
507  * slot and level indicate where you want the key to go, and
508  * blocknr is the block the key points to.
509  *
510  * returns zero on success and < 0 on any error
511  */
512 static int insert_ptr(struct ctree_root *root,
513                 struct ctree_path *path, struct key *key,
514                 u64 blocknr, int slot, int level)
515 {
516         struct node *lower;
517         int nritems;
518
519         BUG_ON(!path->nodes[level]);
520         lower = &path->nodes[level]->node;
521         nritems = lower->header.nritems;
522         if (slot > nritems)
523                 BUG();
524         if (nritems == NODEPTRS_PER_BLOCK)
525                 BUG();
526         if (slot != nritems) {
527                 memmove(lower->keys + slot + 1, lower->keys + slot,
528                         (nritems - slot) * sizeof(struct key));
529                 memmove(lower->blockptrs + slot + 1, lower->blockptrs + slot,
530                         (nritems - slot) * sizeof(u64));
531         }
532         memcpy(lower->keys + slot, key, sizeof(struct key));
533         lower->blockptrs[slot] = blocknr;
534         lower->header.nritems++;
535         if (lower->keys[1].objectid == 0)
536                         BUG();
537         write_tree_block(root, path->nodes[level]);
538         return 0;
539 }
540
541 /*
542  * split the node at the specified level in path in two.
543  * The path is corrected to point to the appropriate node after the split
544  *
545  * Before splitting this tries to make some room in the node by pushing
546  * left and right, if either one works, it returns right away.
547  *
548  * returns 0 on success and < 0 on failure
549  */
550 static int split_node(struct ctree_root *root, struct ctree_path *path,
551                       int level)
552 {
553         struct tree_buffer *t;
554         struct node *c;
555         struct tree_buffer *split_buffer;
556         struct node *split;
557         int mid;
558         int ret;
559         int wret;
560
561         ret = push_node_left(root, path, level);
562         if (!ret)
563                 return 0;
564         if (ret < 0)
565                 return ret;
566         ret = push_node_right(root, path, level);
567         if (!ret)
568                 return 0;
569         if (ret < 0)
570                 return ret;
571         t = path->nodes[level];
572         c = &t->node;
573         if (t == root->node) {
574                 /* trying to split the root, lets make a new one */
575                 ret = insert_new_root(root, path, level + 1);
576                 if (ret)
577                         return ret;
578         }
579         split_buffer = alloc_free_block(root);
580         split = &split_buffer->node;
581         split->header.flags = c->header.flags;
582         split->header.blocknr = split_buffer->blocknr;
583         split->header.parentid = root->node->node.header.parentid;
584         mid = (c->header.nritems + 1) / 2;
585         memcpy(split->keys, c->keys + mid,
586                 (c->header.nritems - mid) * sizeof(struct key));
587         memcpy(split->blockptrs, c->blockptrs + mid,
588                 (c->header.nritems - mid) * sizeof(u64));
589         split->header.nritems = c->header.nritems - mid;
590         c->header.nritems = mid;
591         ret = 0;
592
593         wret = write_tree_block(root, t);
594         if (wret)
595                 ret = wret;
596         wret = write_tree_block(root, split_buffer);
597         if (wret)
598                 ret = wret;
599         wret = insert_ptr(root, path, split->keys, split_buffer->blocknr,
600                           path->slots[level + 1] + 1, level + 1);
601         if (wret)
602                 ret = wret;
603
604         if (path->slots[level] >= mid) {
605                 path->slots[level] -= mid;
606                 tree_block_release(root, t);
607                 path->nodes[level] = split_buffer;
608                 path->slots[level + 1] += 1;
609         } else {
610                 tree_block_release(root, split_buffer);
611         }
612         return ret;
613 }
614
615 /*
616  * how many bytes are required to store the items in a leaf.  start
617  * and nr indicate which items in the leaf to check.  This totals up the
618  * space used both by the item structs and the item data
619  */
620 static int leaf_space_used(struct leaf *l, int start, int nr)
621 {
622         int data_len;
623         int end = start + nr - 1;
624
625         if (!nr)
626                 return 0;
627         data_len = l->items[start].offset + l->items[start].size;
628         data_len = data_len - l->items[end].offset;
629         data_len += sizeof(struct item) * nr;
630         return data_len;
631 }
632
633 /*
634  * push some data in the path leaf to the right, trying to free up at
635  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
636  *
637  * returns 1 if the push failed because the other node didn't have enough
638  * room, 0 if everything worked out and < 0 if there were major errors.
639  */
640 static int push_leaf_right(struct ctree_root *root, struct ctree_path *path,
641                            int data_size)
642 {
643         struct tree_buffer *left_buf = path->nodes[0];
644         struct leaf *left = &left_buf->leaf;
645         struct leaf *right;
646         struct tree_buffer *right_buf;
647         struct tree_buffer *upper;
648         int slot;
649         int i;
650         int free_space;
651         int push_space = 0;
652         int push_items = 0;
653         struct item *item;
654
655         slot = path->slots[1];
656         if (!path->nodes[1]) {
657                 return 1;
658         }
659         upper = path->nodes[1];
660         if (slot >= upper->node.header.nritems - 1) {
661                 return 1;
662         }
663         right_buf = read_tree_block(root, upper->node.blockptrs[slot + 1]);
664         right = &right_buf->leaf;
665         free_space = leaf_free_space(right);
666         if (free_space < data_size + sizeof(struct item)) {
667                 tree_block_release(root, right_buf);
668                 return 1;
669         }
670         for (i = left->header.nritems - 1; i >= 0; i--) {
671                 item = left->items + i;
672                 if (path->slots[0] == i)
673                         push_space += data_size + sizeof(*item);
674                 if (item->size + sizeof(*item) + push_space > free_space)
675                         break;
676                 push_items++;
677                 push_space += item->size + sizeof(*item);
678         }
679         if (push_items == 0) {
680                 tree_block_release(root, right_buf);
681                 return 1;
682         }
683         /* push left to right */
684         push_space = left->items[left->header.nritems - push_items].offset +
685                      left->items[left->header.nritems - push_items].size;
686         push_space -= leaf_data_end(left);
687         /* make room in the right data area */
688         memmove(right->data + leaf_data_end(right) - push_space,
689                 right->data + leaf_data_end(right),
690                 LEAF_DATA_SIZE - leaf_data_end(right));
691         /* copy from the left data area */
692         memcpy(right->data + LEAF_DATA_SIZE - push_space,
693                 left->data + leaf_data_end(left),
694                 push_space);
695         memmove(right->items + push_items, right->items,
696                 right->header.nritems * sizeof(struct item));
697         /* copy the items from left to right */
698         memcpy(right->items, left->items + left->header.nritems - push_items,
699                 push_items * sizeof(struct item));
700
701         /* update the item pointers */
702         right->header.nritems += push_items;
703         push_space = LEAF_DATA_SIZE;
704         for (i = 0; i < right->header.nritems; i++) {
705                 right->items[i].offset = push_space - right->items[i].size;
706                 push_space = right->items[i].offset;
707         }
708         left->header.nritems -= push_items;
709
710         write_tree_block(root, left_buf);
711         write_tree_block(root, right_buf);
712         memcpy(upper->node.keys + slot + 1,
713                 &right->items[0].key, sizeof(struct key));
714         write_tree_block(root, upper);
715         /* then fixup the leaf pointer in the path */
716         if (path->slots[0] >= left->header.nritems) {
717                 path->slots[0] -= left->header.nritems;
718                 tree_block_release(root, path->nodes[0]);
719                 path->nodes[0] = right_buf;
720                 path->slots[1] += 1;
721         } else {
722                 tree_block_release(root, right_buf);
723         }
724         return 0;
725 }
726 /*
727  * push some data in the path leaf to the left, trying to free up at
728  * least data_size bytes.  returns zero if the push worked, nonzero otherwise
729  */
730 static int push_leaf_left(struct ctree_root *root, struct ctree_path *path,
731                           int data_size)
732 {
733         struct tree_buffer *right_buf = path->nodes[0];
734         struct leaf *right = &right_buf->leaf;
735         struct tree_buffer *t;
736         struct leaf *left;
737         int slot;
738         int i;
739         int free_space;
740         int push_space = 0;
741         int push_items = 0;
742         struct item *item;
743         int old_left_nritems;
744         int ret = 0;
745         int wret;
746
747         slot = path->slots[1];
748         if (slot == 0) {
749                 return 1;
750         }
751         if (!path->nodes[1]) {
752                 return 1;
753         }
754         t = read_tree_block(root, path->nodes[1]->node.blockptrs[slot - 1]);
755         left = &t->leaf;
756         free_space = leaf_free_space(left);
757         if (free_space < data_size + sizeof(struct item)) {
758                 tree_block_release(root, t);
759                 return 1;
760         }
761         for (i = 0; i < right->header.nritems; i++) {
762                 item = right->items + i;
763                 if (path->slots[0] == i)
764                         push_space += data_size + sizeof(*item);
765                 if (item->size + sizeof(*item) + push_space > free_space)
766                         break;
767                 push_items++;
768                 push_space += item->size + sizeof(*item);
769         }
770         if (push_items == 0) {
771                 tree_block_release(root, t);
772                 return 1;
773         }
774         /* push data from right to left */
775         memcpy(left->items + left->header.nritems,
776                 right->items, push_items * sizeof(struct item));
777         push_space = LEAF_DATA_SIZE - right->items[push_items -1].offset;
778         memcpy(left->data + leaf_data_end(left) - push_space,
779                 right->data + right->items[push_items - 1].offset,
780                 push_space);
781         old_left_nritems = left->header.nritems;
782         BUG_ON(old_left_nritems < 0);
783
784         for(i = old_left_nritems; i < old_left_nritems + push_items; i++) {
785                 left->items[i].offset -= LEAF_DATA_SIZE -
786                         left->items[old_left_nritems -1].offset;
787         }
788         left->header.nritems += push_items;
789
790         /* fixup right node */
791         push_space = right->items[push_items-1].offset - leaf_data_end(right);
792         memmove(right->data + LEAF_DATA_SIZE - push_space, right->data +
793                 leaf_data_end(right), push_space);
794         memmove(right->items, right->items + push_items,
795                 (right->header.nritems - push_items) * sizeof(struct item));
796         right->header.nritems -= push_items;
797         push_space = LEAF_DATA_SIZE;
798
799         for (i = 0; i < right->header.nritems; i++) {
800                 right->items[i].offset = push_space - right->items[i].size;
801                 push_space = right->items[i].offset;
802         }
803
804         wret = write_tree_block(root, t);
805         if (wret)
806                 ret = wret;
807         wret = write_tree_block(root, right_buf);
808         if (wret)
809                 ret = wret;
810
811         wret = fixup_low_keys(root, path, &right->items[0].key, 1);
812         if (wret)
813                 ret = wret;
814
815         /* then fixup the leaf pointer in the path */
816         if (path->slots[0] < push_items) {
817                 path->slots[0] += old_left_nritems;
818                 tree_block_release(root, path->nodes[0]);
819                 path->nodes[0] = t;
820                 path->slots[1] -= 1;
821         } else {
822                 tree_block_release(root, t);
823                 path->slots[0] -= push_items;
824         }
825         BUG_ON(path->slots[0] < 0);
826         return ret;
827 }
828
829 /*
830  * split the path's leaf in two, making sure there is at least data_size
831  * available for the resulting leaf level of the path.
832  *
833  * returns 0 if all went well and < 0 on failure.
834  */
835 static int split_leaf(struct ctree_root *root, struct ctree_path *path,
836                       int data_size)
837 {
838         struct tree_buffer *l_buf;
839         struct leaf *l;
840         int nritems;
841         int mid;
842         int slot;
843         struct leaf *right;
844         struct tree_buffer *right_buffer;
845         int space_needed = data_size + sizeof(struct item);
846         int data_copy_size;
847         int rt_data_off;
848         int i;
849         int ret;
850         int wret;
851
852         wret = push_leaf_left(root, path, data_size);
853         if (wret < 0)
854                 return wret;
855         if (wret) {
856                 wret = push_leaf_right(root, path, data_size);
857                 if (wret < 0)
858                         return wret;
859         }
860         l_buf = path->nodes[0];
861         l = &l_buf->leaf;
862
863         /* did the pushes work? */
864         if (leaf_free_space(l) >= sizeof(struct item) + data_size)
865                 return 0;
866
867         if (!path->nodes[1]) {
868                 ret = insert_new_root(root, path, 1);
869                 if (ret)
870                         return ret;
871         }
872         slot = path->slots[0];
873         nritems = l->header.nritems;
874         mid = (nritems + 1)/ 2;
875
876         right_buffer = alloc_free_block(root);
877         BUG_ON(!right_buffer);
878         BUG_ON(mid == nritems);
879         right = &right_buffer->leaf;
880         memset(right, 0, sizeof(*right));
881         if (mid <= slot) {
882                 /* FIXME, just alloc a new leaf here */
883                 if (leaf_space_used(l, mid, nritems - mid) + space_needed >
884                         LEAF_DATA_SIZE)
885                         BUG();
886         } else {
887                 /* FIXME, just alloc a new leaf here */
888                 if (leaf_space_used(l, 0, mid + 1) + space_needed >
889                         LEAF_DATA_SIZE)
890                         BUG();
891         }
892         right->header.nritems = nritems - mid;
893         right->header.blocknr = right_buffer->blocknr;
894         right->header.flags = node_level(0);
895         right->header.parentid = root->node->node.header.parentid;
896         data_copy_size = l->items[mid].offset + l->items[mid].size -
897                          leaf_data_end(l);
898         memcpy(right->items, l->items + mid,
899                (nritems - mid) * sizeof(struct item));
900         memcpy(right->data + LEAF_DATA_SIZE - data_copy_size,
901                l->data + leaf_data_end(l), data_copy_size);
902         rt_data_off = LEAF_DATA_SIZE -
903                      (l->items[mid].offset + l->items[mid].size);
904
905         for (i = 0; i < right->header.nritems; i++)
906                 right->items[i].offset += rt_data_off;
907
908         l->header.nritems = mid;
909         ret = 0;
910         wret = insert_ptr(root, path, &right->items[0].key,
911                           right_buffer->blocknr, path->slots[1] + 1, 1);
912         if (wret)
913                 ret = wret;
914         wret = write_tree_block(root, right_buffer);
915         if (wret)
916                 ret = wret;
917         wret = write_tree_block(root, l_buf);
918         if (wret)
919                 ret = wret;
920
921         BUG_ON(path->slots[0] != slot);
922         if (mid <= slot) {
923                 tree_block_release(root, path->nodes[0]);
924                 path->nodes[0] = right_buffer;
925                 path->slots[0] -= mid;
926                 path->slots[1] += 1;
927         } else
928                 tree_block_release(root, right_buffer);
929         BUG_ON(path->slots[0] < 0);
930         return ret;
931 }
932
933 /*
934  * Given a key and some data, insert an item into the tree.
935  * This does all the path init required, making room in the tree if needed.
936  */
937 int insert_item(struct ctree_root *root, struct key *key,
938                           void *data, int data_size)
939 {
940         int ret = 0;
941         int wret;
942         int slot;
943         int slot_orig;
944         struct leaf *leaf;
945         struct tree_buffer *leaf_buf;
946         unsigned int nritems;
947         unsigned int data_end;
948         struct ctree_path path;
949
950         /* create a root if there isn't one */
951         if (!root->node)
952                 BUG();
953         init_path(&path);
954         ret = search_slot(root, key, &path, data_size);
955         if (ret == 0) {
956                 release_path(root, &path);
957                 return -EEXIST;
958         }
959         if (ret < 0) {
960                 release_path(root, &path);
961                 return ret;
962         }
963
964         slot_orig = path.slots[0];
965         leaf_buf = path.nodes[0];
966         leaf = &leaf_buf->leaf;
967
968         nritems = leaf->header.nritems;
969         data_end = leaf_data_end(leaf);
970
971         if (leaf_free_space(leaf) <  sizeof(struct item) + data_size)
972                 BUG();
973
974         slot = path.slots[0];
975         BUG_ON(slot < 0);
976         if (slot != nritems) {
977                 int i;
978                 unsigned int old_data = leaf->items[slot].offset +
979                                         leaf->items[slot].size;
980
981                 /*
982                  * item0..itemN ... dataN.offset..dataN.size .. data0.size
983                  */
984                 /* first correct the data pointers */
985                 for (i = slot; i < nritems; i++)
986                         leaf->items[i].offset -= data_size;
987
988                 /* shift the items */
989                 memmove(leaf->items + slot + 1, leaf->items + slot,
990                         (nritems - slot) * sizeof(struct item));
991
992                 /* shift the data */
993                 memmove(leaf->data + data_end - data_size, leaf->data +
994                         data_end, old_data - data_end);
995                 data_end = old_data;
996         }
997         /* copy the new data in */
998         memcpy(&leaf->items[slot].key, key, sizeof(struct key));
999         leaf->items[slot].offset = data_end - data_size;
1000         leaf->items[slot].size = data_size;
1001         memcpy(leaf->data + data_end - data_size, data, data_size);
1002         leaf->header.nritems += 1;
1003
1004         ret = 0;
1005         if (slot == 0)
1006                 ret = fixup_low_keys(root, &path, key, 1);
1007
1008         wret = write_tree_block(root, leaf_buf);
1009         if (wret)
1010                 ret = wret;
1011
1012         if (leaf_free_space(leaf) < 0)
1013                 BUG();
1014         release_path(root, &path);
1015         return ret;
1016 }
1017
1018 /*
1019  * delete the pointer from a given node.
1020  *
1021  * If the delete empties a node, the node is removed from the tree,
1022  * continuing all the way the root if required.  The root is converted into
1023  * a leaf if all the nodes are emptied.
1024  */
1025 static int del_ptr(struct ctree_root *root, struct ctree_path *path, int level)
1026 {
1027         int slot;
1028         struct tree_buffer *t;
1029         struct node *node;
1030         int nritems;
1031         u64 blocknr;
1032         int wret;
1033         int ret = 0;
1034
1035         while(1) {
1036                 t = path->nodes[level];
1037                 if (!t)
1038                         break;
1039                 node = &t->node;
1040                 slot = path->slots[level];
1041                 nritems = node->header.nritems;
1042
1043                 if (slot != nritems -1) {
1044                         memmove(node->keys + slot, node->keys + slot + 1,
1045                                 sizeof(struct key) * (nritems - slot - 1));
1046                         memmove(node->blockptrs + slot,
1047                                 node->blockptrs + slot + 1,
1048                                 sizeof(u64) * (nritems - slot - 1));
1049                 }
1050                 node->header.nritems--;
1051                 blocknr = t->blocknr;
1052                 write_tree_block(root, t);
1053                 if (node->header.nritems != 0) {
1054                         int tslot;
1055                         if (slot == 0) {
1056                                 wret = fixup_low_keys(root, path,
1057                                                            node->keys,
1058                                                            level + 1);
1059                                 if (wret)
1060                                         ret = wret;
1061                         }
1062                         tslot = path->slots[level + 1];
1063                         t->count++;
1064                         wret = push_node_left(root, path, level);
1065                         if (wret < 0) {
1066                                 ret = wret;
1067                                 break;
1068                         }
1069                         if (node->header.nritems != 0) {
1070                                 wret = push_node_right(root, path, level);
1071                                 if (wret < 0) {
1072                                         ret = wret;
1073                                         break;
1074                                 }
1075                         }
1076                         path->slots[level + 1] = tslot;
1077                         if (node->header.nritems != 0) {
1078                                 tree_block_release(root, t);
1079                                 break;
1080                         }
1081                         tree_block_release(root, t);
1082                 }
1083                 if (t == root->node) {
1084                         /* just turn the root into a leaf and break */
1085                         root->node->node.header.flags = node_level(0);
1086                         write_tree_block(root, t);
1087                         break;
1088                 }
1089                 level++;
1090                 wret = free_extent(root, blocknr, 1);
1091                 if (wret)
1092                         ret = wret;
1093                 if (!path->nodes[level])
1094                         BUG();
1095         }
1096         return ret;
1097 }
1098
1099 /*
1100  * delete the item at the leaf level in path.  If that empties
1101  * the leaf, remove it from the tree
1102  */
1103 int del_item(struct ctree_root *root, struct ctree_path *path)
1104 {
1105         int slot;
1106         struct leaf *leaf;
1107         struct tree_buffer *leaf_buf;
1108         int doff;
1109         int dsize;
1110         int ret = 0;
1111         int wret;
1112
1113         leaf_buf = path->nodes[0];
1114         leaf = &leaf_buf->leaf;
1115         slot = path->slots[0];
1116         doff = leaf->items[slot].offset;
1117         dsize = leaf->items[slot].size;
1118
1119         if (slot != leaf->header.nritems - 1) {
1120                 int i;
1121                 int data_end = leaf_data_end(leaf);
1122                 memmove(leaf->data + data_end + dsize,
1123                         leaf->data + data_end,
1124                         doff - data_end);
1125                 for (i = slot + 1; i < leaf->header.nritems; i++)
1126                         leaf->items[i].offset += dsize;
1127                 memmove(leaf->items + slot, leaf->items + slot + 1,
1128                         sizeof(struct item) *
1129                         (leaf->header.nritems - slot - 1));
1130         }
1131         leaf->header.nritems -= 1;
1132         /* delete the leaf if we've emptied it */
1133         if (leaf->header.nritems == 0) {
1134                 if (leaf_buf == root->node) {
1135                         leaf->header.flags = node_level(0);
1136                         write_tree_block(root, leaf_buf);
1137                 } else {
1138                         wret = del_ptr(root, path, 1);
1139                         if (wret)
1140                                 ret = wret;
1141                         wret = free_extent(root, leaf_buf->blocknr, 1);
1142                         if (wret)
1143                                 ret = wret;
1144                 }
1145         } else {
1146                 int used = leaf_space_used(leaf, 0, leaf->header.nritems);
1147                 if (slot == 0) {
1148                         wret = fixup_low_keys(root, path,
1149                                                    &leaf->items[0].key, 1);
1150                         if (wret)
1151                                 ret = wret;
1152                 }
1153                 wret = write_tree_block(root, leaf_buf);
1154                 if (wret)
1155                         ret = wret;
1156
1157                 /* delete the leaf if it is mostly empty */
1158                 if (used < LEAF_DATA_SIZE / 3) {
1159                         /* push_leaf_left fixes the path.
1160                          * make sure the path still points to our leaf
1161                          * for possible call to del_ptr below
1162                          */
1163                         slot = path->slots[1];
1164                         leaf_buf->count++;
1165                         wret = push_leaf_left(root, path, 1);
1166                         if (wret < 0)
1167                                 ret = wret;
1168                         if (leaf->header.nritems) {
1169                                 wret = push_leaf_right(root, path, 1);
1170                                 if (wret < 0)
1171                                         ret = wret;
1172                         }
1173                         if (leaf->header.nritems == 0) {
1174                                 u64 blocknr = leaf_buf->blocknr;
1175                                 path->slots[1] = slot;
1176                                 wret = del_ptr(root, path, 1);
1177                                 if (wret)
1178                                         ret = wret;
1179                                 tree_block_release(root, leaf_buf);
1180                                 wret = free_extent(root, blocknr, 1);
1181                                 if (wret)
1182                                         ret = wret;
1183                         } else {
1184                                 tree_block_release(root, leaf_buf);
1185                         }
1186                 }
1187         }
1188         return ret;
1189 }
1190
1191 /*
1192  * walk up the tree as far as required to find the next leaf.
1193  * returns 0 if it found something or 1 if there are no greater leaves.
1194  * returns < 0 on io errors.
1195  */
1196 int next_leaf(struct ctree_root *root, struct ctree_path *path)
1197 {
1198         int slot;
1199         int level = 1;
1200         u64 blocknr;
1201         struct tree_buffer *c;
1202         struct tree_buffer *next = NULL;
1203
1204         while(level < MAX_LEVEL) {
1205                 if (!path->nodes[level])
1206                         return 1;
1207                 slot = path->slots[level] + 1;
1208                 c = path->nodes[level];
1209                 if (slot >= c->node.header.nritems) {
1210                         level++;
1211                         continue;
1212                 }
1213                 blocknr = c->node.blockptrs[slot];
1214                 if (next)
1215                         tree_block_release(root, next);
1216                 next = read_tree_block(root, blocknr);
1217                 break;
1218         }
1219         path->slots[level] = slot;
1220         while(1) {
1221                 level--;
1222                 c = path->nodes[level];
1223                 tree_block_release(root, c);
1224                 path->nodes[level] = next;
1225                 path->slots[level] = 0;
1226                 if (!level)
1227                         break;
1228                 next = read_tree_block(root, next->node.blockptrs[0]);
1229         }
1230         return 0;
1231 }
1232