Merge branch 'for-linus' of git://neil.brown.name/md
[linux-2.6] / fs / nilfs2 / btree.c
1 /*
2  * btree.c - NILFS B-tree.
3  *
4  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  * Written by Koji Sato <koji@osrg.net>.
21  */
22
23 #include <linux/slab.h>
24 #include <linux/string.h>
25 #include <linux/errno.h>
26 #include <linux/pagevec.h>
27 #include "nilfs.h"
28 #include "page.h"
29 #include "btnode.h"
30 #include "btree.h"
31 #include "alloc.h"
32
33 /**
34  * struct nilfs_btree_path - A path on which B-tree operations are executed
35  * @bp_bh: buffer head of node block
36  * @bp_sib_bh: buffer head of sibling node block
37  * @bp_index: index of child node
38  * @bp_oldreq: ptr end request for old ptr
39  * @bp_newreq: ptr alloc request for new ptr
40  * @bp_op: rebalance operation
41  */
42 struct nilfs_btree_path {
43         struct buffer_head *bp_bh;
44         struct buffer_head *bp_sib_bh;
45         int bp_index;
46         union nilfs_bmap_ptr_req bp_oldreq;
47         union nilfs_bmap_ptr_req bp_newreq;
48         struct nilfs_btnode_chkey_ctxt bp_ctxt;
49         void (*bp_op)(struct nilfs_btree *, struct nilfs_btree_path *,
50                       int, __u64 *, __u64 *);
51 };
52
53 /*
54  * B-tree path operations
55  */
56
57 static struct kmem_cache *nilfs_btree_path_cache;
58
59 int __init nilfs_btree_path_cache_init(void)
60 {
61         nilfs_btree_path_cache =
62                 kmem_cache_create("nilfs2_btree_path_cache",
63                                   sizeof(struct nilfs_btree_path) *
64                                   NILFS_BTREE_LEVEL_MAX, 0, 0, NULL);
65         return (nilfs_btree_path_cache != NULL) ? 0 : -ENOMEM;
66 }
67
68 void nilfs_btree_path_cache_destroy(void)
69 {
70         kmem_cache_destroy(nilfs_btree_path_cache);
71 }
72
73 static inline struct nilfs_btree_path *
74 nilfs_btree_alloc_path(const struct nilfs_btree *btree)
75 {
76         return (struct nilfs_btree_path *)
77                 kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS);
78 }
79
80 static inline void nilfs_btree_free_path(const struct nilfs_btree *btree,
81                                          struct nilfs_btree_path *path)
82 {
83         kmem_cache_free(nilfs_btree_path_cache, path);
84 }
85
86 static void nilfs_btree_init_path(const struct nilfs_btree *btree,
87                                   struct nilfs_btree_path *path)
88 {
89         int level;
90
91         for (level = NILFS_BTREE_LEVEL_DATA;
92              level < NILFS_BTREE_LEVEL_MAX;
93              level++) {
94                 path[level].bp_bh = NULL;
95                 path[level].bp_sib_bh = NULL;
96                 path[level].bp_index = 0;
97                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
98                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
99                 path[level].bp_op = NULL;
100         }
101 }
102
103 static void nilfs_btree_clear_path(const struct nilfs_btree *btree,
104                                    struct nilfs_btree_path *path)
105 {
106         int level;
107
108         for (level = NILFS_BTREE_LEVEL_DATA;
109              level < NILFS_BTREE_LEVEL_MAX;
110              level++) {
111                 if (path[level].bp_bh != NULL) {
112                         nilfs_bmap_put_block(&btree->bt_bmap,
113                                              path[level].bp_bh);
114                         path[level].bp_bh = NULL;
115                 }
116                 /* sib_bh is released or deleted by prepare or commit
117                  * operations. */
118                 path[level].bp_sib_bh = NULL;
119                 path[level].bp_index = 0;
120                 path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
121                 path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR;
122                 path[level].bp_op = NULL;
123         }
124 }
125
126
127 /*
128  * B-tree node operations
129  */
130
131 static inline int
132 nilfs_btree_node_get_flags(const struct nilfs_btree *btree,
133                            const struct nilfs_btree_node *node)
134 {
135         return node->bn_flags;
136 }
137
138 static inline void
139 nilfs_btree_node_set_flags(struct nilfs_btree *btree,
140                            struct nilfs_btree_node *node,
141                            int flags)
142 {
143         node->bn_flags = flags;
144 }
145
146 static inline int nilfs_btree_node_root(const struct nilfs_btree *btree,
147                                         const struct nilfs_btree_node *node)
148 {
149         return nilfs_btree_node_get_flags(btree, node) & NILFS_BTREE_NODE_ROOT;
150 }
151
152 static inline int
153 nilfs_btree_node_get_level(const struct nilfs_btree *btree,
154                            const struct nilfs_btree_node *node)
155 {
156         return node->bn_level;
157 }
158
159 static inline void
160 nilfs_btree_node_set_level(struct nilfs_btree *btree,
161                            struct nilfs_btree_node *node,
162                            int level)
163 {
164         node->bn_level = level;
165 }
166
167 static inline int
168 nilfs_btree_node_get_nchildren(const struct nilfs_btree *btree,
169                                const struct nilfs_btree_node *node)
170 {
171         return le16_to_cpu(node->bn_nchildren);
172 }
173
174 static inline void
175 nilfs_btree_node_set_nchildren(struct nilfs_btree *btree,
176                                struct nilfs_btree_node *node,
177                                int nchildren)
178 {
179         node->bn_nchildren = cpu_to_le16(nchildren);
180 }
181
182 static inline int
183 nilfs_btree_node_size(const struct nilfs_btree *btree)
184 {
185         return 1 << btree->bt_bmap.b_inode->i_blkbits;
186 }
187
188 static inline int
189 nilfs_btree_node_nchildren_min(const struct nilfs_btree *btree,
190                                const struct nilfs_btree_node *node)
191 {
192         return nilfs_btree_node_root(btree, node) ?
193                 NILFS_BTREE_ROOT_NCHILDREN_MIN :
194                 NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree));
195 }
196
197 static inline int
198 nilfs_btree_node_nchildren_max(const struct nilfs_btree *btree,
199                                const struct nilfs_btree_node *node)
200 {
201         return nilfs_btree_node_root(btree, node) ?
202                 NILFS_BTREE_ROOT_NCHILDREN_MAX :
203                 NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(btree));
204 }
205
206 static inline __le64 *
207 nilfs_btree_node_dkeys(const struct nilfs_btree *btree,
208                        const struct nilfs_btree_node *node)
209 {
210         return (__le64 *)((char *)(node + 1) +
211                           (nilfs_btree_node_root(btree, node) ?
212                            0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE));
213 }
214
215 static inline __le64 *
216 nilfs_btree_node_dptrs(const struct nilfs_btree *btree,
217                        const struct nilfs_btree_node *node)
218 {
219         return (__le64 *)(nilfs_btree_node_dkeys(btree, node) +
220                           nilfs_btree_node_nchildren_max(btree, node));
221 }
222
223 static inline __u64
224 nilfs_btree_node_get_key(const struct nilfs_btree *btree,
225                          const struct nilfs_btree_node *node, int index)
226 {
227         return nilfs_bmap_dkey_to_key(*(nilfs_btree_node_dkeys(btree, node) +
228                                         index));
229 }
230
231 static inline void
232 nilfs_btree_node_set_key(struct nilfs_btree *btree,
233                          struct nilfs_btree_node *node, int index, __u64 key)
234 {
235         *(nilfs_btree_node_dkeys(btree, node) + index) =
236                 nilfs_bmap_key_to_dkey(key);
237 }
238
239 static inline __u64
240 nilfs_btree_node_get_ptr(const struct nilfs_btree *btree,
241                          const struct nilfs_btree_node *node,
242                          int index)
243 {
244         return nilfs_bmap_dptr_to_ptr(*(nilfs_btree_node_dptrs(btree, node) +
245                                         index));
246 }
247
248 static inline void
249 nilfs_btree_node_set_ptr(struct nilfs_btree *btree,
250                          struct nilfs_btree_node *node,
251                          int index,
252                          __u64 ptr)
253 {
254         *(nilfs_btree_node_dptrs(btree, node) + index) =
255                 nilfs_bmap_ptr_to_dptr(ptr);
256 }
257
258 static void nilfs_btree_node_init(struct nilfs_btree *btree,
259                                   struct nilfs_btree_node *node,
260                                   int flags, int level, int nchildren,
261                                   const __u64 *keys, const __u64 *ptrs)
262 {
263         __le64 *dkeys;
264         __le64 *dptrs;
265         int i;
266
267         nilfs_btree_node_set_flags(btree, node, flags);
268         nilfs_btree_node_set_level(btree, node, level);
269         nilfs_btree_node_set_nchildren(btree, node, nchildren);
270
271         dkeys = nilfs_btree_node_dkeys(btree, node);
272         dptrs = nilfs_btree_node_dptrs(btree, node);
273         for (i = 0; i < nchildren; i++) {
274                 dkeys[i] = nilfs_bmap_key_to_dkey(keys[i]);
275                 dptrs[i] = nilfs_bmap_ptr_to_dptr(ptrs[i]);
276         }
277 }
278
279 /* Assume the buffer heads corresponding to left and right are locked. */
280 static void nilfs_btree_node_move_left(struct nilfs_btree *btree,
281                                        struct nilfs_btree_node *left,
282                                        struct nilfs_btree_node *right,
283                                        int n)
284 {
285         __le64 *ldkeys, *rdkeys;
286         __le64 *ldptrs, *rdptrs;
287         int lnchildren, rnchildren;
288
289         ldkeys = nilfs_btree_node_dkeys(btree, left);
290         ldptrs = nilfs_btree_node_dptrs(btree, left);
291         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
292
293         rdkeys = nilfs_btree_node_dkeys(btree, right);
294         rdptrs = nilfs_btree_node_dptrs(btree, right);
295         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
296
297         memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys));
298         memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs));
299         memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys));
300         memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs));
301
302         lnchildren += n;
303         rnchildren -= n;
304         nilfs_btree_node_set_nchildren(btree, left, lnchildren);
305         nilfs_btree_node_set_nchildren(btree, right, rnchildren);
306 }
307
308 /* Assume that the buffer heads corresponding to left and right are locked. */
309 static void nilfs_btree_node_move_right(struct nilfs_btree *btree,
310                                         struct nilfs_btree_node *left,
311                                         struct nilfs_btree_node *right,
312                                         int n)
313 {
314         __le64 *ldkeys, *rdkeys;
315         __le64 *ldptrs, *rdptrs;
316         int lnchildren, rnchildren;
317
318         ldkeys = nilfs_btree_node_dkeys(btree, left);
319         ldptrs = nilfs_btree_node_dptrs(btree, left);
320         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
321
322         rdkeys = nilfs_btree_node_dkeys(btree, right);
323         rdptrs = nilfs_btree_node_dptrs(btree, right);
324         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
325
326         memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys));
327         memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs));
328         memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys));
329         memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs));
330
331         lnchildren -= n;
332         rnchildren += n;
333         nilfs_btree_node_set_nchildren(btree, left, lnchildren);
334         nilfs_btree_node_set_nchildren(btree, right, rnchildren);
335 }
336
337 /* Assume that the buffer head corresponding to node is locked. */
338 static void nilfs_btree_node_insert(struct nilfs_btree *btree,
339                                     struct nilfs_btree_node *node,
340                                     __u64 key, __u64 ptr, int index)
341 {
342         __le64 *dkeys;
343         __le64 *dptrs;
344         int nchildren;
345
346         dkeys = nilfs_btree_node_dkeys(btree, node);
347         dptrs = nilfs_btree_node_dptrs(btree, node);
348         nchildren = nilfs_btree_node_get_nchildren(btree, node);
349         if (index < nchildren) {
350                 memmove(dkeys + index + 1, dkeys + index,
351                         (nchildren - index) * sizeof(*dkeys));
352                 memmove(dptrs + index + 1, dptrs + index,
353                         (nchildren - index) * sizeof(*dptrs));
354         }
355         dkeys[index] = nilfs_bmap_key_to_dkey(key);
356         dptrs[index] = nilfs_bmap_ptr_to_dptr(ptr);
357         nchildren++;
358         nilfs_btree_node_set_nchildren(btree, node, nchildren);
359 }
360
361 /* Assume that the buffer head corresponding to node is locked. */
362 static void nilfs_btree_node_delete(struct nilfs_btree *btree,
363                                     struct nilfs_btree_node *node,
364                                     __u64 *keyp, __u64 *ptrp, int index)
365 {
366         __u64 key;
367         __u64 ptr;
368         __le64 *dkeys;
369         __le64 *dptrs;
370         int nchildren;
371
372         dkeys = nilfs_btree_node_dkeys(btree, node);
373         dptrs = nilfs_btree_node_dptrs(btree, node);
374         key = nilfs_bmap_dkey_to_key(dkeys[index]);
375         ptr = nilfs_bmap_dptr_to_ptr(dptrs[index]);
376         nchildren = nilfs_btree_node_get_nchildren(btree, node);
377         if (keyp != NULL)
378                 *keyp = key;
379         if (ptrp != NULL)
380                 *ptrp = ptr;
381
382         if (index < nchildren - 1) {
383                 memmove(dkeys + index, dkeys + index + 1,
384                         (nchildren - index - 1) * sizeof(*dkeys));
385                 memmove(dptrs + index, dptrs + index + 1,
386                         (nchildren - index - 1) * sizeof(*dptrs));
387         }
388         nchildren--;
389         nilfs_btree_node_set_nchildren(btree, node, nchildren);
390 }
391
392 static int nilfs_btree_node_lookup(const struct nilfs_btree *btree,
393                                    const struct nilfs_btree_node *node,
394                                    __u64 key, int *indexp)
395 {
396         __u64 nkey;
397         int index, low, high, s;
398
399         /* binary search */
400         low = 0;
401         high = nilfs_btree_node_get_nchildren(btree, node) - 1;
402         index = 0;
403         s = 0;
404         while (low <= high) {
405                 index = (low + high) / 2;
406                 nkey = nilfs_btree_node_get_key(btree, node, index);
407                 if (nkey == key) {
408                         s = 0;
409                         goto out;
410                 } else if (nkey < key) {
411                         low = index + 1;
412                         s = -1;
413                 } else {
414                         high = index - 1;
415                         s = 1;
416                 }
417         }
418
419         /* adjust index */
420         if (nilfs_btree_node_get_level(btree, node) >
421             NILFS_BTREE_LEVEL_NODE_MIN) {
422                 if ((s > 0) && (index > 0))
423                         index--;
424         } else if (s < 0)
425                 index++;
426
427  out:
428         *indexp = index;
429
430         return s == 0;
431 }
432
433 static inline struct nilfs_btree_node *
434 nilfs_btree_get_root(const struct nilfs_btree *btree)
435 {
436         return (struct nilfs_btree_node *)btree->bt_bmap.b_u.u_data;
437 }
438
439 static inline struct nilfs_btree_node *
440 nilfs_btree_get_nonroot_node(const struct nilfs_btree *btree,
441                              const struct nilfs_btree_path *path,
442                              int level)
443 {
444         return (struct nilfs_btree_node *)path[level].bp_bh->b_data;
445 }
446
447 static inline struct nilfs_btree_node *
448 nilfs_btree_get_sib_node(const struct nilfs_btree *btree,
449                          const struct nilfs_btree_path *path,
450                          int level)
451 {
452         return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data;
453 }
454
455 static inline int nilfs_btree_height(const struct nilfs_btree *btree)
456 {
457         return nilfs_btree_node_get_level(btree, nilfs_btree_get_root(btree))
458                 + 1;
459 }
460
461 static inline struct nilfs_btree_node *
462 nilfs_btree_get_node(const struct nilfs_btree *btree,
463                      const struct nilfs_btree_path *path,
464                      int level)
465 {
466         return (level == nilfs_btree_height(btree) - 1) ?
467                 nilfs_btree_get_root(btree) :
468                 nilfs_btree_get_nonroot_node(btree, path, level);
469 }
470
471 static int nilfs_btree_do_lookup(const struct nilfs_btree *btree,
472                                  struct nilfs_btree_path *path,
473                                  __u64 key, __u64 *ptrp, int minlevel)
474 {
475         struct nilfs_btree_node *node;
476         __u64 ptr;
477         int level, index, found, ret;
478
479         node = nilfs_btree_get_root(btree);
480         level = nilfs_btree_node_get_level(btree, node);
481         if ((level < minlevel) ||
482             (nilfs_btree_node_get_nchildren(btree, node) <= 0))
483                 return -ENOENT;
484
485         found = nilfs_btree_node_lookup(btree, node, key, &index);
486         ptr = nilfs_btree_node_get_ptr(btree, node, index);
487         path[level].bp_bh = NULL;
488         path[level].bp_index = index;
489
490         for (level--; level >= minlevel; level--) {
491                 ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr,
492                                            &path[level].bp_bh);
493                 if (ret < 0)
494                         return ret;
495                 node = nilfs_btree_get_nonroot_node(btree, path, level);
496                 BUG_ON(level != nilfs_btree_node_get_level(btree, node));
497                 if (!found)
498                         found = nilfs_btree_node_lookup(btree, node, key,
499                                                         &index);
500                 else
501                         index = 0;
502                 if (index < nilfs_btree_node_nchildren_max(btree, node))
503                         ptr = nilfs_btree_node_get_ptr(btree, node, index);
504                 else {
505                         WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN);
506                         /* insert */
507                         ptr = NILFS_BMAP_INVALID_PTR;
508                 }
509                 path[level].bp_index = index;
510         }
511         if (!found)
512                 return -ENOENT;
513
514         if (ptrp != NULL)
515                 *ptrp = ptr;
516
517         return 0;
518 }
519
520 static int nilfs_btree_do_lookup_last(const struct nilfs_btree *btree,
521                                       struct nilfs_btree_path *path,
522                                       __u64 *keyp, __u64 *ptrp)
523 {
524         struct nilfs_btree_node *node;
525         __u64 ptr;
526         int index, level, ret;
527
528         node = nilfs_btree_get_root(btree);
529         index = nilfs_btree_node_get_nchildren(btree, node) - 1;
530         if (index < 0)
531                 return -ENOENT;
532         level = nilfs_btree_node_get_level(btree, node);
533         ptr = nilfs_btree_node_get_ptr(btree, node, index);
534         path[level].bp_bh = NULL;
535         path[level].bp_index = index;
536
537         for (level--; level > 0; level--) {
538                 ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr,
539                                            &path[level].bp_bh);
540                 if (ret < 0)
541                         return ret;
542                 node = nilfs_btree_get_nonroot_node(btree, path, level);
543                 BUG_ON(level != nilfs_btree_node_get_level(btree, node));
544                 index = nilfs_btree_node_get_nchildren(btree, node) - 1;
545                 ptr = nilfs_btree_node_get_ptr(btree, node, index);
546                 path[level].bp_index = index;
547         }
548
549         if (keyp != NULL)
550                 *keyp = nilfs_btree_node_get_key(btree, node, index);
551         if (ptrp != NULL)
552                 *ptrp = ptr;
553
554         return 0;
555 }
556
557 static int nilfs_btree_lookup(const struct nilfs_bmap *bmap,
558                               __u64 key, int level, __u64 *ptrp)
559 {
560         struct nilfs_btree *btree;
561         struct nilfs_btree_path *path;
562         __u64 ptr;
563         int ret;
564
565         btree = (struct nilfs_btree *)bmap;
566         path = nilfs_btree_alloc_path(btree);
567         if (path == NULL)
568                 return -ENOMEM;
569         nilfs_btree_init_path(btree, path);
570
571         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level);
572
573         if (ptrp != NULL)
574                 *ptrp = ptr;
575
576         nilfs_btree_clear_path(btree, path);
577         nilfs_btree_free_path(btree, path);
578
579         return ret;
580 }
581
582 static void nilfs_btree_promote_key(struct nilfs_btree *btree,
583                                     struct nilfs_btree_path *path,
584                                     int level, __u64 key)
585 {
586         if (level < nilfs_btree_height(btree) - 1) {
587                 do {
588                         lock_buffer(path[level].bp_bh);
589                         nilfs_btree_node_set_key(
590                                 btree,
591                                 nilfs_btree_get_nonroot_node(
592                                         btree, path, level),
593                                 path[level].bp_index, key);
594                         if (!buffer_dirty(path[level].bp_bh))
595                                 nilfs_btnode_mark_dirty(path[level].bp_bh);
596                         unlock_buffer(path[level].bp_bh);
597                 } while ((path[level].bp_index == 0) &&
598                          (++level < nilfs_btree_height(btree) - 1));
599         }
600
601         /* root */
602         if (level == nilfs_btree_height(btree) - 1) {
603                 nilfs_btree_node_set_key(btree,
604                                          nilfs_btree_get_root(btree),
605                                          path[level].bp_index, key);
606         }
607 }
608
609 static void nilfs_btree_do_insert(struct nilfs_btree *btree,
610                                   struct nilfs_btree_path *path,
611                                   int level, __u64 *keyp, __u64 *ptrp)
612 {
613         struct nilfs_btree_node *node;
614
615         if (level < nilfs_btree_height(btree) - 1) {
616                 lock_buffer(path[level].bp_bh);
617                 node = nilfs_btree_get_nonroot_node(btree, path, level);
618                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
619                                         path[level].bp_index);
620                 if (!buffer_dirty(path[level].bp_bh))
621                         nilfs_btnode_mark_dirty(path[level].bp_bh);
622                 unlock_buffer(path[level].bp_bh);
623
624                 if (path[level].bp_index == 0)
625                         nilfs_btree_promote_key(btree, path, level + 1,
626                                                 nilfs_btree_node_get_key(
627                                                         btree, node, 0));
628         } else {
629                 node = nilfs_btree_get_root(btree);
630                 nilfs_btree_node_insert(btree, node, *keyp, *ptrp,
631                                         path[level].bp_index);
632         }
633 }
634
635 static void nilfs_btree_carry_left(struct nilfs_btree *btree,
636                                    struct nilfs_btree_path *path,
637                                    int level, __u64 *keyp, __u64 *ptrp)
638 {
639         struct nilfs_btree_node *node, *left;
640         int nchildren, lnchildren, n, move;
641
642         lock_buffer(path[level].bp_bh);
643         lock_buffer(path[level].bp_sib_bh);
644
645         node = nilfs_btree_get_nonroot_node(btree, path, level);
646         left = nilfs_btree_get_sib_node(btree, path, level);
647         nchildren = nilfs_btree_node_get_nchildren(btree, node);
648         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
649         move = 0;
650
651         n = (nchildren + lnchildren + 1) / 2 - lnchildren;
652         if (n > path[level].bp_index) {
653                 /* move insert point */
654                 n--;
655                 move = 1;
656         }
657
658         nilfs_btree_node_move_left(btree, left, node, n);
659
660         if (!buffer_dirty(path[level].bp_bh))
661                 nilfs_btnode_mark_dirty(path[level].bp_bh);
662         if (!buffer_dirty(path[level].bp_sib_bh))
663                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
664
665         unlock_buffer(path[level].bp_bh);
666         unlock_buffer(path[level].bp_sib_bh);
667
668         nilfs_btree_promote_key(btree, path, level + 1,
669                                 nilfs_btree_node_get_key(btree, node, 0));
670
671         if (move) {
672                 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh);
673                 path[level].bp_bh = path[level].bp_sib_bh;
674                 path[level].bp_sib_bh = NULL;
675                 path[level].bp_index += lnchildren;
676                 path[level + 1].bp_index--;
677         } else {
678                 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
679                 path[level].bp_sib_bh = NULL;
680                 path[level].bp_index -= n;
681         }
682
683         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
684 }
685
686 static void nilfs_btree_carry_right(struct nilfs_btree *btree,
687                                     struct nilfs_btree_path *path,
688                                     int level, __u64 *keyp, __u64 *ptrp)
689 {
690         struct nilfs_btree_node *node, *right;
691         int nchildren, rnchildren, n, move;
692
693         lock_buffer(path[level].bp_bh);
694         lock_buffer(path[level].bp_sib_bh);
695
696         node = nilfs_btree_get_nonroot_node(btree, path, level);
697         right = nilfs_btree_get_sib_node(btree, path, level);
698         nchildren = nilfs_btree_node_get_nchildren(btree, node);
699         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
700         move = 0;
701
702         n = (nchildren + rnchildren + 1) / 2 - rnchildren;
703         if (n > nchildren - path[level].bp_index) {
704                 /* move insert point */
705                 n--;
706                 move = 1;
707         }
708
709         nilfs_btree_node_move_right(btree, node, right, n);
710
711         if (!buffer_dirty(path[level].bp_bh))
712                 nilfs_btnode_mark_dirty(path[level].bp_bh);
713         if (!buffer_dirty(path[level].bp_sib_bh))
714                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
715
716         unlock_buffer(path[level].bp_bh);
717         unlock_buffer(path[level].bp_sib_bh);
718
719         path[level + 1].bp_index++;
720         nilfs_btree_promote_key(btree, path, level + 1,
721                                 nilfs_btree_node_get_key(btree, right, 0));
722         path[level + 1].bp_index--;
723
724         if (move) {
725                 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh);
726                 path[level].bp_bh = path[level].bp_sib_bh;
727                 path[level].bp_sib_bh = NULL;
728                 path[level].bp_index -=
729                         nilfs_btree_node_get_nchildren(btree, node);
730                 path[level + 1].bp_index++;
731         } else {
732                 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
733                 path[level].bp_sib_bh = NULL;
734         }
735
736         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
737 }
738
739 static void nilfs_btree_split(struct nilfs_btree *btree,
740                               struct nilfs_btree_path *path,
741                               int level, __u64 *keyp, __u64 *ptrp)
742 {
743         struct nilfs_btree_node *node, *right;
744         __u64 newkey;
745         __u64 newptr;
746         int nchildren, n, move;
747
748         lock_buffer(path[level].bp_bh);
749         lock_buffer(path[level].bp_sib_bh);
750
751         node = nilfs_btree_get_nonroot_node(btree, path, level);
752         right = nilfs_btree_get_sib_node(btree, path, level);
753         nchildren = nilfs_btree_node_get_nchildren(btree, node);
754         move = 0;
755
756         n = (nchildren + 1) / 2;
757         if (n > nchildren - path[level].bp_index) {
758                 n--;
759                 move = 1;
760         }
761
762         nilfs_btree_node_move_right(btree, node, right, n);
763
764         if (!buffer_dirty(path[level].bp_bh))
765                 nilfs_btnode_mark_dirty(path[level].bp_bh);
766         if (!buffer_dirty(path[level].bp_sib_bh))
767                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
768
769         unlock_buffer(path[level].bp_bh);
770         unlock_buffer(path[level].bp_sib_bh);
771
772         newkey = nilfs_btree_node_get_key(btree, right, 0);
773         newptr = path[level].bp_newreq.bpr_ptr;
774
775         if (move) {
776                 path[level].bp_index -=
777                         nilfs_btree_node_get_nchildren(btree, node);
778                 nilfs_btree_node_insert(btree, right, *keyp, *ptrp,
779                                         path[level].bp_index);
780
781                 *keyp = nilfs_btree_node_get_key(btree, right, 0);
782                 *ptrp = path[level].bp_newreq.bpr_ptr;
783
784                 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_bh);
785                 path[level].bp_bh = path[level].bp_sib_bh;
786                 path[level].bp_sib_bh = NULL;
787         } else {
788                 nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
789
790                 *keyp = nilfs_btree_node_get_key(btree, right, 0);
791                 *ptrp = path[level].bp_newreq.bpr_ptr;
792
793                 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
794                 path[level].bp_sib_bh = NULL;
795         }
796
797         path[level + 1].bp_index++;
798 }
799
800 static void nilfs_btree_grow(struct nilfs_btree *btree,
801                              struct nilfs_btree_path *path,
802                              int level, __u64 *keyp, __u64 *ptrp)
803 {
804         struct nilfs_btree_node *root, *child;
805         int n;
806
807         lock_buffer(path[level].bp_sib_bh);
808
809         root = nilfs_btree_get_root(btree);
810         child = nilfs_btree_get_sib_node(btree, path, level);
811
812         n = nilfs_btree_node_get_nchildren(btree, root);
813
814         nilfs_btree_node_move_right(btree, root, child, n);
815         nilfs_btree_node_set_level(btree, root, level + 1);
816
817         if (!buffer_dirty(path[level].bp_sib_bh))
818                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
819
820         unlock_buffer(path[level].bp_sib_bh);
821
822         path[level].bp_bh = path[level].bp_sib_bh;
823         path[level].bp_sib_bh = NULL;
824
825         nilfs_btree_do_insert(btree, path, level, keyp, ptrp);
826
827         *keyp = nilfs_btree_node_get_key(btree, child, 0);
828         *ptrp = path[level].bp_newreq.bpr_ptr;
829 }
830
831 static __u64 nilfs_btree_find_near(const struct nilfs_btree *btree,
832                                    const struct nilfs_btree_path *path)
833 {
834         struct nilfs_btree_node *node;
835         int level;
836
837         if (path == NULL)
838                 return NILFS_BMAP_INVALID_PTR;
839
840         /* left sibling */
841         level = NILFS_BTREE_LEVEL_NODE_MIN;
842         if (path[level].bp_index > 0) {
843                 node = nilfs_btree_get_node(btree, path, level);
844                 return nilfs_btree_node_get_ptr(btree, node,
845                                                 path[level].bp_index - 1);
846         }
847
848         /* parent */
849         level = NILFS_BTREE_LEVEL_NODE_MIN + 1;
850         if (level <= nilfs_btree_height(btree) - 1) {
851                 node = nilfs_btree_get_node(btree, path, level);
852                 return nilfs_btree_node_get_ptr(btree, node,
853                                                 path[level].bp_index);
854         }
855
856         return NILFS_BMAP_INVALID_PTR;
857 }
858
859 static __u64 nilfs_btree_find_target_v(const struct nilfs_btree *btree,
860                                        const struct nilfs_btree_path *path,
861                                        __u64 key)
862 {
863         __u64 ptr;
864
865         ptr = nilfs_bmap_find_target_seq(&btree->bt_bmap, key);
866         if (ptr != NILFS_BMAP_INVALID_PTR)
867                 /* sequential access */
868                 return ptr;
869         else {
870                 ptr = nilfs_btree_find_near(btree, path);
871                 if (ptr != NILFS_BMAP_INVALID_PTR)
872                         /* near */
873                         return ptr;
874         }
875         /* block group */
876         return nilfs_bmap_find_target_in_group(&btree->bt_bmap);
877 }
878
879 static void nilfs_btree_set_target_v(struct nilfs_btree *btree, __u64 key,
880                                      __u64 ptr)
881 {
882         btree->bt_bmap.b_last_allocated_key = key;
883         btree->bt_bmap.b_last_allocated_ptr = ptr;
884 }
885
886 static int nilfs_btree_prepare_insert(struct nilfs_btree *btree,
887                                       struct nilfs_btree_path *path,
888                                       int *levelp, __u64 key, __u64 ptr,
889                                       struct nilfs_bmap_stats *stats)
890 {
891         struct buffer_head *bh;
892         struct nilfs_btree_node *node, *parent, *sib;
893         __u64 sibptr;
894         int pindex, level, ret;
895
896         stats->bs_nblocks = 0;
897         level = NILFS_BTREE_LEVEL_DATA;
898
899         /* allocate a new ptr for data block */
900         if (btree->bt_ops->btop_find_target != NULL)
901                 path[level].bp_newreq.bpr_ptr =
902                         btree->bt_ops->btop_find_target(btree, path, key);
903
904         ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
905                 &btree->bt_bmap, &path[level].bp_newreq);
906         if (ret < 0)
907                 goto err_out_data;
908
909         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
910              level < nilfs_btree_height(btree) - 1;
911              level++) {
912                 node = nilfs_btree_get_nonroot_node(btree, path, level);
913                 if (nilfs_btree_node_get_nchildren(btree, node) <
914                     nilfs_btree_node_nchildren_max(btree, node)) {
915                         path[level].bp_op = nilfs_btree_do_insert;
916                         stats->bs_nblocks++;
917                         goto out;
918                 }
919
920                 parent = nilfs_btree_get_node(btree, path, level + 1);
921                 pindex = path[level + 1].bp_index;
922
923                 /* left sibling */
924                 if (pindex > 0) {
925                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
926                                                           pindex - 1);
927                         ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
928                                                    &bh);
929                         if (ret < 0)
930                                 goto err_out_child_node;
931                         sib = (struct nilfs_btree_node *)bh->b_data;
932                         if (nilfs_btree_node_get_nchildren(btree, sib) <
933                             nilfs_btree_node_nchildren_max(btree, sib)) {
934                                 path[level].bp_sib_bh = bh;
935                                 path[level].bp_op = nilfs_btree_carry_left;
936                                 stats->bs_nblocks++;
937                                 goto out;
938                         } else
939                                 nilfs_bmap_put_block(&btree->bt_bmap, bh);
940                 }
941
942                 /* right sibling */
943                 if (pindex <
944                     nilfs_btree_node_get_nchildren(btree, parent) - 1) {
945                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
946                                                           pindex + 1);
947                         ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
948                                                    &bh);
949                         if (ret < 0)
950                                 goto err_out_child_node;
951                         sib = (struct nilfs_btree_node *)bh->b_data;
952                         if (nilfs_btree_node_get_nchildren(btree, sib) <
953                             nilfs_btree_node_nchildren_max(btree, sib)) {
954                                 path[level].bp_sib_bh = bh;
955                                 path[level].bp_op = nilfs_btree_carry_right;
956                                 stats->bs_nblocks++;
957                                 goto out;
958                         } else
959                                 nilfs_bmap_put_block(&btree->bt_bmap, bh);
960                 }
961
962                 /* split */
963                 path[level].bp_newreq.bpr_ptr =
964                         path[level - 1].bp_newreq.bpr_ptr + 1;
965                 ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
966                         &btree->bt_bmap, &path[level].bp_newreq);
967                 if (ret < 0)
968                         goto err_out_child_node;
969                 ret = nilfs_bmap_get_new_block(&btree->bt_bmap,
970                                                path[level].bp_newreq.bpr_ptr,
971                                                &bh);
972                 if (ret < 0)
973                         goto err_out_curr_node;
974
975                 stats->bs_nblocks++;
976
977                 lock_buffer(bh);
978                 nilfs_btree_node_init(btree,
979                                       (struct nilfs_btree_node *)bh->b_data,
980                                       0, level, 0, NULL, NULL);
981                 unlock_buffer(bh);
982                 path[level].bp_sib_bh = bh;
983                 path[level].bp_op = nilfs_btree_split;
984         }
985
986         /* root */
987         node = nilfs_btree_get_root(btree);
988         if (nilfs_btree_node_get_nchildren(btree, node) <
989             nilfs_btree_node_nchildren_max(btree, node)) {
990                 path[level].bp_op = nilfs_btree_do_insert;
991                 stats->bs_nblocks++;
992                 goto out;
993         }
994
995         /* grow */
996         path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1;
997         ret = btree->bt_bmap.b_pops->bpop_prepare_alloc_ptr(
998                 &btree->bt_bmap, &path[level].bp_newreq);
999         if (ret < 0)
1000                 goto err_out_child_node;
1001         ret = nilfs_bmap_get_new_block(&btree->bt_bmap,
1002                                        path[level].bp_newreq.bpr_ptr, &bh);
1003         if (ret < 0)
1004                 goto err_out_curr_node;
1005
1006         lock_buffer(bh);
1007         nilfs_btree_node_init(btree, (struct nilfs_btree_node *)bh->b_data,
1008                               0, level, 0, NULL, NULL);
1009         unlock_buffer(bh);
1010         path[level].bp_sib_bh = bh;
1011         path[level].bp_op = nilfs_btree_grow;
1012
1013         level++;
1014         path[level].bp_op = nilfs_btree_do_insert;
1015
1016         /* a newly-created node block and a data block are added */
1017         stats->bs_nblocks += 2;
1018
1019         /* success */
1020  out:
1021         *levelp = level;
1022         return ret;
1023
1024         /* error */
1025  err_out_curr_node:
1026         btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap,
1027                                                     &path[level].bp_newreq);
1028  err_out_child_node:
1029         for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) {
1030                 nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh);
1031                 btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(
1032                         &btree->bt_bmap, &path[level].bp_newreq);
1033
1034         }
1035
1036         btree->bt_bmap.b_pops->bpop_abort_alloc_ptr(&btree->bt_bmap,
1037                                                        &path[level].bp_newreq);
1038  err_out_data:
1039         *levelp = level;
1040         stats->bs_nblocks = 0;
1041         return ret;
1042 }
1043
1044 static void nilfs_btree_commit_insert(struct nilfs_btree *btree,
1045                                       struct nilfs_btree_path *path,
1046                                       int maxlevel, __u64 key, __u64 ptr)
1047 {
1048         int level;
1049
1050         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1051         ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr;
1052         if (btree->bt_ops->btop_set_target != NULL)
1053                 btree->bt_ops->btop_set_target(btree, key, ptr);
1054
1055         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1056                 if (btree->bt_bmap.b_pops->bpop_commit_alloc_ptr != NULL) {
1057                         btree->bt_bmap.b_pops->bpop_commit_alloc_ptr(
1058                                 &btree->bt_bmap, &path[level - 1].bp_newreq);
1059                 }
1060                 path[level].bp_op(btree, path, level, &key, &ptr);
1061         }
1062
1063         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1064                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1065 }
1066
1067 static int nilfs_btree_insert(struct nilfs_bmap *bmap, __u64 key, __u64 ptr)
1068 {
1069         struct nilfs_btree *btree;
1070         struct nilfs_btree_path *path;
1071         struct nilfs_bmap_stats stats;
1072         int level, ret;
1073
1074         btree = (struct nilfs_btree *)bmap;
1075         path = nilfs_btree_alloc_path(btree);
1076         if (path == NULL)
1077                 return -ENOMEM;
1078         nilfs_btree_init_path(btree, path);
1079
1080         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1081                                     NILFS_BTREE_LEVEL_NODE_MIN);
1082         if (ret != -ENOENT) {
1083                 if (ret == 0)
1084                         ret = -EEXIST;
1085                 goto out;
1086         }
1087
1088         ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats);
1089         if (ret < 0)
1090                 goto out;
1091         nilfs_btree_commit_insert(btree, path, level, key, ptr);
1092         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1093
1094  out:
1095         nilfs_btree_clear_path(btree, path);
1096         nilfs_btree_free_path(btree, path);
1097         return ret;
1098 }
1099
1100 static void nilfs_btree_do_delete(struct nilfs_btree *btree,
1101                                   struct nilfs_btree_path *path,
1102                                   int level, __u64 *keyp, __u64 *ptrp)
1103 {
1104         struct nilfs_btree_node *node;
1105
1106         if (level < nilfs_btree_height(btree) - 1) {
1107                 lock_buffer(path[level].bp_bh);
1108                 node = nilfs_btree_get_nonroot_node(btree, path, level);
1109                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1110                                         path[level].bp_index);
1111                 if (!buffer_dirty(path[level].bp_bh))
1112                         nilfs_btnode_mark_dirty(path[level].bp_bh);
1113                 unlock_buffer(path[level].bp_bh);
1114                 if (path[level].bp_index == 0)
1115                         nilfs_btree_promote_key(btree, path, level + 1,
1116                                 nilfs_btree_node_get_key(btree, node, 0));
1117         } else {
1118                 node = nilfs_btree_get_root(btree);
1119                 nilfs_btree_node_delete(btree, node, keyp, ptrp,
1120                                         path[level].bp_index);
1121         }
1122 }
1123
1124 static void nilfs_btree_borrow_left(struct nilfs_btree *btree,
1125                                     struct nilfs_btree_path *path,
1126                                     int level, __u64 *keyp, __u64 *ptrp)
1127 {
1128         struct nilfs_btree_node *node, *left;
1129         int nchildren, lnchildren, n;
1130
1131         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1132
1133         lock_buffer(path[level].bp_bh);
1134         lock_buffer(path[level].bp_sib_bh);
1135
1136         node = nilfs_btree_get_nonroot_node(btree, path, level);
1137         left = nilfs_btree_get_sib_node(btree, path, level);
1138         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1139         lnchildren = nilfs_btree_node_get_nchildren(btree, left);
1140
1141         n = (nchildren + lnchildren) / 2 - nchildren;
1142
1143         nilfs_btree_node_move_right(btree, left, node, n);
1144
1145         if (!buffer_dirty(path[level].bp_bh))
1146                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1147         if (!buffer_dirty(path[level].bp_sib_bh))
1148                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1149
1150         unlock_buffer(path[level].bp_bh);
1151         unlock_buffer(path[level].bp_sib_bh);
1152
1153         nilfs_btree_promote_key(btree, path, level + 1,
1154                                 nilfs_btree_node_get_key(btree, node, 0));
1155
1156         nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
1157         path[level].bp_sib_bh = NULL;
1158         path[level].bp_index += n;
1159 }
1160
1161 static void nilfs_btree_borrow_right(struct nilfs_btree *btree,
1162                                      struct nilfs_btree_path *path,
1163                                      int level, __u64 *keyp, __u64 *ptrp)
1164 {
1165         struct nilfs_btree_node *node, *right;
1166         int nchildren, rnchildren, n;
1167
1168         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1169
1170         lock_buffer(path[level].bp_bh);
1171         lock_buffer(path[level].bp_sib_bh);
1172
1173         node = nilfs_btree_get_nonroot_node(btree, path, level);
1174         right = nilfs_btree_get_sib_node(btree, path, level);
1175         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1176         rnchildren = nilfs_btree_node_get_nchildren(btree, right);
1177
1178         n = (nchildren + rnchildren) / 2 - nchildren;
1179
1180         nilfs_btree_node_move_left(btree, node, right, n);
1181
1182         if (!buffer_dirty(path[level].bp_bh))
1183                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1184         if (!buffer_dirty(path[level].bp_sib_bh))
1185                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1186
1187         unlock_buffer(path[level].bp_bh);
1188         unlock_buffer(path[level].bp_sib_bh);
1189
1190         path[level + 1].bp_index++;
1191         nilfs_btree_promote_key(btree, path, level + 1,
1192                                 nilfs_btree_node_get_key(btree, right, 0));
1193         path[level + 1].bp_index--;
1194
1195         nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
1196         path[level].bp_sib_bh = NULL;
1197 }
1198
1199 static void nilfs_btree_concat_left(struct nilfs_btree *btree,
1200                                     struct nilfs_btree_path *path,
1201                                     int level, __u64 *keyp, __u64 *ptrp)
1202 {
1203         struct nilfs_btree_node *node, *left;
1204         int n;
1205
1206         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1207
1208         lock_buffer(path[level].bp_bh);
1209         lock_buffer(path[level].bp_sib_bh);
1210
1211         node = nilfs_btree_get_nonroot_node(btree, path, level);
1212         left = nilfs_btree_get_sib_node(btree, path, level);
1213
1214         n = nilfs_btree_node_get_nchildren(btree, node);
1215
1216         nilfs_btree_node_move_left(btree, left, node, n);
1217
1218         if (!buffer_dirty(path[level].bp_sib_bh))
1219                 nilfs_btnode_mark_dirty(path[level].bp_sib_bh);
1220
1221         unlock_buffer(path[level].bp_bh);
1222         unlock_buffer(path[level].bp_sib_bh);
1223
1224         nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh);
1225         path[level].bp_bh = path[level].bp_sib_bh;
1226         path[level].bp_sib_bh = NULL;
1227         path[level].bp_index += nilfs_btree_node_get_nchildren(btree, left);
1228 }
1229
1230 static void nilfs_btree_concat_right(struct nilfs_btree *btree,
1231                                      struct nilfs_btree_path *path,
1232                                      int level, __u64 *keyp, __u64 *ptrp)
1233 {
1234         struct nilfs_btree_node *node, *right;
1235         int n;
1236
1237         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1238
1239         lock_buffer(path[level].bp_bh);
1240         lock_buffer(path[level].bp_sib_bh);
1241
1242         node = nilfs_btree_get_nonroot_node(btree, path, level);
1243         right = nilfs_btree_get_sib_node(btree, path, level);
1244
1245         n = nilfs_btree_node_get_nchildren(btree, right);
1246
1247         nilfs_btree_node_move_left(btree, node, right, n);
1248
1249         if (!buffer_dirty(path[level].bp_bh))
1250                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1251
1252         unlock_buffer(path[level].bp_bh);
1253         unlock_buffer(path[level].bp_sib_bh);
1254
1255         nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_sib_bh);
1256         path[level].bp_sib_bh = NULL;
1257         path[level + 1].bp_index++;
1258 }
1259
1260 static void nilfs_btree_shrink(struct nilfs_btree *btree,
1261                                struct nilfs_btree_path *path,
1262                                int level, __u64 *keyp, __u64 *ptrp)
1263 {
1264         struct nilfs_btree_node *root, *child;
1265         int n;
1266
1267         nilfs_btree_do_delete(btree, path, level, keyp, ptrp);
1268
1269         lock_buffer(path[level].bp_bh);
1270         root = nilfs_btree_get_root(btree);
1271         child = nilfs_btree_get_nonroot_node(btree, path, level);
1272
1273         nilfs_btree_node_delete(btree, root, NULL, NULL, 0);
1274         nilfs_btree_node_set_level(btree, root, level);
1275         n = nilfs_btree_node_get_nchildren(btree, child);
1276         nilfs_btree_node_move_left(btree, root, child, n);
1277         unlock_buffer(path[level].bp_bh);
1278
1279         nilfs_bmap_delete_block(&btree->bt_bmap, path[level].bp_bh);
1280         path[level].bp_bh = NULL;
1281 }
1282
1283
1284 static int nilfs_btree_prepare_delete(struct nilfs_btree *btree,
1285                                       struct nilfs_btree_path *path,
1286                                       int *levelp,
1287                                       struct nilfs_bmap_stats *stats)
1288 {
1289         struct buffer_head *bh;
1290         struct nilfs_btree_node *node, *parent, *sib;
1291         __u64 sibptr;
1292         int pindex, level, ret;
1293
1294         ret = 0;
1295         stats->bs_nblocks = 0;
1296         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1297              level < nilfs_btree_height(btree) - 1;
1298              level++) {
1299                 node = nilfs_btree_get_nonroot_node(btree, path, level);
1300                 path[level].bp_oldreq.bpr_ptr =
1301                         nilfs_btree_node_get_ptr(btree, node,
1302                                                  path[level].bp_index);
1303                 if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
1304                         ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr(
1305                                 &btree->bt_bmap, &path[level].bp_oldreq);
1306                         if (ret < 0)
1307                                 goto err_out_child_node;
1308                 }
1309
1310                 if (nilfs_btree_node_get_nchildren(btree, node) >
1311                     nilfs_btree_node_nchildren_min(btree, node)) {
1312                         path[level].bp_op = nilfs_btree_do_delete;
1313                         stats->bs_nblocks++;
1314                         goto out;
1315                 }
1316
1317                 parent = nilfs_btree_get_node(btree, path, level + 1);
1318                 pindex = path[level + 1].bp_index;
1319
1320                 if (pindex > 0) {
1321                         /* left sibling */
1322                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1323                                                           pindex - 1);
1324                         ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
1325                                                    &bh);
1326                         if (ret < 0)
1327                                 goto err_out_curr_node;
1328                         sib = (struct nilfs_btree_node *)bh->b_data;
1329                         if (nilfs_btree_node_get_nchildren(btree, sib) >
1330                             nilfs_btree_node_nchildren_min(btree, sib)) {
1331                                 path[level].bp_sib_bh = bh;
1332                                 path[level].bp_op = nilfs_btree_borrow_left;
1333                                 stats->bs_nblocks++;
1334                                 goto out;
1335                         } else {
1336                                 path[level].bp_sib_bh = bh;
1337                                 path[level].bp_op = nilfs_btree_concat_left;
1338                                 stats->bs_nblocks++;
1339                                 /* continue; */
1340                         }
1341                 } else if (pindex <
1342                            nilfs_btree_node_get_nchildren(btree, parent) - 1) {
1343                         /* right sibling */
1344                         sibptr = nilfs_btree_node_get_ptr(btree, parent,
1345                                                           pindex + 1);
1346                         ret = nilfs_bmap_get_block(&btree->bt_bmap, sibptr,
1347                                                    &bh);
1348                         if (ret < 0)
1349                                 goto err_out_curr_node;
1350                         sib = (struct nilfs_btree_node *)bh->b_data;
1351                         if (nilfs_btree_node_get_nchildren(btree, sib) >
1352                             nilfs_btree_node_nchildren_min(btree, sib)) {
1353                                 path[level].bp_sib_bh = bh;
1354                                 path[level].bp_op = nilfs_btree_borrow_right;
1355                                 stats->bs_nblocks++;
1356                                 goto out;
1357                         } else {
1358                                 path[level].bp_sib_bh = bh;
1359                                 path[level].bp_op = nilfs_btree_concat_right;
1360                                 stats->bs_nblocks++;
1361                                 /* continue; */
1362                         }
1363                 } else {
1364                         /* no siblings */
1365                         /* the only child of the root node */
1366                         WARN_ON(level != nilfs_btree_height(btree) - 2);
1367                         if (nilfs_btree_node_get_nchildren(btree, node) - 1 <=
1368                             NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1369                                 path[level].bp_op = nilfs_btree_shrink;
1370                                 stats->bs_nblocks += 2;
1371                         } else {
1372                                 path[level].bp_op = nilfs_btree_do_delete;
1373                                 stats->bs_nblocks++;
1374                         }
1375
1376                         goto out;
1377
1378                 }
1379         }
1380
1381         node = nilfs_btree_get_root(btree);
1382         path[level].bp_oldreq.bpr_ptr =
1383                 nilfs_btree_node_get_ptr(btree, node, path[level].bp_index);
1384         if (btree->bt_bmap.b_pops->bpop_prepare_end_ptr != NULL) {
1385                 ret = btree->bt_bmap.b_pops->bpop_prepare_end_ptr(
1386                         &btree->bt_bmap, &path[level].bp_oldreq);
1387                 if (ret < 0)
1388                         goto err_out_child_node;
1389         }
1390         /* child of the root node is deleted */
1391         path[level].bp_op = nilfs_btree_do_delete;
1392         stats->bs_nblocks++;
1393
1394         /* success */
1395  out:
1396         *levelp = level;
1397         return ret;
1398
1399         /* error */
1400  err_out_curr_node:
1401         if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL)
1402                 btree->bt_bmap.b_pops->bpop_abort_end_ptr(
1403                         &btree->bt_bmap, &path[level].bp_oldreq);
1404  err_out_child_node:
1405         for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) {
1406                 nilfs_bmap_put_block(&btree->bt_bmap, path[level].bp_sib_bh);
1407                 if (btree->bt_bmap.b_pops->bpop_abort_end_ptr != NULL)
1408                         btree->bt_bmap.b_pops->bpop_abort_end_ptr(
1409                                 &btree->bt_bmap, &path[level].bp_oldreq);
1410         }
1411         *levelp = level;
1412         stats->bs_nblocks = 0;
1413         return ret;
1414 }
1415
1416 static void nilfs_btree_commit_delete(struct nilfs_btree *btree,
1417                                       struct nilfs_btree_path *path,
1418                                       int maxlevel)
1419 {
1420         int level;
1421
1422         for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) {
1423                 if (btree->bt_bmap.b_pops->bpop_commit_end_ptr != NULL)
1424                         btree->bt_bmap.b_pops->bpop_commit_end_ptr(
1425                                 &btree->bt_bmap, &path[level].bp_oldreq);
1426                 path[level].bp_op(btree, path, level, NULL, NULL);
1427         }
1428
1429         if (!nilfs_bmap_dirty(&btree->bt_bmap))
1430                 nilfs_bmap_set_dirty(&btree->bt_bmap);
1431 }
1432
1433 static int nilfs_btree_delete(struct nilfs_bmap *bmap, __u64 key)
1434
1435 {
1436         struct nilfs_btree *btree;
1437         struct nilfs_btree_path *path;
1438         struct nilfs_bmap_stats stats;
1439         int level, ret;
1440
1441         btree = (struct nilfs_btree *)bmap;
1442         path = nilfs_btree_alloc_path(btree);
1443         if (path == NULL)
1444                 return -ENOMEM;
1445         nilfs_btree_init_path(btree, path);
1446         ret = nilfs_btree_do_lookup(btree, path, key, NULL,
1447                                     NILFS_BTREE_LEVEL_NODE_MIN);
1448         if (ret < 0)
1449                 goto out;
1450
1451         ret = nilfs_btree_prepare_delete(btree, path, &level, &stats);
1452         if (ret < 0)
1453                 goto out;
1454         nilfs_btree_commit_delete(btree, path, level);
1455         nilfs_bmap_sub_blocks(bmap, stats.bs_nblocks);
1456
1457 out:
1458         nilfs_btree_clear_path(btree, path);
1459         nilfs_btree_free_path(btree, path);
1460         return ret;
1461 }
1462
1463 static int nilfs_btree_last_key(const struct nilfs_bmap *bmap, __u64 *keyp)
1464 {
1465         struct nilfs_btree *btree;
1466         struct nilfs_btree_path *path;
1467         int ret;
1468
1469         btree = (struct nilfs_btree *)bmap;
1470         path = nilfs_btree_alloc_path(btree);
1471         if (path == NULL)
1472                 return -ENOMEM;
1473         nilfs_btree_init_path(btree, path);
1474
1475         ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL);
1476
1477         nilfs_btree_clear_path(btree, path);
1478         nilfs_btree_free_path(btree, path);
1479
1480         return ret;
1481 }
1482
1483 static int nilfs_btree_check_delete(struct nilfs_bmap *bmap, __u64 key)
1484 {
1485         struct buffer_head *bh;
1486         struct nilfs_btree *btree;
1487         struct nilfs_btree_node *root, *node;
1488         __u64 maxkey, nextmaxkey;
1489         __u64 ptr;
1490         int nchildren, ret;
1491
1492         btree = (struct nilfs_btree *)bmap;
1493         root = nilfs_btree_get_root(btree);
1494         switch (nilfs_btree_height(btree)) {
1495         case 2:
1496                 bh = NULL;
1497                 node = root;
1498                 break;
1499         case 3:
1500                 nchildren = nilfs_btree_node_get_nchildren(btree, root);
1501                 if (nchildren > 1)
1502                         return 0;
1503                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1504                 ret = nilfs_bmap_get_block(bmap, ptr, &bh);
1505                 if (ret < 0)
1506                         return ret;
1507                 node = (struct nilfs_btree_node *)bh->b_data;
1508                 break;
1509         default:
1510                 return 0;
1511         }
1512
1513         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1514         maxkey = nilfs_btree_node_get_key(btree, node, nchildren - 1);
1515         nextmaxkey = (nchildren > 1) ?
1516                 nilfs_btree_node_get_key(btree, node, nchildren - 2) : 0;
1517         if (bh != NULL)
1518                 nilfs_bmap_put_block(bmap, bh);
1519
1520         return (maxkey == key) && (nextmaxkey < bmap->b_low);
1521 }
1522
1523 static int nilfs_btree_gather_data(struct nilfs_bmap *bmap,
1524                                    __u64 *keys, __u64 *ptrs, int nitems)
1525 {
1526         struct buffer_head *bh;
1527         struct nilfs_btree *btree;
1528         struct nilfs_btree_node *node, *root;
1529         __le64 *dkeys;
1530         __le64 *dptrs;
1531         __u64 ptr;
1532         int nchildren, i, ret;
1533
1534         btree = (struct nilfs_btree *)bmap;
1535         root = nilfs_btree_get_root(btree);
1536         switch (nilfs_btree_height(btree)) {
1537         case 2:
1538                 bh = NULL;
1539                 node = root;
1540                 break;
1541         case 3:
1542                 nchildren = nilfs_btree_node_get_nchildren(btree, root);
1543                 WARN_ON(nchildren > 1);
1544                 ptr = nilfs_btree_node_get_ptr(btree, root, nchildren - 1);
1545                 ret = nilfs_bmap_get_block(bmap, ptr, &bh);
1546                 if (ret < 0)
1547                         return ret;
1548                 node = (struct nilfs_btree_node *)bh->b_data;
1549                 break;
1550         default:
1551                 node = NULL;
1552                 return -EINVAL;
1553         }
1554
1555         nchildren = nilfs_btree_node_get_nchildren(btree, node);
1556         if (nchildren < nitems)
1557                 nitems = nchildren;
1558         dkeys = nilfs_btree_node_dkeys(btree, node);
1559         dptrs = nilfs_btree_node_dptrs(btree, node);
1560         for (i = 0; i < nitems; i++) {
1561                 keys[i] = nilfs_bmap_dkey_to_key(dkeys[i]);
1562                 ptrs[i] = nilfs_bmap_dptr_to_ptr(dptrs[i]);
1563         }
1564
1565         if (bh != NULL)
1566                 nilfs_bmap_put_block(bmap, bh);
1567
1568         return nitems;
1569 }
1570
1571 static int
1572 nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *bmap, __u64 key,
1573                                        union nilfs_bmap_ptr_req *dreq,
1574                                        union nilfs_bmap_ptr_req *nreq,
1575                                        struct buffer_head **bhp,
1576                                        struct nilfs_bmap_stats *stats)
1577 {
1578         struct buffer_head *bh;
1579         struct nilfs_btree *btree;
1580         int ret;
1581
1582         btree = (struct nilfs_btree *)bmap;
1583         stats->bs_nblocks = 0;
1584
1585         /* for data */
1586         /* cannot find near ptr */
1587         if (btree->bt_ops->btop_find_target != NULL)
1588                 dreq->bpr_ptr
1589                         = btree->bt_ops->btop_find_target(btree, NULL, key);
1590         ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, dreq);
1591         if (ret < 0)
1592                 return ret;
1593
1594         *bhp = NULL;
1595         stats->bs_nblocks++;
1596         if (nreq != NULL) {
1597                 nreq->bpr_ptr = dreq->bpr_ptr + 1;
1598                 ret = bmap->b_pops->bpop_prepare_alloc_ptr(bmap, nreq);
1599                 if (ret < 0)
1600                         goto err_out_dreq;
1601
1602                 ret = nilfs_bmap_get_new_block(bmap, nreq->bpr_ptr, &bh);
1603                 if (ret < 0)
1604                         goto err_out_nreq;
1605
1606                 *bhp = bh;
1607                 stats->bs_nblocks++;
1608         }
1609
1610         /* success */
1611         return 0;
1612
1613         /* error */
1614  err_out_nreq:
1615         bmap->b_pops->bpop_abort_alloc_ptr(bmap, nreq);
1616  err_out_dreq:
1617         bmap->b_pops->bpop_abort_alloc_ptr(bmap, dreq);
1618         stats->bs_nblocks = 0;
1619         return ret;
1620
1621 }
1622
1623 static void
1624 nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *bmap,
1625                                       __u64 key, __u64 ptr,
1626                                       const __u64 *keys, const __u64 *ptrs,
1627                                       int n, __u64 low, __u64 high,
1628                                       union nilfs_bmap_ptr_req *dreq,
1629                                       union nilfs_bmap_ptr_req *nreq,
1630                                       struct buffer_head *bh)
1631 {
1632         struct nilfs_btree *btree;
1633         struct nilfs_btree_node *node;
1634         __u64 tmpptr;
1635
1636         /* free resources */
1637         if (bmap->b_ops->bop_clear != NULL)
1638                 bmap->b_ops->bop_clear(bmap);
1639
1640         /* ptr must be a pointer to a buffer head. */
1641         set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr));
1642
1643         /* convert and insert */
1644         btree = (struct nilfs_btree *)bmap;
1645         nilfs_btree_init(bmap, low, high);
1646         if (nreq != NULL) {
1647                 if (bmap->b_pops->bpop_commit_alloc_ptr != NULL) {
1648                         bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq);
1649                         bmap->b_pops->bpop_commit_alloc_ptr(bmap, nreq);
1650                 }
1651
1652                 /* create child node at level 1 */
1653                 lock_buffer(bh);
1654                 node = (struct nilfs_btree_node *)bh->b_data;
1655                 nilfs_btree_node_init(btree, node, 0, 1, n, keys, ptrs);
1656                 nilfs_btree_node_insert(btree, node,
1657                                         key, dreq->bpr_ptr, n);
1658                 if (!buffer_dirty(bh))
1659                         nilfs_btnode_mark_dirty(bh);
1660                 if (!nilfs_bmap_dirty(bmap))
1661                         nilfs_bmap_set_dirty(bmap);
1662
1663                 unlock_buffer(bh);
1664                 nilfs_bmap_put_block(bmap, bh);
1665
1666                 /* create root node at level 2 */
1667                 node = nilfs_btree_get_root(btree);
1668                 tmpptr = nreq->bpr_ptr;
1669                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1670                                       2, 1, &keys[0], &tmpptr);
1671         } else {
1672                 if (bmap->b_pops->bpop_commit_alloc_ptr != NULL)
1673                         bmap->b_pops->bpop_commit_alloc_ptr(bmap, dreq);
1674
1675                 /* create root node at level 1 */
1676                 node = nilfs_btree_get_root(btree);
1677                 nilfs_btree_node_init(btree, node, NILFS_BTREE_NODE_ROOT,
1678                                       1, n, keys, ptrs);
1679                 nilfs_btree_node_insert(btree, node,
1680                                         key, dreq->bpr_ptr, n);
1681                 if (!nilfs_bmap_dirty(bmap))
1682                         nilfs_bmap_set_dirty(bmap);
1683         }
1684
1685         if (btree->bt_ops->btop_set_target != NULL)
1686                 btree->bt_ops->btop_set_target(btree, key, dreq->bpr_ptr);
1687 }
1688
1689 /**
1690  * nilfs_btree_convert_and_insert -
1691  * @bmap:
1692  * @key:
1693  * @ptr:
1694  * @keys:
1695  * @ptrs:
1696  * @n:
1697  * @low:
1698  * @high:
1699  */
1700 int nilfs_btree_convert_and_insert(struct nilfs_bmap *bmap,
1701                                    __u64 key, __u64 ptr,
1702                                    const __u64 *keys, const __u64 *ptrs,
1703                                    int n, __u64 low, __u64 high)
1704 {
1705         struct buffer_head *bh;
1706         union nilfs_bmap_ptr_req dreq, nreq, *di, *ni;
1707         struct nilfs_bmap_stats stats;
1708         int ret;
1709
1710         if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) {
1711                 di = &dreq;
1712                 ni = NULL;
1713         } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX(
1714                            1 << bmap->b_inode->i_blkbits)) {
1715                 di = &dreq;
1716                 ni = &nreq;
1717         } else {
1718                 di = NULL;
1719                 ni = NULL;
1720                 BUG();
1721         }
1722
1723         ret = nilfs_btree_prepare_convert_and_insert(bmap, key, di, ni, &bh,
1724                                                      &stats);
1725         if (ret < 0)
1726                 return ret;
1727         nilfs_btree_commit_convert_and_insert(bmap, key, ptr, keys, ptrs, n,
1728                                               low, high, di, ni, bh);
1729         nilfs_bmap_add_blocks(bmap, stats.bs_nblocks);
1730         return 0;
1731 }
1732
1733 static int nilfs_btree_propagate_p(struct nilfs_btree *btree,
1734                                    struct nilfs_btree_path *path,
1735                                    int level,
1736                                    struct buffer_head *bh)
1737 {
1738         while ((++level < nilfs_btree_height(btree) - 1) &&
1739                !buffer_dirty(path[level].bp_bh))
1740                 nilfs_btnode_mark_dirty(path[level].bp_bh);
1741
1742         return 0;
1743 }
1744
1745 static int nilfs_btree_prepare_update_v(struct nilfs_btree *btree,
1746                                         struct nilfs_btree_path *path,
1747                                         int level)
1748 {
1749         struct nilfs_btree_node *parent;
1750         int ret;
1751
1752         parent = nilfs_btree_get_node(btree, path, level + 1);
1753         path[level].bp_oldreq.bpr_ptr =
1754                 nilfs_btree_node_get_ptr(btree, parent,
1755                                          path[level + 1].bp_index);
1756         path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1;
1757         ret = nilfs_bmap_prepare_update(&btree->bt_bmap,
1758                                         &path[level].bp_oldreq,
1759                                         &path[level].bp_newreq);
1760         if (ret < 0)
1761                 return ret;
1762
1763         if (buffer_nilfs_node(path[level].bp_bh)) {
1764                 path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr;
1765                 path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr;
1766                 path[level].bp_ctxt.bh = path[level].bp_bh;
1767                 ret = nilfs_btnode_prepare_change_key(
1768                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1769                         &path[level].bp_ctxt);
1770                 if (ret < 0) {
1771                         nilfs_bmap_abort_update(&btree->bt_bmap,
1772                                                 &path[level].bp_oldreq,
1773                                                 &path[level].bp_newreq);
1774                         return ret;
1775                 }
1776         }
1777
1778         return 0;
1779 }
1780
1781 static void nilfs_btree_commit_update_v(struct nilfs_btree *btree,
1782                                         struct nilfs_btree_path *path,
1783                                         int level)
1784 {
1785         struct nilfs_btree_node *parent;
1786
1787         nilfs_bmap_commit_update(&btree->bt_bmap,
1788                                  &path[level].bp_oldreq,
1789                                  &path[level].bp_newreq);
1790
1791         if (buffer_nilfs_node(path[level].bp_bh)) {
1792                 nilfs_btnode_commit_change_key(
1793                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1794                         &path[level].bp_ctxt);
1795                 path[level].bp_bh = path[level].bp_ctxt.bh;
1796         }
1797         set_buffer_nilfs_volatile(path[level].bp_bh);
1798
1799         parent = nilfs_btree_get_node(btree, path, level + 1);
1800         nilfs_btree_node_set_ptr(btree, parent, path[level + 1].bp_index,
1801                                  path[level].bp_newreq.bpr_ptr);
1802 }
1803
1804 static void nilfs_btree_abort_update_v(struct nilfs_btree *btree,
1805                                        struct nilfs_btree_path *path,
1806                                        int level)
1807 {
1808         nilfs_bmap_abort_update(&btree->bt_bmap,
1809                                 &path[level].bp_oldreq,
1810                                 &path[level].bp_newreq);
1811         if (buffer_nilfs_node(path[level].bp_bh))
1812                 nilfs_btnode_abort_change_key(
1813                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
1814                         &path[level].bp_ctxt);
1815 }
1816
1817 static int nilfs_btree_prepare_propagate_v(struct nilfs_btree *btree,
1818                                            struct nilfs_btree_path *path,
1819                                            int minlevel,
1820                                            int *maxlevelp)
1821 {
1822         int level, ret;
1823
1824         level = minlevel;
1825         if (!buffer_nilfs_volatile(path[level].bp_bh)) {
1826                 ret = nilfs_btree_prepare_update_v(btree, path, level);
1827                 if (ret < 0)
1828                         return ret;
1829         }
1830         while ((++level < nilfs_btree_height(btree) - 1) &&
1831                !buffer_dirty(path[level].bp_bh)) {
1832
1833                 WARN_ON(buffer_nilfs_volatile(path[level].bp_bh));
1834                 ret = nilfs_btree_prepare_update_v(btree, path, level);
1835                 if (ret < 0)
1836                         goto out;
1837         }
1838
1839         /* success */
1840         *maxlevelp = level - 1;
1841         return 0;
1842
1843         /* error */
1844  out:
1845         while (--level > minlevel)
1846                 nilfs_btree_abort_update_v(btree, path, level);
1847         if (!buffer_nilfs_volatile(path[level].bp_bh))
1848                 nilfs_btree_abort_update_v(btree, path, level);
1849         return ret;
1850 }
1851
1852 static void nilfs_btree_commit_propagate_v(struct nilfs_btree *btree,
1853                                            struct nilfs_btree_path *path,
1854                                            int minlevel,
1855                                            int maxlevel,
1856                                            struct buffer_head *bh)
1857 {
1858         int level;
1859
1860         if (!buffer_nilfs_volatile(path[minlevel].bp_bh))
1861                 nilfs_btree_commit_update_v(btree, path, minlevel);
1862
1863         for (level = minlevel + 1; level <= maxlevel; level++)
1864                 nilfs_btree_commit_update_v(btree, path, level);
1865 }
1866
1867 static int nilfs_btree_propagate_v(struct nilfs_btree *btree,
1868                                    struct nilfs_btree_path *path,
1869                                    int level,
1870                                    struct buffer_head *bh)
1871 {
1872         int maxlevel, ret;
1873         struct nilfs_btree_node *parent;
1874         __u64 ptr;
1875
1876         get_bh(bh);
1877         path[level].bp_bh = bh;
1878         ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel);
1879         if (ret < 0)
1880                 goto out;
1881
1882         if (buffer_nilfs_volatile(path[level].bp_bh)) {
1883                 parent = nilfs_btree_get_node(btree, path, level + 1);
1884                 ptr = nilfs_btree_node_get_ptr(btree, parent,
1885                                                path[level + 1].bp_index);
1886                 ret = nilfs_bmap_mark_dirty(&btree->bt_bmap, ptr);
1887                 if (ret < 0)
1888                         goto out;
1889         }
1890
1891         nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh);
1892
1893  out:
1894         brelse(path[level].bp_bh);
1895         path[level].bp_bh = NULL;
1896         return ret;
1897 }
1898
1899 static int nilfs_btree_propagate(const struct nilfs_bmap *bmap,
1900                                  struct buffer_head *bh)
1901 {
1902         struct nilfs_btree *btree;
1903         struct nilfs_btree_path *path;
1904         struct nilfs_btree_node *node;
1905         __u64 key;
1906         int level, ret;
1907
1908         WARN_ON(!buffer_dirty(bh));
1909
1910         btree = (struct nilfs_btree *)bmap;
1911         path = nilfs_btree_alloc_path(btree);
1912         if (path == NULL)
1913                 return -ENOMEM;
1914         nilfs_btree_init_path(btree, path);
1915
1916         if (buffer_nilfs_node(bh)) {
1917                 node = (struct nilfs_btree_node *)bh->b_data;
1918                 key = nilfs_btree_node_get_key(btree, node, 0);
1919                 level = nilfs_btree_node_get_level(btree, node);
1920         } else {
1921                 key = nilfs_bmap_data_get_key(bmap, bh);
1922                 level = NILFS_BTREE_LEVEL_DATA;
1923         }
1924
1925         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
1926         if (ret < 0) {
1927                 if (unlikely(ret == -ENOENT))
1928                         printk(KERN_CRIT "%s: key = %llu, level == %d\n",
1929                                __func__, (unsigned long long)key, level);
1930                 goto out;
1931         }
1932
1933         ret = btree->bt_ops->btop_propagate(btree, path, level, bh);
1934
1935  out:
1936         nilfs_btree_clear_path(btree, path);
1937         nilfs_btree_free_path(btree, path);
1938
1939         return ret;
1940 }
1941
1942 static int nilfs_btree_propagate_gc(const struct nilfs_bmap *bmap,
1943                                     struct buffer_head *bh)
1944 {
1945         return nilfs_bmap_mark_dirty(bmap, bh->b_blocknr);
1946 }
1947
1948 static void nilfs_btree_add_dirty_buffer(struct nilfs_btree *btree,
1949                                          struct list_head *lists,
1950                                          struct buffer_head *bh)
1951 {
1952         struct list_head *head;
1953         struct buffer_head *cbh;
1954         struct nilfs_btree_node *node, *cnode;
1955         __u64 key, ckey;
1956         int level;
1957
1958         get_bh(bh);
1959         node = (struct nilfs_btree_node *)bh->b_data;
1960         key = nilfs_btree_node_get_key(btree, node, 0);
1961         level = nilfs_btree_node_get_level(btree, node);
1962         list_for_each(head, &lists[level]) {
1963                 cbh = list_entry(head, struct buffer_head, b_assoc_buffers);
1964                 cnode = (struct nilfs_btree_node *)cbh->b_data;
1965                 ckey = nilfs_btree_node_get_key(btree, cnode, 0);
1966                 if (key < ckey)
1967                         break;
1968         }
1969         list_add_tail(&bh->b_assoc_buffers, head);
1970 }
1971
1972 static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *bmap,
1973                                              struct list_head *listp)
1974 {
1975         struct nilfs_btree *btree = (struct nilfs_btree *)bmap;
1976         struct address_space *btcache = &NILFS_BMAP_I(bmap)->i_btnode_cache;
1977         struct list_head lists[NILFS_BTREE_LEVEL_MAX];
1978         struct pagevec pvec;
1979         struct buffer_head *bh, *head;
1980         pgoff_t index = 0;
1981         int level, i;
1982
1983         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
1984              level < NILFS_BTREE_LEVEL_MAX;
1985              level++)
1986                 INIT_LIST_HEAD(&lists[level]);
1987
1988         pagevec_init(&pvec, 0);
1989
1990         while (pagevec_lookup_tag(&pvec, btcache, &index, PAGECACHE_TAG_DIRTY,
1991                                   PAGEVEC_SIZE)) {
1992                 for (i = 0; i < pagevec_count(&pvec); i++) {
1993                         bh = head = page_buffers(pvec.pages[i]);
1994                         do {
1995                                 if (buffer_dirty(bh))
1996                                         nilfs_btree_add_dirty_buffer(btree,
1997                                                                      lists, bh);
1998                         } while ((bh = bh->b_this_page) != head);
1999                 }
2000                 pagevec_release(&pvec);
2001                 cond_resched();
2002         }
2003
2004         for (level = NILFS_BTREE_LEVEL_NODE_MIN;
2005              level < NILFS_BTREE_LEVEL_MAX;
2006              level++)
2007                 list_splice(&lists[level], listp->prev);
2008 }
2009
2010 static int nilfs_btree_assign_p(struct nilfs_btree *btree,
2011                                 struct nilfs_btree_path *path,
2012                                 int level,
2013                                 struct buffer_head **bh,
2014                                 sector_t blocknr,
2015                                 union nilfs_binfo *binfo)
2016 {
2017         struct nilfs_btree_node *parent;
2018         __u64 key;
2019         __u64 ptr;
2020         int ret;
2021
2022         parent = nilfs_btree_get_node(btree, path, level + 1);
2023         ptr = nilfs_btree_node_get_ptr(btree, parent,
2024                                        path[level + 1].bp_index);
2025         if (buffer_nilfs_node(*bh)) {
2026                 path[level].bp_ctxt.oldkey = ptr;
2027                 path[level].bp_ctxt.newkey = blocknr;
2028                 path[level].bp_ctxt.bh = *bh;
2029                 ret = nilfs_btnode_prepare_change_key(
2030                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2031                         &path[level].bp_ctxt);
2032                 if (ret < 0)
2033                         return ret;
2034                 nilfs_btnode_commit_change_key(
2035                         &NILFS_BMAP_I(&btree->bt_bmap)->i_btnode_cache,
2036                         &path[level].bp_ctxt);
2037                 *bh = path[level].bp_ctxt.bh;
2038         }
2039
2040         nilfs_btree_node_set_ptr(btree, parent,
2041                                  path[level + 1].bp_index, blocknr);
2042
2043         key = nilfs_btree_node_get_key(btree, parent,
2044                                        path[level + 1].bp_index);
2045         /* on-disk format */
2046         binfo->bi_dat.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2047         binfo->bi_dat.bi_level = level;
2048
2049         return 0;
2050 }
2051
2052 static int nilfs_btree_assign_v(struct nilfs_btree *btree,
2053                                 struct nilfs_btree_path *path,
2054                                 int level,
2055                                 struct buffer_head **bh,
2056                                 sector_t blocknr,
2057                                 union nilfs_binfo *binfo)
2058 {
2059         struct nilfs_btree_node *parent;
2060         __u64 key;
2061         __u64 ptr;
2062         union nilfs_bmap_ptr_req req;
2063         int ret;
2064
2065         parent = nilfs_btree_get_node(btree, path, level + 1);
2066         ptr = nilfs_btree_node_get_ptr(btree, parent,
2067                                        path[level + 1].bp_index);
2068         req.bpr_ptr = ptr;
2069         ret = btree->bt_bmap.b_pops->bpop_prepare_start_ptr(&btree->bt_bmap,
2070                                                                &req);
2071         if (ret < 0)
2072                 return ret;
2073         btree->bt_bmap.b_pops->bpop_commit_start_ptr(&btree->bt_bmap,
2074                                                         &req, blocknr);
2075
2076         key = nilfs_btree_node_get_key(btree, parent,
2077                                        path[level + 1].bp_index);
2078         /* on-disk format */
2079         binfo->bi_v.bi_vblocknr = nilfs_bmap_ptr_to_dptr(ptr);
2080         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2081
2082         return 0;
2083 }
2084
2085 static int nilfs_btree_assign(struct nilfs_bmap *bmap,
2086                               struct buffer_head **bh,
2087                               sector_t blocknr,
2088                               union nilfs_binfo *binfo)
2089 {
2090         struct nilfs_btree *btree;
2091         struct nilfs_btree_path *path;
2092         struct nilfs_btree_node *node;
2093         __u64 key;
2094         int level, ret;
2095
2096         btree = (struct nilfs_btree *)bmap;
2097         path = nilfs_btree_alloc_path(btree);
2098         if (path == NULL)
2099                 return -ENOMEM;
2100         nilfs_btree_init_path(btree, path);
2101
2102         if (buffer_nilfs_node(*bh)) {
2103                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2104                 key = nilfs_btree_node_get_key(btree, node, 0);
2105                 level = nilfs_btree_node_get_level(btree, node);
2106         } else {
2107                 key = nilfs_bmap_data_get_key(bmap, *bh);
2108                 level = NILFS_BTREE_LEVEL_DATA;
2109         }
2110
2111         ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1);
2112         if (ret < 0) {
2113                 WARN_ON(ret == -ENOENT);
2114                 goto out;
2115         }
2116
2117         ret = btree->bt_ops->btop_assign(btree, path, level, bh,
2118                                             blocknr, binfo);
2119
2120  out:
2121         nilfs_btree_clear_path(btree, path);
2122         nilfs_btree_free_path(btree, path);
2123
2124         return ret;
2125 }
2126
2127 static int nilfs_btree_assign_gc(struct nilfs_bmap *bmap,
2128                                  struct buffer_head **bh,
2129                                  sector_t blocknr,
2130                                  union nilfs_binfo *binfo)
2131 {
2132         struct nilfs_btree *btree;
2133         struct nilfs_btree_node *node;
2134         __u64 key;
2135         int ret;
2136
2137         btree = (struct nilfs_btree *)bmap;
2138         ret = nilfs_bmap_move_v(bmap, (*bh)->b_blocknr, blocknr);
2139         if (ret < 0)
2140                 return ret;
2141
2142         if (buffer_nilfs_node(*bh)) {
2143                 node = (struct nilfs_btree_node *)(*bh)->b_data;
2144                 key = nilfs_btree_node_get_key(btree, node, 0);
2145         } else
2146                 key = nilfs_bmap_data_get_key(bmap, *bh);
2147
2148         /* on-disk format */
2149         binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr);
2150         binfo->bi_v.bi_blkoff = nilfs_bmap_key_to_dkey(key);
2151
2152         return 0;
2153 }
2154
2155 static int nilfs_btree_mark(struct nilfs_bmap *bmap, __u64 key, int level)
2156 {
2157         struct buffer_head *bh;
2158         struct nilfs_btree *btree;
2159         struct nilfs_btree_path *path;
2160         __u64 ptr;
2161         int ret;
2162
2163         btree = (struct nilfs_btree *)bmap;
2164         path = nilfs_btree_alloc_path(btree);
2165         if (path == NULL)
2166                 return -ENOMEM;
2167         nilfs_btree_init_path(btree, path);
2168
2169         ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1);
2170         if (ret < 0) {
2171                 WARN_ON(ret == -ENOENT);
2172                 goto out;
2173         }
2174         ret = nilfs_bmap_get_block(&btree->bt_bmap, ptr, &bh);
2175         if (ret < 0) {
2176                 WARN_ON(ret == -ENOENT);
2177                 goto out;
2178         }
2179
2180         if (!buffer_dirty(bh))
2181                 nilfs_btnode_mark_dirty(bh);
2182         nilfs_bmap_put_block(&btree->bt_bmap, bh);
2183         if (!nilfs_bmap_dirty(&btree->bt_bmap))
2184                 nilfs_bmap_set_dirty(&btree->bt_bmap);
2185
2186  out:
2187         nilfs_btree_clear_path(btree, path);
2188         nilfs_btree_free_path(btree, path);
2189         return ret;
2190 }
2191
2192 static const struct nilfs_bmap_operations nilfs_btree_ops = {
2193         .bop_lookup             =       nilfs_btree_lookup,
2194         .bop_insert             =       nilfs_btree_insert,
2195         .bop_delete             =       nilfs_btree_delete,
2196         .bop_clear              =       NULL,
2197
2198         .bop_propagate          =       nilfs_btree_propagate,
2199
2200         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2201
2202         .bop_assign             =       nilfs_btree_assign,
2203         .bop_mark               =       nilfs_btree_mark,
2204
2205         .bop_last_key           =       nilfs_btree_last_key,
2206         .bop_check_insert       =       NULL,
2207         .bop_check_delete       =       nilfs_btree_check_delete,
2208         .bop_gather_data        =       nilfs_btree_gather_data,
2209 };
2210
2211 static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
2212         .bop_lookup             =       NULL,
2213         .bop_insert             =       NULL,
2214         .bop_delete             =       NULL,
2215         .bop_clear              =       NULL,
2216
2217         .bop_propagate          =       nilfs_btree_propagate_gc,
2218
2219         .bop_lookup_dirty_buffers =     nilfs_btree_lookup_dirty_buffers,
2220
2221         .bop_assign             =       nilfs_btree_assign_gc,
2222         .bop_mark               =       NULL,
2223
2224         .bop_last_key           =       NULL,
2225         .bop_check_insert       =       NULL,
2226         .bop_check_delete       =       NULL,
2227         .bop_gather_data        =       NULL,
2228 };
2229
2230 static const struct nilfs_btree_operations nilfs_btree_ops_v = {
2231         .btop_find_target       =       nilfs_btree_find_target_v,
2232         .btop_set_target        =       nilfs_btree_set_target_v,
2233         .btop_propagate         =       nilfs_btree_propagate_v,
2234         .btop_assign            =       nilfs_btree_assign_v,
2235 };
2236
2237 static const struct nilfs_btree_operations nilfs_btree_ops_p = {
2238         .btop_find_target       =       NULL,
2239         .btop_set_target        =       NULL,
2240         .btop_propagate         =       nilfs_btree_propagate_p,
2241         .btop_assign            =       nilfs_btree_assign_p,
2242 };
2243
2244 int nilfs_btree_init(struct nilfs_bmap *bmap, __u64 low, __u64 high)
2245 {
2246         struct nilfs_btree *btree;
2247
2248         btree = (struct nilfs_btree *)bmap;
2249         bmap->b_ops = &nilfs_btree_ops;
2250         bmap->b_low = low;
2251         bmap->b_high = high;
2252         switch (bmap->b_inode->i_ino) {
2253         case NILFS_DAT_INO:
2254                 btree->bt_ops = &nilfs_btree_ops_p;
2255                 break;
2256         default:
2257                 btree->bt_ops = &nilfs_btree_ops_v;
2258                 break;
2259         }
2260
2261         return 0;
2262 }
2263
2264 void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
2265 {
2266         bmap->b_low = NILFS_BMAP_LARGE_LOW;
2267         bmap->b_high = NILFS_BMAP_LARGE_HIGH;
2268         bmap->b_ops = &nilfs_btree_ops_gc;
2269 }