[XFS] Check agf_btreeblks is valid when reading in the AGF
[linux-2.6] / fs / xfs / xfs_btree.c
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir2.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h"
30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir2_sf.h"
34 #include "xfs_attr_sf.h"
35 #include "xfs_dinode.h"
36 #include "xfs_inode.h"
37 #include "xfs_inode_item.h"
38 #include "xfs_btree.h"
39 #include "xfs_btree_trace.h"
40 #include "xfs_ialloc.h"
41 #include "xfs_error.h"
42
43 /*
44  * Cursor allocation zone.
45  */
46 kmem_zone_t     *xfs_btree_cur_zone;
47
48 /*
49  * Btree magic numbers.
50  */
51 const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
52         XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
53 };
54
55
56 int                                     /* error (0 or EFSCORRUPTED) */
57 xfs_btree_check_lblock(
58         struct xfs_btree_cur    *cur,   /* btree cursor */
59         struct xfs_btree_lblock *block, /* btree long form block pointer */
60         int                     level,  /* level of the btree block */
61         struct xfs_buf          *bp)    /* buffer for block, if any */
62 {
63         int                     lblock_ok; /* block passes checks */
64         struct xfs_mount        *mp;    /* file system mount point */
65
66         mp = cur->bc_mp;
67         lblock_ok =
68                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
69                 be16_to_cpu(block->bb_level) == level &&
70                 be16_to_cpu(block->bb_numrecs) <=
71                         cur->bc_ops->get_maxrecs(cur, level) &&
72                 block->bb_leftsib &&
73                 (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO ||
74                  XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) &&
75                 block->bb_rightsib &&
76                 (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO ||
77                  XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_rightsib)));
78         if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
79                         XFS_ERRTAG_BTREE_CHECK_LBLOCK,
80                         XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
81                 if (bp)
82                         xfs_buftrace("LBTREE ERROR", bp);
83                 XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
84                                  mp);
85                 return XFS_ERROR(EFSCORRUPTED);
86         }
87         return 0;
88 }
89
90 STATIC int                              /* error (0 or EFSCORRUPTED) */
91 xfs_btree_check_sblock(
92         struct xfs_btree_cur    *cur,   /* btree cursor */
93         struct xfs_btree_sblock *block, /* btree short form block pointer */
94         int                     level,  /* level of the btree block */
95         struct xfs_buf          *bp)    /* buffer containing block */
96 {
97         struct xfs_buf          *agbp;  /* buffer for ag. freespace struct */
98         struct xfs_agf          *agf;   /* ag. freespace structure */
99         xfs_agblock_t           agflen; /* native ag. freespace length */
100         int                     sblock_ok; /* block passes checks */
101
102         agbp = cur->bc_private.a.agbp;
103         agf = XFS_BUF_TO_AGF(agbp);
104         agflen = be32_to_cpu(agf->agf_length);
105         sblock_ok =
106                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
107                 be16_to_cpu(block->bb_level) == level &&
108                 be16_to_cpu(block->bb_numrecs) <=
109                         cur->bc_ops->get_maxrecs(cur, level) &&
110                 (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK ||
111                  be32_to_cpu(block->bb_leftsib) < agflen) &&
112                 block->bb_leftsib &&
113                 (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK ||
114                  be32_to_cpu(block->bb_rightsib) < agflen) &&
115                 block->bb_rightsib;
116         if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
117                         XFS_ERRTAG_BTREE_CHECK_SBLOCK,
118                         XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
119                 if (bp)
120                         xfs_buftrace("SBTREE ERROR", bp);
121                 XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW,
122                                  cur->bc_mp);
123                 return XFS_ERROR(EFSCORRUPTED);
124         }
125         return 0;
126 }
127
128 /*
129  * Debug routine: check that block header is ok.
130  */
131 int
132 xfs_btree_check_block(
133         struct xfs_btree_cur    *cur,   /* btree cursor */
134         struct xfs_btree_block  *block, /* generic btree block pointer */
135         int                     level,  /* level of the btree block */
136         struct xfs_buf          *bp)    /* buffer containing block, if any */
137 {
138         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
139                 return xfs_btree_check_lblock(cur,
140                                 (struct xfs_btree_lblock *)block, level, bp);
141         } else {
142                 return xfs_btree_check_sblock(cur,
143                                 (struct xfs_btree_sblock *)block, level, bp);
144         }
145 }
146
147 /*
148  * Check that (long) pointer is ok.
149  */
150 int                                     /* error (0 or EFSCORRUPTED) */
151 xfs_btree_check_lptr(
152         struct xfs_btree_cur    *cur,   /* btree cursor */
153         xfs_dfsbno_t            bno,    /* btree block disk address */
154         int                     level)  /* btree block level */
155 {
156         XFS_WANT_CORRUPTED_RETURN(
157                 level > 0 &&
158                 bno != NULLDFSBNO &&
159                 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
160         return 0;
161 }
162
163 #ifdef DEBUG
164 /*
165  * Check that (short) pointer is ok.
166  */
167 STATIC int                              /* error (0 or EFSCORRUPTED) */
168 xfs_btree_check_sptr(
169         struct xfs_btree_cur    *cur,   /* btree cursor */
170         xfs_agblock_t           bno,    /* btree block disk address */
171         int                     level)  /* btree block level */
172 {
173         xfs_agblock_t           agblocks = cur->bc_mp->m_sb.sb_agblocks;
174
175         XFS_WANT_CORRUPTED_RETURN(
176                 level > 0 &&
177                 bno != NULLAGBLOCK &&
178                 bno != 0 &&
179                 bno < agblocks);
180         return 0;
181 }
182
183 /*
184  * Check that block ptr is ok.
185  */
186 STATIC int                              /* error (0 or EFSCORRUPTED) */
187 xfs_btree_check_ptr(
188         struct xfs_btree_cur    *cur,   /* btree cursor */
189         union xfs_btree_ptr     *ptr,   /* btree block disk address */
190         int                     index,  /* offset from ptr to check */
191         int                     level)  /* btree block level */
192 {
193         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
194                 return xfs_btree_check_lptr(cur,
195                                 be64_to_cpu((&ptr->l)[index]), level);
196         } else {
197                 return xfs_btree_check_sptr(cur,
198                                 be32_to_cpu((&ptr->s)[index]), level);
199         }
200 }
201 #endif
202
203 /*
204  * Delete the btree cursor.
205  */
206 void
207 xfs_btree_del_cursor(
208         xfs_btree_cur_t *cur,           /* btree cursor */
209         int             error)          /* del because of error */
210 {
211         int             i;              /* btree level */
212
213         /*
214          * Clear the buffer pointers, and release the buffers.
215          * If we're doing this in the face of an error, we
216          * need to make sure to inspect all of the entries
217          * in the bc_bufs array for buffers to be unlocked.
218          * This is because some of the btree code works from
219          * level n down to 0, and if we get an error along
220          * the way we won't have initialized all the entries
221          * down to 0.
222          */
223         for (i = 0; i < cur->bc_nlevels; i++) {
224                 if (cur->bc_bufs[i])
225                         xfs_btree_setbuf(cur, i, NULL);
226                 else if (!error)
227                         break;
228         }
229         /*
230          * Can't free a bmap cursor without having dealt with the
231          * allocated indirect blocks' accounting.
232          */
233         ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
234                cur->bc_private.b.allocated == 0);
235         /*
236          * Free the cursor.
237          */
238         kmem_zone_free(xfs_btree_cur_zone, cur);
239 }
240
241 /*
242  * Duplicate the btree cursor.
243  * Allocate a new one, copy the record, re-get the buffers.
244  */
245 int                                     /* error */
246 xfs_btree_dup_cursor(
247         xfs_btree_cur_t *cur,           /* input cursor */
248         xfs_btree_cur_t **ncur)         /* output cursor */
249 {
250         xfs_buf_t       *bp;            /* btree block's buffer pointer */
251         int             error;          /* error return value */
252         int             i;              /* level number of btree block */
253         xfs_mount_t     *mp;            /* mount structure for filesystem */
254         xfs_btree_cur_t *new;           /* new cursor value */
255         xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
256
257         tp = cur->bc_tp;
258         mp = cur->bc_mp;
259
260         /*
261          * Allocate a new cursor like the old one.
262          */
263         new = cur->bc_ops->dup_cursor(cur);
264
265         /*
266          * Copy the record currently in the cursor.
267          */
268         new->bc_rec = cur->bc_rec;
269
270         /*
271          * For each level current, re-get the buffer and copy the ptr value.
272          */
273         for (i = 0; i < new->bc_nlevels; i++) {
274                 new->bc_ptrs[i] = cur->bc_ptrs[i];
275                 new->bc_ra[i] = cur->bc_ra[i];
276                 if ((bp = cur->bc_bufs[i])) {
277                         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
278                                 XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) {
279                                 xfs_btree_del_cursor(new, error);
280                                 *ncur = NULL;
281                                 return error;
282                         }
283                         new->bc_bufs[i] = bp;
284                         ASSERT(bp);
285                         ASSERT(!XFS_BUF_GETERROR(bp));
286                 } else
287                         new->bc_bufs[i] = NULL;
288         }
289         *ncur = new;
290         return 0;
291 }
292
293 /*
294  * XFS btree block layout and addressing:
295  *
296  * There are two types of blocks in the btree: leaf and non-leaf blocks.
297  *
298  * The leaf record start with a header then followed by records containing
299  * the values.  A non-leaf block also starts with the same header, and
300  * then first contains lookup keys followed by an equal number of pointers
301  * to the btree blocks at the previous level.
302  *
303  *              +--------+-------+-------+-------+-------+-------+-------+
304  * Leaf:        | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
305  *              +--------+-------+-------+-------+-------+-------+-------+
306  *
307  *              +--------+-------+-------+-------+-------+-------+-------+
308  * Non-Leaf:    | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
309  *              +--------+-------+-------+-------+-------+-------+-------+
310  *
311  * The header is called struct xfs_btree_block for reasons better left unknown
312  * and comes in different versions for short (32bit) and long (64bit) block
313  * pointers.  The record and key structures are defined by the btree instances
314  * and opaque to the btree core.  The block pointers are simple disk endian
315  * integers, available in a short (32bit) and long (64bit) variant.
316  *
317  * The helpers below calculate the offset of a given record, key or pointer
318  * into a btree block (xfs_btree_*_offset) or return a pointer to the given
319  * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
320  * inside the btree block is done using indices starting at one, not zero!
321  */
322
323 /*
324  * Return size of the btree block header for this btree instance.
325  */
326 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
327 {
328         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
329                 sizeof(struct xfs_btree_lblock) :
330                 sizeof(struct xfs_btree_sblock);
331 }
332
333 /*
334  * Return size of btree block pointers for this btree instance.
335  */
336 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
337 {
338         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
339                 sizeof(__be64) : sizeof(__be32);
340 }
341
342 /*
343  * Calculate offset of the n-th record in a btree block.
344  */
345 STATIC size_t
346 xfs_btree_rec_offset(
347         struct xfs_btree_cur    *cur,
348         int                     n)
349 {
350         return xfs_btree_block_len(cur) +
351                 (n - 1) * cur->bc_ops->rec_len;
352 }
353
354 /*
355  * Calculate offset of the n-th key in a btree block.
356  */
357 STATIC size_t
358 xfs_btree_key_offset(
359         struct xfs_btree_cur    *cur,
360         int                     n)
361 {
362         return xfs_btree_block_len(cur) +
363                 (n - 1) * cur->bc_ops->key_len;
364 }
365
366 /*
367  * Calculate offset of the n-th block pointer in a btree block.
368  */
369 STATIC size_t
370 xfs_btree_ptr_offset(
371         struct xfs_btree_cur    *cur,
372         int                     n,
373         int                     level)
374 {
375         return xfs_btree_block_len(cur) +
376                 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
377                 (n - 1) * xfs_btree_ptr_len(cur);
378 }
379
380 /*
381  * Return a pointer to the n-th record in the btree block.
382  */
383 STATIC union xfs_btree_rec *
384 xfs_btree_rec_addr(
385         struct xfs_btree_cur    *cur,
386         int                     n,
387         struct xfs_btree_block  *block)
388 {
389         return (union xfs_btree_rec *)
390                 ((char *)block + xfs_btree_rec_offset(cur, n));
391 }
392
393 /*
394  * Return a pointer to the n-th key in the btree block.
395  */
396 STATIC union xfs_btree_key *
397 xfs_btree_key_addr(
398         struct xfs_btree_cur    *cur,
399         int                     n,
400         struct xfs_btree_block  *block)
401 {
402         return (union xfs_btree_key *)
403                 ((char *)block + xfs_btree_key_offset(cur, n));
404 }
405
406 /*
407  * Return a pointer to the n-th block pointer in the btree block.
408  */
409 STATIC union xfs_btree_ptr *
410 xfs_btree_ptr_addr(
411         struct xfs_btree_cur    *cur,
412         int                     n,
413         struct xfs_btree_block  *block)
414 {
415         int                     level = xfs_btree_get_level(block);
416
417         ASSERT(block->bb_level != 0);
418
419         return (union xfs_btree_ptr *)
420                 ((char *)block + xfs_btree_ptr_offset(cur, n, level));
421 }
422
423 /*
424  * Get a the root block which is stored in the inode.
425  *
426  * For now this btree implementation assumes the btree root is always
427  * stored in the if_broot field of an inode fork.
428  */
429 STATIC struct xfs_btree_block *
430 xfs_btree_get_iroot(
431        struct xfs_btree_cur    *cur)
432 {
433        struct xfs_ifork        *ifp;
434
435        ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
436        return (struct xfs_btree_block *)ifp->if_broot;
437 }
438
439 /*
440  * Retrieve the block pointer from the cursor at the given level.
441  * This may be an inode btree root or from a buffer.
442  */
443 STATIC struct xfs_btree_block *         /* generic btree block pointer */
444 xfs_btree_get_block(
445         struct xfs_btree_cur    *cur,   /* btree cursor */
446         int                     level,  /* level in btree */
447         struct xfs_buf          **bpp)  /* buffer containing the block */
448 {
449         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
450             (level == cur->bc_nlevels - 1)) {
451                 *bpp = NULL;
452                 return xfs_btree_get_iroot(cur);
453         }
454
455         *bpp = cur->bc_bufs[level];
456         return XFS_BUF_TO_BLOCK(*bpp);
457 }
458
459 /*
460  * Get a buffer for the block, return it with no data read.
461  * Long-form addressing.
462  */
463 xfs_buf_t *                             /* buffer for fsbno */
464 xfs_btree_get_bufl(
465         xfs_mount_t     *mp,            /* file system mount point */
466         xfs_trans_t     *tp,            /* transaction pointer */
467         xfs_fsblock_t   fsbno,          /* file system block number */
468         uint            lock)           /* lock flags for get_buf */
469 {
470         xfs_buf_t       *bp;            /* buffer pointer (return value) */
471         xfs_daddr_t             d;              /* real disk block address */
472
473         ASSERT(fsbno != NULLFSBLOCK);
474         d = XFS_FSB_TO_DADDR(mp, fsbno);
475         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
476         ASSERT(bp);
477         ASSERT(!XFS_BUF_GETERROR(bp));
478         return bp;
479 }
480
481 /*
482  * Get a buffer for the block, return it with no data read.
483  * Short-form addressing.
484  */
485 xfs_buf_t *                             /* buffer for agno/agbno */
486 xfs_btree_get_bufs(
487         xfs_mount_t     *mp,            /* file system mount point */
488         xfs_trans_t     *tp,            /* transaction pointer */
489         xfs_agnumber_t  agno,           /* allocation group number */
490         xfs_agblock_t   agbno,          /* allocation group block number */
491         uint            lock)           /* lock flags for get_buf */
492 {
493         xfs_buf_t       *bp;            /* buffer pointer (return value) */
494         xfs_daddr_t             d;              /* real disk block address */
495
496         ASSERT(agno != NULLAGNUMBER);
497         ASSERT(agbno != NULLAGBLOCK);
498         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
499         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
500         ASSERT(bp);
501         ASSERT(!XFS_BUF_GETERROR(bp));
502         return bp;
503 }
504
505 /*
506  * Check for the cursor referring to the last block at the given level.
507  */
508 int                                     /* 1=is last block, 0=not last block */
509 xfs_btree_islastblock(
510         xfs_btree_cur_t         *cur,   /* btree cursor */
511         int                     level)  /* level to check */
512 {
513         xfs_btree_block_t       *block; /* generic btree block pointer */
514         xfs_buf_t               *bp;    /* buffer containing block */
515
516         block = xfs_btree_get_block(cur, level, &bp);
517         xfs_btree_check_block(cur, block, level, bp);
518         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
519                 return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
520         else
521                 return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
522 }
523
524 /*
525  * Change the cursor to point to the first record at the given level.
526  * Other levels are unaffected.
527  */
528 STATIC int                              /* success=1, failure=0 */
529 xfs_btree_firstrec(
530         xfs_btree_cur_t         *cur,   /* btree cursor */
531         int                     level)  /* level to change */
532 {
533         xfs_btree_block_t       *block; /* generic btree block pointer */
534         xfs_buf_t               *bp;    /* buffer containing block */
535
536         /*
537          * Get the block pointer for this level.
538          */
539         block = xfs_btree_get_block(cur, level, &bp);
540         xfs_btree_check_block(cur, block, level, bp);
541         /*
542          * It's empty, there is no such record.
543          */
544         if (!block->bb_numrecs)
545                 return 0;
546         /*
547          * Set the ptr value to 1, that's the first record/key.
548          */
549         cur->bc_ptrs[level] = 1;
550         return 1;
551 }
552
553 /*
554  * Change the cursor to point to the last record in the current block
555  * at the given level.  Other levels are unaffected.
556  */
557 STATIC int                              /* success=1, failure=0 */
558 xfs_btree_lastrec(
559         xfs_btree_cur_t         *cur,   /* btree cursor */
560         int                     level)  /* level to change */
561 {
562         xfs_btree_block_t       *block; /* generic btree block pointer */
563         xfs_buf_t               *bp;    /* buffer containing block */
564
565         /*
566          * Get the block pointer for this level.
567          */
568         block = xfs_btree_get_block(cur, level, &bp);
569         xfs_btree_check_block(cur, block, level, bp);
570         /*
571          * It's empty, there is no such record.
572          */
573         if (!block->bb_numrecs)
574                 return 0;
575         /*
576          * Set the ptr value to numrecs, that's the last record/key.
577          */
578         cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
579         return 1;
580 }
581
582 /*
583  * Compute first and last byte offsets for the fields given.
584  * Interprets the offsets table, which contains struct field offsets.
585  */
586 void
587 xfs_btree_offsets(
588         __int64_t       fields,         /* bitmask of fields */
589         const short     *offsets,       /* table of field offsets */
590         int             nbits,          /* number of bits to inspect */
591         int             *first,         /* output: first byte offset */
592         int             *last)          /* output: last byte offset */
593 {
594         int             i;              /* current bit number */
595         __int64_t       imask;          /* mask for current bit number */
596
597         ASSERT(fields != 0);
598         /*
599          * Find the lowest bit, so the first byte offset.
600          */
601         for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
602                 if (imask & fields) {
603                         *first = offsets[i];
604                         break;
605                 }
606         }
607         /*
608          * Find the highest bit, so the last byte offset.
609          */
610         for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
611                 if (imask & fields) {
612                         *last = offsets[i + 1] - 1;
613                         break;
614                 }
615         }
616 }
617
618 /*
619  * Get a buffer for the block, return it read in.
620  * Long-form addressing.
621  */
622 int                                     /* error */
623 xfs_btree_read_bufl(
624         xfs_mount_t     *mp,            /* file system mount point */
625         xfs_trans_t     *tp,            /* transaction pointer */
626         xfs_fsblock_t   fsbno,          /* file system block number */
627         uint            lock,           /* lock flags for read_buf */
628         xfs_buf_t       **bpp,          /* buffer for fsbno */
629         int             refval)         /* ref count value for buffer */
630 {
631         xfs_buf_t       *bp;            /* return value */
632         xfs_daddr_t             d;              /* real disk block address */
633         int             error;
634
635         ASSERT(fsbno != NULLFSBLOCK);
636         d = XFS_FSB_TO_DADDR(mp, fsbno);
637         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
638                         mp->m_bsize, lock, &bp))) {
639                 return error;
640         }
641         ASSERT(!bp || !XFS_BUF_GETERROR(bp));
642         if (bp != NULL) {
643                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
644         }
645         *bpp = bp;
646         return 0;
647 }
648
649 /*
650  * Get a buffer for the block, return it read in.
651  * Short-form addressing.
652  */
653 int                                     /* error */
654 xfs_btree_read_bufs(
655         xfs_mount_t     *mp,            /* file system mount point */
656         xfs_trans_t     *tp,            /* transaction pointer */
657         xfs_agnumber_t  agno,           /* allocation group number */
658         xfs_agblock_t   agbno,          /* allocation group block number */
659         uint            lock,           /* lock flags for read_buf */
660         xfs_buf_t       **bpp,          /* buffer for agno/agbno */
661         int             refval)         /* ref count value for buffer */
662 {
663         xfs_buf_t       *bp;            /* return value */
664         xfs_daddr_t     d;              /* real disk block address */
665         int             error;
666
667         ASSERT(agno != NULLAGNUMBER);
668         ASSERT(agbno != NULLAGBLOCK);
669         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
670         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
671                                         mp->m_bsize, lock, &bp))) {
672                 return error;
673         }
674         ASSERT(!bp || !XFS_BUF_GETERROR(bp));
675         if (bp != NULL) {
676                 switch (refval) {
677                 case XFS_ALLOC_BTREE_REF:
678                         XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
679                         break;
680                 case XFS_INO_BTREE_REF:
681                         XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval);
682                         break;
683                 }
684         }
685         *bpp = bp;
686         return 0;
687 }
688
689 /*
690  * Read-ahead the block, don't wait for it, don't return a buffer.
691  * Long-form addressing.
692  */
693 /* ARGSUSED */
694 void
695 xfs_btree_reada_bufl(
696         xfs_mount_t     *mp,            /* file system mount point */
697         xfs_fsblock_t   fsbno,          /* file system block number */
698         xfs_extlen_t    count)          /* count of filesystem blocks */
699 {
700         xfs_daddr_t             d;
701
702         ASSERT(fsbno != NULLFSBLOCK);
703         d = XFS_FSB_TO_DADDR(mp, fsbno);
704         xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
705 }
706
707 /*
708  * Read-ahead the block, don't wait for it, don't return a buffer.
709  * Short-form addressing.
710  */
711 /* ARGSUSED */
712 void
713 xfs_btree_reada_bufs(
714         xfs_mount_t     *mp,            /* file system mount point */
715         xfs_agnumber_t  agno,           /* allocation group number */
716         xfs_agblock_t   agbno,          /* allocation group block number */
717         xfs_extlen_t    count)          /* count of filesystem blocks */
718 {
719         xfs_daddr_t             d;
720
721         ASSERT(agno != NULLAGNUMBER);
722         ASSERT(agbno != NULLAGBLOCK);
723         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
724         xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
725 }
726
727 STATIC int
728 xfs_btree_readahead_lblock(
729         struct xfs_btree_cur    *cur,
730         int                     lr,
731         struct xfs_btree_block  *block)
732 {
733         int                     rval = 0;
734         xfs_fsblock_t           left = be64_to_cpu(block->bb_u.l.bb_leftsib);
735         xfs_fsblock_t           right = be64_to_cpu(block->bb_u.l.bb_rightsib);
736
737         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
738                 xfs_btree_reada_bufl(cur->bc_mp, left, 1);
739                 rval++;
740         }
741
742         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
743                 xfs_btree_reada_bufl(cur->bc_mp, right, 1);
744                 rval++;
745         }
746
747         return rval;
748 }
749
750 STATIC int
751 xfs_btree_readahead_sblock(
752         struct xfs_btree_cur    *cur,
753         int                     lr,
754         struct xfs_btree_block *block)
755 {
756         int                     rval = 0;
757         xfs_agblock_t           left = be32_to_cpu(block->bb_u.s.bb_leftsib);
758         xfs_agblock_t           right = be32_to_cpu(block->bb_u.s.bb_rightsib);
759
760
761         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
762                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
763                                      left, 1);
764                 rval++;
765         }
766
767         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
768                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
769                                      right, 1);
770                 rval++;
771         }
772
773         return rval;
774 }
775
776 /*
777  * Read-ahead btree blocks, at the given level.
778  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
779  */
780 STATIC int
781 xfs_btree_readahead(
782         struct xfs_btree_cur    *cur,           /* btree cursor */
783         int                     lev,            /* level in btree */
784         int                     lr)             /* left/right bits */
785 {
786         struct xfs_btree_block  *block;
787
788         /*
789          * No readahead needed if we are at the root level and the
790          * btree root is stored in the inode.
791          */
792         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
793             (lev == cur->bc_nlevels - 1))
794                 return 0;
795
796         if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
797                 return 0;
798
799         cur->bc_ra[lev] |= lr;
800         block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
801
802         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
803                 return xfs_btree_readahead_lblock(cur, lr, block);
804         return xfs_btree_readahead_sblock(cur, lr, block);
805 }
806
807 /*
808  * Set the buffer for level "lev" in the cursor to bp, releasing
809  * any previous buffer.
810  */
811 void
812 xfs_btree_setbuf(
813         xfs_btree_cur_t         *cur,   /* btree cursor */
814         int                     lev,    /* level in btree */
815         xfs_buf_t               *bp)    /* new buffer to set */
816 {
817         xfs_btree_block_t       *b;     /* btree block */
818         xfs_buf_t               *obp;   /* old buffer pointer */
819
820         obp = cur->bc_bufs[lev];
821         if (obp)
822                 xfs_trans_brelse(cur->bc_tp, obp);
823         cur->bc_bufs[lev] = bp;
824         cur->bc_ra[lev] = 0;
825         if (!bp)
826                 return;
827         b = XFS_BUF_TO_BLOCK(bp);
828         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
829                 if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
830                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
831                 if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
832                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
833         } else {
834                 if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK)
835                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
836                 if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK)
837                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
838         }
839 }
840
841 STATIC int
842 xfs_btree_ptr_is_null(
843         struct xfs_btree_cur    *cur,
844         union xfs_btree_ptr     *ptr)
845 {
846         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
847                 return be64_to_cpu(ptr->l) == NULLFSBLOCK;
848         else
849                 return be32_to_cpu(ptr->s) == NULLAGBLOCK;
850 }
851
852 STATIC void
853 xfs_btree_set_ptr_null(
854         struct xfs_btree_cur    *cur,
855         union xfs_btree_ptr     *ptr)
856 {
857         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
858                 ptr->l = cpu_to_be64(NULLFSBLOCK);
859         else
860                 ptr->s = cpu_to_be32(NULLAGBLOCK);
861 }
862
863 /*
864  * Get/set/init sibling pointers
865  */
866 STATIC void
867 xfs_btree_get_sibling(
868         struct xfs_btree_cur    *cur,
869         struct xfs_btree_block  *block,
870         union xfs_btree_ptr     *ptr,
871         int                     lr)
872 {
873         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
874
875         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
876                 if (lr == XFS_BB_RIGHTSIB)
877                         ptr->l = block->bb_u.l.bb_rightsib;
878                 else
879                         ptr->l = block->bb_u.l.bb_leftsib;
880         } else {
881                 if (lr == XFS_BB_RIGHTSIB)
882                         ptr->s = block->bb_u.s.bb_rightsib;
883                 else
884                         ptr->s = block->bb_u.s.bb_leftsib;
885         }
886 }
887
888 STATIC void
889 xfs_btree_set_sibling(
890         struct xfs_btree_cur    *cur,
891         struct xfs_btree_block  *block,
892         union xfs_btree_ptr     *ptr,
893         int                     lr)
894 {
895         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
896
897         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
898                 if (lr == XFS_BB_RIGHTSIB)
899                         block->bb_u.l.bb_rightsib = ptr->l;
900                 else
901                         block->bb_u.l.bb_leftsib = ptr->l;
902         } else {
903                 if (lr == XFS_BB_RIGHTSIB)
904                         block->bb_u.s.bb_rightsib = ptr->s;
905                 else
906                         block->bb_u.s.bb_leftsib = ptr->s;
907         }
908 }
909
910 STATIC void
911 xfs_btree_init_block(
912         struct xfs_btree_cur    *cur,
913         int                     level,
914         int                     numrecs,
915         struct xfs_btree_block  *new)   /* new block */
916 {
917         new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
918         new->bb_level = cpu_to_be16(level);
919         new->bb_numrecs = cpu_to_be16(numrecs);
920
921         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
922                 new->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
923                 new->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
924         } else {
925                 new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
926                 new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
927         }
928 }
929
930 /*
931  * Return true if ptr is the last record in the btree and
932  * we need to track updateÑ• to this record.  The decision
933  * will be further refined in the update_lastrec method.
934  */
935 STATIC int
936 xfs_btree_is_lastrec(
937         struct xfs_btree_cur    *cur,
938         struct xfs_btree_block  *block,
939         int                     level)
940 {
941         union xfs_btree_ptr     ptr;
942
943         if (level > 0)
944                 return 0;
945         if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
946                 return 0;
947
948         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
949         if (!xfs_btree_ptr_is_null(cur, &ptr))
950                 return 0;
951         return 1;
952 }
953
954 STATIC void
955 xfs_btree_buf_to_ptr(
956         struct xfs_btree_cur    *cur,
957         struct xfs_buf          *bp,
958         union xfs_btree_ptr     *ptr)
959 {
960         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
961                 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
962                                         XFS_BUF_ADDR(bp)));
963         else {
964                 ptr->s = cpu_to_be32(XFS_DADDR_TO_AGBNO(cur->bc_mp,
965                                         XFS_BUF_ADDR(bp)));
966         }
967 }
968
969 STATIC xfs_daddr_t
970 xfs_btree_ptr_to_daddr(
971         struct xfs_btree_cur    *cur,
972         union xfs_btree_ptr     *ptr)
973 {
974         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
975                 ASSERT(be64_to_cpu(ptr->l) != NULLFSBLOCK);
976
977                 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
978         } else {
979                 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
980                 ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK);
981
982                 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
983                                         be32_to_cpu(ptr->s));
984         }
985 }
986
987 STATIC void
988 xfs_btree_set_refs(
989         struct xfs_btree_cur    *cur,
990         struct xfs_buf          *bp)
991 {
992         switch (cur->bc_btnum) {
993         case XFS_BTNUM_BNO:
994         case XFS_BTNUM_CNT:
995                 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
996                 break;
997         case XFS_BTNUM_INO:
998                 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF);
999                 break;
1000         case XFS_BTNUM_BMAP:
1001                 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF);
1002                 break;
1003         default:
1004                 ASSERT(0);
1005         }
1006 }
1007
1008 STATIC int
1009 xfs_btree_get_buf_block(
1010         struct xfs_btree_cur    *cur,
1011         union xfs_btree_ptr     *ptr,
1012         int                     flags,
1013         struct xfs_btree_block  **block,
1014         struct xfs_buf          **bpp)
1015 {
1016         struct xfs_mount        *mp = cur->bc_mp;
1017         xfs_daddr_t             d;
1018
1019         /* need to sort out how callers deal with failures first */
1020         ASSERT(!(flags & XFS_BUF_TRYLOCK));
1021
1022         d = xfs_btree_ptr_to_daddr(cur, ptr);
1023         *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
1024                                  mp->m_bsize, flags);
1025
1026         ASSERT(*bpp);
1027         ASSERT(!XFS_BUF_GETERROR(*bpp));
1028
1029         *block = XFS_BUF_TO_BLOCK(*bpp);
1030         return 0;
1031 }
1032
1033 /*
1034  * Read in the buffer at the given ptr and return the buffer and
1035  * the block pointer within the buffer.
1036  */
1037 STATIC int
1038 xfs_btree_read_buf_block(
1039         struct xfs_btree_cur    *cur,
1040         union xfs_btree_ptr     *ptr,
1041         int                     level,
1042         int                     flags,
1043         struct xfs_btree_block  **block,
1044         struct xfs_buf          **bpp)
1045 {
1046         struct xfs_mount        *mp = cur->bc_mp;
1047         xfs_daddr_t             d;
1048         int                     error;
1049
1050         /* need to sort out how callers deal with failures first */
1051         ASSERT(!(flags & XFS_BUF_TRYLOCK));
1052
1053         d = xfs_btree_ptr_to_daddr(cur, ptr);
1054         error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
1055                                    mp->m_bsize, flags, bpp);
1056         if (error)
1057                 return error;
1058
1059         ASSERT(*bpp != NULL);
1060         ASSERT(!XFS_BUF_GETERROR(*bpp));
1061
1062         xfs_btree_set_refs(cur, *bpp);
1063         *block = XFS_BUF_TO_BLOCK(*bpp);
1064
1065         error = xfs_btree_check_block(cur, *block, level, *bpp);
1066         if (error)
1067                 xfs_trans_brelse(cur->bc_tp, *bpp);
1068         return error;
1069 }
1070
1071 /*
1072  * Copy keys from one btree block to another.
1073  */
1074 STATIC void
1075 xfs_btree_copy_keys(
1076         struct xfs_btree_cur    *cur,
1077         union xfs_btree_key     *dst_key,
1078         union xfs_btree_key     *src_key,
1079         int                     numkeys)
1080 {
1081         ASSERT(numkeys >= 0);
1082         memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1083 }
1084
1085 /*
1086  * Copy records from one btree block to another.
1087  */
1088 STATIC void
1089 xfs_btree_copy_recs(
1090         struct xfs_btree_cur    *cur,
1091         union xfs_btree_rec     *dst_rec,
1092         union xfs_btree_rec     *src_rec,
1093         int                     numrecs)
1094 {
1095         ASSERT(numrecs >= 0);
1096         memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1097 }
1098
1099 /*
1100  * Copy block pointers from one btree block to another.
1101  */
1102 STATIC void
1103 xfs_btree_copy_ptrs(
1104         struct xfs_btree_cur    *cur,
1105         union xfs_btree_ptr     *dst_ptr,
1106         union xfs_btree_ptr     *src_ptr,
1107         int                     numptrs)
1108 {
1109         ASSERT(numptrs >= 0);
1110         memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1111 }
1112
1113 /*
1114  * Shift keys one index left/right inside a single btree block.
1115  */
1116 STATIC void
1117 xfs_btree_shift_keys(
1118         struct xfs_btree_cur    *cur,
1119         union xfs_btree_key     *key,
1120         int                     dir,
1121         int                     numkeys)
1122 {
1123         char                    *dst_key;
1124
1125         ASSERT(numkeys >= 0);
1126         ASSERT(dir == 1 || dir == -1);
1127
1128         dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1129         memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1130 }
1131
1132 /*
1133  * Shift records one index left/right inside a single btree block.
1134  */
1135 STATIC void
1136 xfs_btree_shift_recs(
1137         struct xfs_btree_cur    *cur,
1138         union xfs_btree_rec     *rec,
1139         int                     dir,
1140         int                     numrecs)
1141 {
1142         char                    *dst_rec;
1143
1144         ASSERT(numrecs >= 0);
1145         ASSERT(dir == 1 || dir == -1);
1146
1147         dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1148         memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1149 }
1150
1151 /*
1152  * Shift block pointers one index left/right inside a single btree block.
1153  */
1154 STATIC void
1155 xfs_btree_shift_ptrs(
1156         struct xfs_btree_cur    *cur,
1157         union xfs_btree_ptr     *ptr,
1158         int                     dir,
1159         int                     numptrs)
1160 {
1161         char                    *dst_ptr;
1162
1163         ASSERT(numptrs >= 0);
1164         ASSERT(dir == 1 || dir == -1);
1165
1166         dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1167         memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1168 }
1169
1170 /*
1171  * Log key values from the btree block.
1172  */
1173 STATIC void
1174 xfs_btree_log_keys(
1175         struct xfs_btree_cur    *cur,
1176         struct xfs_buf          *bp,
1177         int                     first,
1178         int                     last)
1179 {
1180         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1181         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1182
1183         if (bp) {
1184                 xfs_trans_log_buf(cur->bc_tp, bp,
1185                                   xfs_btree_key_offset(cur, first),
1186                                   xfs_btree_key_offset(cur, last + 1) - 1);
1187         } else {
1188                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1189                                 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1190         }
1191
1192         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1193 }
1194
1195 /*
1196  * Log record values from the btree block.
1197  */
1198 void
1199 xfs_btree_log_recs(
1200         struct xfs_btree_cur    *cur,
1201         struct xfs_buf          *bp,
1202         int                     first,
1203         int                     last)
1204 {
1205         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1206         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1207
1208         xfs_trans_log_buf(cur->bc_tp, bp,
1209                           xfs_btree_rec_offset(cur, first),
1210                           xfs_btree_rec_offset(cur, last + 1) - 1);
1211
1212         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1213 }
1214
1215 /*
1216  * Log block pointer fields from a btree block (nonleaf).
1217  */
1218 STATIC void
1219 xfs_btree_log_ptrs(
1220         struct xfs_btree_cur    *cur,   /* btree cursor */
1221         struct xfs_buf          *bp,    /* buffer containing btree block */
1222         int                     first,  /* index of first pointer to log */
1223         int                     last)   /* index of last pointer to log */
1224 {
1225         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1226         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1227
1228         if (bp) {
1229                 struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
1230                 int                     level = xfs_btree_get_level(block);
1231
1232                 xfs_trans_log_buf(cur->bc_tp, bp,
1233                                 xfs_btree_ptr_offset(cur, first, level),
1234                                 xfs_btree_ptr_offset(cur, last + 1, level) - 1);
1235         } else {
1236                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1237                         xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1238         }
1239
1240         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1241 }
1242
1243 /*
1244  * Log fields from a btree block header.
1245  */
1246 void
1247 xfs_btree_log_block(
1248         struct xfs_btree_cur    *cur,   /* btree cursor */
1249         struct xfs_buf          *bp,    /* buffer containing btree block */
1250         int                     fields) /* mask of fields: XFS_BB_... */
1251 {
1252         int                     first;  /* first byte offset logged */
1253         int                     last;   /* last byte offset logged */
1254         static const short      soffsets[] = {  /* table of offsets (short) */
1255                 offsetof(struct xfs_btree_sblock, bb_magic),
1256                 offsetof(struct xfs_btree_sblock, bb_level),
1257                 offsetof(struct xfs_btree_sblock, bb_numrecs),
1258                 offsetof(struct xfs_btree_sblock, bb_leftsib),
1259                 offsetof(struct xfs_btree_sblock, bb_rightsib),
1260                 sizeof(struct xfs_btree_sblock)
1261         };
1262         static const short      loffsets[] = {  /* table of offsets (long) */
1263                 offsetof(struct xfs_btree_lblock, bb_magic),
1264                 offsetof(struct xfs_btree_lblock, bb_level),
1265                 offsetof(struct xfs_btree_lblock, bb_numrecs),
1266                 offsetof(struct xfs_btree_lblock, bb_leftsib),
1267                 offsetof(struct xfs_btree_lblock, bb_rightsib),
1268                 sizeof(struct xfs_btree_lblock)
1269         };
1270
1271         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1272         XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
1273
1274         if (bp) {
1275                 xfs_btree_offsets(fields,
1276                                   (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
1277                                         loffsets : soffsets,
1278                                   XFS_BB_NUM_BITS, &first, &last);
1279                 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
1280         } else {
1281                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1282                         xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1283         }
1284
1285         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1286 }
1287
1288 /*
1289  * Increment cursor by one record at the level.
1290  * For nonzero levels the leaf-ward information is untouched.
1291  */
1292 int                                             /* error */
1293 xfs_btree_increment(
1294         struct xfs_btree_cur    *cur,
1295         int                     level,
1296         int                     *stat)          /* success/failure */
1297 {
1298         struct xfs_btree_block  *block;
1299         union xfs_btree_ptr     ptr;
1300         struct xfs_buf          *bp;
1301         int                     error;          /* error return value */
1302         int                     lev;
1303
1304         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1305         XFS_BTREE_TRACE_ARGI(cur, level);
1306
1307         ASSERT(level < cur->bc_nlevels);
1308
1309         /* Read-ahead to the right at this level. */
1310         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1311
1312         /* Get a pointer to the btree block. */
1313         block = xfs_btree_get_block(cur, level, &bp);
1314
1315 #ifdef DEBUG
1316         error = xfs_btree_check_block(cur, block, level, bp);
1317         if (error)
1318                 goto error0;
1319 #endif
1320
1321         /* We're done if we remain in the block after the increment. */
1322         if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
1323                 goto out1;
1324
1325         /* Fail if we just went off the right edge of the tree. */
1326         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1327         if (xfs_btree_ptr_is_null(cur, &ptr))
1328                 goto out0;
1329
1330         XFS_BTREE_STATS_INC(cur, increment);
1331
1332         /*
1333          * March up the tree incrementing pointers.
1334          * Stop when we don't go off the right edge of a block.
1335          */
1336         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1337                 block = xfs_btree_get_block(cur, lev, &bp);
1338
1339 #ifdef DEBUG
1340                 error = xfs_btree_check_block(cur, block, lev, bp);
1341                 if (error)
1342                         goto error0;
1343 #endif
1344
1345                 if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
1346                         break;
1347
1348                 /* Read-ahead the right block for the next loop. */
1349                 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1350         }
1351
1352         /*
1353          * If we went off the root then we are either seriously
1354          * confused or have the tree root in an inode.
1355          */
1356         if (lev == cur->bc_nlevels) {
1357                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1358                         goto out0;
1359                 ASSERT(0);
1360                 error = EFSCORRUPTED;
1361                 goto error0;
1362         }
1363         ASSERT(lev < cur->bc_nlevels);
1364
1365         /*
1366          * Now walk back down the tree, fixing up the cursor's buffer
1367          * pointers and key numbers.
1368          */
1369         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1370                 union xfs_btree_ptr     *ptrp;
1371
1372                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1373                 error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1374                                                         0, &block, &bp);
1375                 if (error)
1376                         goto error0;
1377
1378                 xfs_btree_setbuf(cur, lev, bp);
1379                 cur->bc_ptrs[lev] = 1;
1380         }
1381 out1:
1382         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1383         *stat = 1;
1384         return 0;
1385
1386 out0:
1387         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1388         *stat = 0;
1389         return 0;
1390
1391 error0:
1392         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1393         return error;
1394 }
1395
1396 /*
1397  * Decrement cursor by one record at the level.
1398  * For nonzero levels the leaf-ward information is untouched.
1399  */
1400 int                                             /* error */
1401 xfs_btree_decrement(
1402         struct xfs_btree_cur    *cur,
1403         int                     level,
1404         int                     *stat)          /* success/failure */
1405 {
1406         struct xfs_btree_block  *block;
1407         xfs_buf_t               *bp;
1408         int                     error;          /* error return value */
1409         int                     lev;
1410         union xfs_btree_ptr     ptr;
1411
1412         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1413         XFS_BTREE_TRACE_ARGI(cur, level);
1414
1415         ASSERT(level < cur->bc_nlevels);
1416
1417         /* Read-ahead to the left at this level. */
1418         xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1419
1420         /* We're done if we remain in the block after the decrement. */
1421         if (--cur->bc_ptrs[level] > 0)
1422                 goto out1;
1423
1424         /* Get a pointer to the btree block. */
1425         block = xfs_btree_get_block(cur, level, &bp);
1426
1427 #ifdef DEBUG
1428         error = xfs_btree_check_block(cur, block, level, bp);
1429         if (error)
1430                 goto error0;
1431 #endif
1432
1433         /* Fail if we just went off the left edge of the tree. */
1434         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
1435         if (xfs_btree_ptr_is_null(cur, &ptr))
1436                 goto out0;
1437
1438         XFS_BTREE_STATS_INC(cur, decrement);
1439
1440         /*
1441          * March up the tree decrementing pointers.
1442          * Stop when we don't go off the left edge of a block.
1443          */
1444         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1445                 if (--cur->bc_ptrs[lev] > 0)
1446                         break;
1447                 /* Read-ahead the left block for the next loop. */
1448                 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1449         }
1450
1451         /*
1452          * If we went off the root then we are seriously confused.
1453          * or the root of the tree is in an inode.
1454          */
1455         if (lev == cur->bc_nlevels) {
1456                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1457                         goto out0;
1458                 ASSERT(0);
1459                 error = EFSCORRUPTED;
1460                 goto error0;
1461         }
1462         ASSERT(lev < cur->bc_nlevels);
1463
1464         /*
1465          * Now walk back down the tree, fixing up the cursor's buffer
1466          * pointers and key numbers.
1467          */
1468         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1469                 union xfs_btree_ptr     *ptrp;
1470
1471                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1472                 error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1473                                                         0, &block, &bp);
1474                 if (error)
1475                         goto error0;
1476                 xfs_btree_setbuf(cur, lev, bp);
1477                 cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
1478         }
1479 out1:
1480         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1481         *stat = 1;
1482         return 0;
1483
1484 out0:
1485         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1486         *stat = 0;
1487         return 0;
1488
1489 error0:
1490         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1491         return error;
1492 }
1493
1494 STATIC int
1495 xfs_btree_lookup_get_block(
1496         struct xfs_btree_cur    *cur,   /* btree cursor */
1497         int                     level,  /* level in the btree */
1498         union xfs_btree_ptr     *pp,    /* ptr to btree block */
1499         struct xfs_btree_block  **blkp) /* return btree block */
1500 {
1501         struct xfs_buf          *bp;    /* buffer pointer for btree block */
1502         int                     error = 0;
1503
1504         /* special case the root block if in an inode */
1505         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1506             (level == cur->bc_nlevels - 1)) {
1507                 *blkp = xfs_btree_get_iroot(cur);
1508                 return 0;
1509         }
1510
1511         /*
1512          * If the old buffer at this level for the disk address we are
1513          * looking for re-use it.
1514          *
1515          * Otherwise throw it away and get a new one.
1516          */
1517         bp = cur->bc_bufs[level];
1518         if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
1519                 *blkp = XFS_BUF_TO_BLOCK(bp);
1520                 return 0;
1521         }
1522
1523         error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp);
1524         if (error)
1525                 return error;
1526
1527         xfs_btree_setbuf(cur, level, bp);
1528         return 0;
1529 }
1530
1531 /*
1532  * Get current search key.  For level 0 we don't actually have a key
1533  * structure so we make one up from the record.  For all other levels
1534  * we just return the right key.
1535  */
1536 STATIC union xfs_btree_key *
1537 xfs_lookup_get_search_key(
1538         struct xfs_btree_cur    *cur,
1539         int                     level,
1540         int                     keyno,
1541         struct xfs_btree_block  *block,
1542         union xfs_btree_key     *kp)
1543 {
1544         if (level == 0) {
1545                 cur->bc_ops->init_key_from_rec(kp,
1546                                 xfs_btree_rec_addr(cur, keyno, block));
1547                 return kp;
1548         }
1549
1550         return xfs_btree_key_addr(cur, keyno, block);
1551 }
1552
1553 /*
1554  * Lookup the record.  The cursor is made to point to it, based on dir.
1555  * Return 0 if can't find any such record, 1 for success.
1556  */
1557 int                                     /* error */
1558 xfs_btree_lookup(
1559         struct xfs_btree_cur    *cur,   /* btree cursor */
1560         xfs_lookup_t            dir,    /* <=, ==, or >= */
1561         int                     *stat)  /* success/failure */
1562 {
1563         struct xfs_btree_block  *block; /* current btree block */
1564         __int64_t               diff;   /* difference for the current key */
1565         int                     error;  /* error return value */
1566         int                     keyno;  /* current key number */
1567         int                     level;  /* level in the btree */
1568         union xfs_btree_ptr     *pp;    /* ptr to btree block */
1569         union xfs_btree_ptr     ptr;    /* ptr to btree block */
1570
1571         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1572         XFS_BTREE_TRACE_ARGI(cur, dir);
1573
1574         XFS_BTREE_STATS_INC(cur, lookup);
1575
1576         block = NULL;
1577         keyno = 0;
1578
1579         /* initialise start pointer from cursor */
1580         cur->bc_ops->init_ptr_from_cur(cur, &ptr);
1581         pp = &ptr;
1582
1583         /*
1584          * Iterate over each level in the btree, starting at the root.
1585          * For each level above the leaves, find the key we need, based
1586          * on the lookup record, then follow the corresponding block
1587          * pointer down to the next level.
1588          */
1589         for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
1590                 /* Get the block we need to do the lookup on. */
1591                 error = xfs_btree_lookup_get_block(cur, level, pp, &block);
1592                 if (error)
1593                         goto error0;
1594
1595                 if (diff == 0) {
1596                         /*
1597                          * If we already had a key match at a higher level, we
1598                          * know we need to use the first entry in this block.
1599                          */
1600                         keyno = 1;
1601                 } else {
1602                         /* Otherwise search this block. Do a binary search. */
1603
1604                         int     high;   /* high entry number */
1605                         int     low;    /* low entry number */
1606
1607                         /* Set low and high entry numbers, 1-based. */
1608                         low = 1;
1609                         high = xfs_btree_get_numrecs(block);
1610                         if (!high) {
1611                                 /* Block is empty, must be an empty leaf. */
1612                                 ASSERT(level == 0 && cur->bc_nlevels == 1);
1613
1614                                 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1615                                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1616                                 *stat = 0;
1617                                 return 0;
1618                         }
1619
1620                         /* Binary search the block. */
1621                         while (low <= high) {
1622                                 union xfs_btree_key     key;
1623                                 union xfs_btree_key     *kp;
1624
1625                                 XFS_BTREE_STATS_INC(cur, compare);
1626
1627                                 /* keyno is average of low and high. */
1628                                 keyno = (low + high) >> 1;
1629
1630                                 /* Get current search key */
1631                                 kp = xfs_lookup_get_search_key(cur, level,
1632                                                 keyno, block, &key);
1633
1634                                 /*
1635                                  * Compute difference to get next direction:
1636                                  *  - less than, move right
1637                                  *  - greater than, move left
1638                                  *  - equal, we're done
1639                                  */
1640                                 diff = cur->bc_ops->key_diff(cur, kp);
1641                                 if (diff < 0)
1642                                         low = keyno + 1;
1643                                 else if (diff > 0)
1644                                         high = keyno - 1;
1645                                 else
1646                                         break;
1647                         }
1648                 }
1649
1650                 /*
1651                  * If there are more levels, set up for the next level
1652                  * by getting the block number and filling in the cursor.
1653                  */
1654                 if (level > 0) {
1655                         /*
1656                          * If we moved left, need the previous key number,
1657                          * unless there isn't one.
1658                          */
1659                         if (diff > 0 && --keyno < 1)
1660                                 keyno = 1;
1661                         pp = xfs_btree_ptr_addr(cur, keyno, block);
1662
1663 #ifdef DEBUG
1664                         error = xfs_btree_check_ptr(cur, pp, 0, level);
1665                         if (error)
1666                                 goto error0;
1667 #endif
1668                         cur->bc_ptrs[level] = keyno;
1669                 }
1670         }
1671
1672         /* Done with the search. See if we need to adjust the results. */
1673         if (dir != XFS_LOOKUP_LE && diff < 0) {
1674                 keyno++;
1675                 /*
1676                  * If ge search and we went off the end of the block, but it's
1677                  * not the last block, we're in the wrong block.
1678                  */
1679                 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1680                 if (dir == XFS_LOOKUP_GE &&
1681                     keyno > xfs_btree_get_numrecs(block) &&
1682                     !xfs_btree_ptr_is_null(cur, &ptr)) {
1683                         int     i;
1684
1685                         cur->bc_ptrs[0] = keyno;
1686                         error = xfs_btree_increment(cur, 0, &i);
1687                         if (error)
1688                                 goto error0;
1689                         XFS_WANT_CORRUPTED_RETURN(i == 1);
1690                         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1691                         *stat = 1;
1692                         return 0;
1693                 }
1694         } else if (dir == XFS_LOOKUP_LE && diff > 0)
1695                 keyno--;
1696         cur->bc_ptrs[0] = keyno;
1697
1698         /* Return if we succeeded or not. */
1699         if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
1700                 *stat = 0;
1701         else if (dir != XFS_LOOKUP_EQ || diff == 0)
1702                 *stat = 1;
1703         else
1704                 *stat = 0;
1705         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1706         return 0;
1707
1708 error0:
1709         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1710         return error;
1711 }
1712
1713 /*
1714  * Update keys at all levels from here to the root along the cursor's path.
1715  */
1716 STATIC int
1717 xfs_btree_updkey(
1718         struct xfs_btree_cur    *cur,
1719         union xfs_btree_key     *keyp,
1720         int                     level)
1721 {
1722         struct xfs_btree_block  *block;
1723         struct xfs_buf          *bp;
1724         union xfs_btree_key     *kp;
1725         int                     ptr;
1726
1727         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1728         XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
1729
1730         ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
1731
1732         /*
1733          * Go up the tree from this level toward the root.
1734          * At each level, update the key value to the value input.
1735          * Stop when we reach a level where the cursor isn't pointing
1736          * at the first entry in the block.
1737          */
1738         for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1739 #ifdef DEBUG
1740                 int             error;
1741 #endif
1742                 block = xfs_btree_get_block(cur, level, &bp);
1743 #ifdef DEBUG
1744                 error = xfs_btree_check_block(cur, block, level, bp);
1745                 if (error) {
1746                         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1747                         return error;
1748                 }
1749 #endif
1750                 ptr = cur->bc_ptrs[level];
1751                 kp = xfs_btree_key_addr(cur, ptr, block);
1752                 xfs_btree_copy_keys(cur, kp, keyp, 1);
1753                 xfs_btree_log_keys(cur, bp, ptr, ptr);
1754         }
1755
1756         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1757         return 0;
1758 }
1759
1760 /*
1761  * Update the record referred to by cur to the value in the
1762  * given record. This either works (return 0) or gets an
1763  * EFSCORRUPTED error.
1764  */
1765 int
1766 xfs_btree_update(
1767         struct xfs_btree_cur    *cur,
1768         union xfs_btree_rec     *rec)
1769 {
1770         struct xfs_btree_block  *block;
1771         struct xfs_buf          *bp;
1772         int                     error;
1773         int                     ptr;
1774         union xfs_btree_rec     *rp;
1775
1776         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1777         XFS_BTREE_TRACE_ARGR(cur, rec);
1778
1779         /* Pick up the current block. */
1780         block = xfs_btree_get_block(cur, 0, &bp);
1781
1782 #ifdef DEBUG
1783         error = xfs_btree_check_block(cur, block, 0, bp);
1784         if (error)
1785                 goto error0;
1786 #endif
1787         /* Get the address of the rec to be updated. */
1788         ptr = cur->bc_ptrs[0];
1789         rp = xfs_btree_rec_addr(cur, ptr, block);
1790
1791         /* Fill in the new contents and log them. */
1792         xfs_btree_copy_recs(cur, rp, rec, 1);
1793         xfs_btree_log_recs(cur, bp, ptr, ptr);
1794
1795         /*
1796          * If we are tracking the last record in the tree and
1797          * we are at the far right edge of the tree, update it.
1798          */
1799         if (xfs_btree_is_lastrec(cur, block, 0)) {
1800                 cur->bc_ops->update_lastrec(cur, block, rec,
1801                                             ptr, LASTREC_UPDATE);
1802         }
1803
1804         /* Updating first rec in leaf. Pass new key value up to our parent. */
1805         if (ptr == 1) {
1806                 union xfs_btree_key     key;
1807
1808                 cur->bc_ops->init_key_from_rec(&key, rec);
1809                 error = xfs_btree_updkey(cur, &key, 1);
1810                 if (error)
1811                         goto error0;
1812         }
1813
1814         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1815         return 0;
1816
1817 error0:
1818         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1819         return error;
1820 }
1821
1822 /*
1823  * Move 1 record left from cur/level if possible.
1824  * Update cur to reflect the new path.
1825  */
1826 STATIC int                                      /* error */
1827 xfs_btree_lshift(
1828         struct xfs_btree_cur    *cur,
1829         int                     level,
1830         int                     *stat)          /* success/failure */
1831 {
1832         union xfs_btree_key     key;            /* btree key */
1833         struct xfs_buf          *lbp;           /* left buffer pointer */
1834         struct xfs_btree_block  *left;          /* left btree block */
1835         int                     lrecs;          /* left record count */
1836         struct xfs_buf          *rbp;           /* right buffer pointer */
1837         struct xfs_btree_block  *right;         /* right btree block */
1838         int                     rrecs;          /* right record count */
1839         union xfs_btree_ptr     lptr;           /* left btree pointer */
1840         union xfs_btree_key     *rkp = NULL;    /* right btree key */
1841         union xfs_btree_ptr     *rpp = NULL;    /* right address pointer */
1842         union xfs_btree_rec     *rrp = NULL;    /* right record pointer */
1843         int                     error;          /* error return value */
1844
1845         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1846         XFS_BTREE_TRACE_ARGI(cur, level);
1847
1848         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1849             level == cur->bc_nlevels - 1)
1850                 goto out0;
1851
1852         /* Set up variables for this block as "right". */
1853         right = xfs_btree_get_block(cur, level, &rbp);
1854
1855 #ifdef DEBUG
1856         error = xfs_btree_check_block(cur, right, level, rbp);
1857         if (error)
1858                 goto error0;
1859 #endif
1860
1861         /* If we've got no left sibling then we can't shift an entry left. */
1862         xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
1863         if (xfs_btree_ptr_is_null(cur, &lptr))
1864                 goto out0;
1865
1866         /*
1867          * If the cursor entry is the one that would be moved, don't
1868          * do it... it's too complicated.
1869          */
1870         if (cur->bc_ptrs[level] <= 1)
1871                 goto out0;
1872
1873         /* Set up the left neighbor as "left". */
1874         error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp);
1875         if (error)
1876                 goto error0;
1877
1878         /* If it's full, it can't take another entry. */
1879         lrecs = xfs_btree_get_numrecs(left);
1880         if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
1881                 goto out0;
1882
1883         rrecs = xfs_btree_get_numrecs(right);
1884
1885         /*
1886          * We add one entry to the left side and remove one for the right side.
1887          * Accout for it here, the changes will be updated on disk and logged
1888          * later.
1889          */
1890         lrecs++;
1891         rrecs--;
1892
1893         XFS_BTREE_STATS_INC(cur, lshift);
1894         XFS_BTREE_STATS_ADD(cur, moves, 1);
1895
1896         /*
1897          * If non-leaf, copy a key and a ptr to the left block.
1898          * Log the changes to the left block.
1899          */
1900         if (level > 0) {
1901                 /* It's a non-leaf.  Move keys and pointers. */
1902                 union xfs_btree_key     *lkp;   /* left btree key */
1903                 union xfs_btree_ptr     *lpp;   /* left address pointer */
1904
1905                 lkp = xfs_btree_key_addr(cur, lrecs, left);
1906                 rkp = xfs_btree_key_addr(cur, 1, right);
1907
1908                 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
1909                 rpp = xfs_btree_ptr_addr(cur, 1, right);
1910 #ifdef DEBUG
1911                 error = xfs_btree_check_ptr(cur, rpp, 0, level);
1912                 if (error)
1913                         goto error0;
1914 #endif
1915                 xfs_btree_copy_keys(cur, lkp, rkp, 1);
1916                 xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
1917
1918                 xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
1919                 xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
1920
1921                 ASSERT(cur->bc_ops->keys_inorder(cur,
1922                         xfs_btree_key_addr(cur, lrecs - 1, left), lkp));
1923         } else {
1924                 /* It's a leaf.  Move records.  */
1925                 union xfs_btree_rec     *lrp;   /* left record pointer */
1926
1927                 lrp = xfs_btree_rec_addr(cur, lrecs, left);
1928                 rrp = xfs_btree_rec_addr(cur, 1, right);
1929
1930                 xfs_btree_copy_recs(cur, lrp, rrp, 1);
1931                 xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
1932
1933                 ASSERT(cur->bc_ops->recs_inorder(cur,
1934                         xfs_btree_rec_addr(cur, lrecs - 1, left), lrp));
1935         }
1936
1937         xfs_btree_set_numrecs(left, lrecs);
1938         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
1939
1940         xfs_btree_set_numrecs(right, rrecs);
1941         xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
1942
1943         /*
1944          * Slide the contents of right down one entry.
1945          */
1946         XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
1947         if (level > 0) {
1948                 /* It's a nonleaf. operate on keys and ptrs */
1949 #ifdef DEBUG
1950                 int                     i;              /* loop index */
1951
1952                 for (i = 0; i < rrecs; i++) {
1953                         error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
1954                         if (error)
1955                                 goto error0;
1956                 }
1957 #endif
1958                 xfs_btree_shift_keys(cur,
1959                                 xfs_btree_key_addr(cur, 2, right),
1960                                 -1, rrecs);
1961                 xfs_btree_shift_ptrs(cur,
1962                                 xfs_btree_ptr_addr(cur, 2, right),
1963                                 -1, rrecs);
1964
1965                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
1966                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
1967         } else {
1968                 /* It's a leaf. operate on records */
1969                 xfs_btree_shift_recs(cur,
1970                         xfs_btree_rec_addr(cur, 2, right),
1971                         -1, rrecs);
1972                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
1973
1974                 /*
1975                  * If it's the first record in the block, we'll need a key
1976                  * structure to pass up to the next level (updkey).
1977                  */
1978                 cur->bc_ops->init_key_from_rec(&key,
1979                         xfs_btree_rec_addr(cur, 1, right));
1980                 rkp = &key;
1981         }
1982
1983         /* Update the parent key values of right. */
1984         error = xfs_btree_updkey(cur, rkp, level + 1);
1985         if (error)
1986                 goto error0;
1987
1988         /* Slide the cursor value left one. */
1989         cur->bc_ptrs[level]--;
1990
1991         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1992         *stat = 1;
1993         return 0;
1994
1995 out0:
1996         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1997         *stat = 0;
1998         return 0;
1999
2000 error0:
2001         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2002         return error;
2003 }
2004
2005 /*
2006  * Move 1 record right from cur/level if possible.
2007  * Update cur to reflect the new path.
2008  */
2009 STATIC int                                      /* error */
2010 xfs_btree_rshift(
2011         struct xfs_btree_cur    *cur,
2012         int                     level,
2013         int                     *stat)          /* success/failure */
2014 {
2015         union xfs_btree_key     key;            /* btree key */
2016         struct xfs_buf          *lbp;           /* left buffer pointer */
2017         struct xfs_btree_block  *left;          /* left btree block */
2018         struct xfs_buf          *rbp;           /* right buffer pointer */
2019         struct xfs_btree_block  *right;         /* right btree block */
2020         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
2021         union xfs_btree_ptr     rptr;           /* right block pointer */
2022         union xfs_btree_key     *rkp;           /* right btree key */
2023         int                     rrecs;          /* right record count */
2024         int                     lrecs;          /* left record count */
2025         int                     error;          /* error return value */
2026         int                     i;              /* loop counter */
2027
2028         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2029         XFS_BTREE_TRACE_ARGI(cur, level);
2030
2031         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2032             (level == cur->bc_nlevels - 1))
2033                 goto out0;
2034
2035         /* Set up variables for this block as "left". */
2036         left = xfs_btree_get_block(cur, level, &lbp);
2037
2038 #ifdef DEBUG
2039         error = xfs_btree_check_block(cur, left, level, lbp);
2040         if (error)
2041                 goto error0;
2042 #endif
2043
2044         /* If we've got no right sibling then we can't shift an entry right. */
2045         xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2046         if (xfs_btree_ptr_is_null(cur, &rptr))
2047                 goto out0;
2048
2049         /*
2050          * If the cursor entry is the one that would be moved, don't
2051          * do it... it's too complicated.
2052          */
2053         lrecs = xfs_btree_get_numrecs(left);
2054         if (cur->bc_ptrs[level] >= lrecs)
2055                 goto out0;
2056
2057         /* Set up the right neighbor as "right". */
2058         error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp);
2059         if (error)
2060                 goto error0;
2061
2062         /* If it's full, it can't take another entry. */
2063         rrecs = xfs_btree_get_numrecs(right);
2064         if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
2065                 goto out0;
2066
2067         XFS_BTREE_STATS_INC(cur, rshift);
2068         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2069
2070         /*
2071          * Make a hole at the start of the right neighbor block, then
2072          * copy the last left block entry to the hole.
2073          */
2074         if (level > 0) {
2075                 /* It's a nonleaf. make a hole in the keys and ptrs */
2076                 union xfs_btree_key     *lkp;
2077                 union xfs_btree_ptr     *lpp;
2078                 union xfs_btree_ptr     *rpp;
2079
2080                 lkp = xfs_btree_key_addr(cur, lrecs, left);
2081                 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2082                 rkp = xfs_btree_key_addr(cur, 1, right);
2083                 rpp = xfs_btree_ptr_addr(cur, 1, right);
2084
2085 #ifdef DEBUG
2086                 for (i = rrecs - 1; i >= 0; i--) {
2087                         error = xfs_btree_check_ptr(cur, rpp, i, level);
2088                         if (error)
2089                                 goto error0;
2090                 }
2091 #endif
2092
2093                 xfs_btree_shift_keys(cur, rkp, 1, rrecs);
2094                 xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
2095
2096 #ifdef DEBUG
2097                 error = xfs_btree_check_ptr(cur, lpp, 0, level);
2098                 if (error)
2099                         goto error0;
2100 #endif
2101
2102                 /* Now put the new data in, and log it. */
2103                 xfs_btree_copy_keys(cur, rkp, lkp, 1);
2104                 xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
2105
2106                 xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
2107                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
2108
2109                 ASSERT(cur->bc_ops->keys_inorder(cur, rkp,
2110                         xfs_btree_key_addr(cur, 2, right)));
2111         } else {
2112                 /* It's a leaf. make a hole in the records */
2113                 union xfs_btree_rec     *lrp;
2114                 union xfs_btree_rec     *rrp;
2115
2116                 lrp = xfs_btree_rec_addr(cur, lrecs, left);
2117                 rrp = xfs_btree_rec_addr(cur, 1, right);
2118
2119                 xfs_btree_shift_recs(cur, rrp, 1, rrecs);
2120
2121                 /* Now put the new data in, and log it. */
2122                 xfs_btree_copy_recs(cur, rrp, lrp, 1);
2123                 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
2124
2125                 cur->bc_ops->init_key_from_rec(&key, rrp);
2126                 rkp = &key;
2127
2128                 ASSERT(cur->bc_ops->recs_inorder(cur, rrp,
2129                         xfs_btree_rec_addr(cur, 2, right)));
2130         }
2131
2132         /*
2133          * Decrement and log left's numrecs, bump and log right's numrecs.
2134          */
2135         xfs_btree_set_numrecs(left, --lrecs);
2136         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2137
2138         xfs_btree_set_numrecs(right, ++rrecs);
2139         xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2140
2141         /*
2142          * Using a temporary cursor, update the parent key values of the
2143          * block on the right.
2144          */
2145         error = xfs_btree_dup_cursor(cur, &tcur);
2146         if (error)
2147                 goto error0;
2148         i = xfs_btree_lastrec(tcur, level);
2149         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2150
2151         error = xfs_btree_increment(tcur, level, &i);
2152         if (error)
2153                 goto error1;
2154
2155         error = xfs_btree_updkey(tcur, rkp, level + 1);
2156         if (error)
2157                 goto error1;
2158
2159         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2160
2161         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2162         *stat = 1;
2163         return 0;
2164
2165 out0:
2166         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2167         *stat = 0;
2168         return 0;
2169
2170 error0:
2171         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2172         return error;
2173
2174 error1:
2175         XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
2176         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2177         return error;
2178 }
2179
2180 /*
2181  * Split cur/level block in half.
2182  * Return new block number and the key to its first
2183  * record (to be inserted into parent).
2184  */
2185 STATIC int                                      /* error */
2186 xfs_btree_split(
2187         struct xfs_btree_cur    *cur,
2188         int                     level,
2189         union xfs_btree_ptr     *ptrp,
2190         union xfs_btree_key     *key,
2191         struct xfs_btree_cur    **curp,
2192         int                     *stat)          /* success/failure */
2193 {
2194         union xfs_btree_ptr     lptr;           /* left sibling block ptr */
2195         struct xfs_buf          *lbp;           /* left buffer pointer */
2196         struct xfs_btree_block  *left;          /* left btree block */
2197         union xfs_btree_ptr     rptr;           /* right sibling block ptr */
2198         struct xfs_buf          *rbp;           /* right buffer pointer */
2199         struct xfs_btree_block  *right;         /* right btree block */
2200         union xfs_btree_ptr     rrptr;          /* right-right sibling ptr */
2201         struct xfs_buf          *rrbp;          /* right-right buffer pointer */
2202         struct xfs_btree_block  *rrblock;       /* right-right btree block */
2203         int                     lrecs;
2204         int                     rrecs;
2205         int                     src_index;
2206         int                     error;          /* error return value */
2207 #ifdef DEBUG
2208         int                     i;
2209 #endif
2210
2211         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2212         XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);
2213
2214         XFS_BTREE_STATS_INC(cur, split);
2215
2216         /* Set up left block (current one). */
2217         left = xfs_btree_get_block(cur, level, &lbp);
2218
2219 #ifdef DEBUG
2220         error = xfs_btree_check_block(cur, left, level, lbp);
2221         if (error)
2222                 goto error0;
2223 #endif
2224
2225         xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2226
2227         /* Allocate the new block. If we can't do it, we're toast. Give up. */
2228         error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat);
2229         if (error)
2230                 goto error0;
2231         if (*stat == 0)
2232                 goto out0;
2233         XFS_BTREE_STATS_INC(cur, alloc);
2234
2235         /* Set up the new block as "right". */
2236         error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
2237         if (error)
2238                 goto error0;
2239
2240         /* Fill in the btree header for the new right block. */
2241         xfs_btree_init_block(cur, xfs_btree_get_level(left), 0, right);
2242
2243         /*
2244          * Split the entries between the old and the new block evenly.
2245          * Make sure that if there's an odd number of entries now, that
2246          * each new block will have the same number of entries.
2247          */
2248         lrecs = xfs_btree_get_numrecs(left);
2249         rrecs = lrecs / 2;
2250         if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
2251                 rrecs++;
2252         src_index = (lrecs - rrecs + 1);
2253
2254         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2255
2256         /*
2257          * Copy btree block entries from the left block over to the
2258          * new block, the right. Update the right block and log the
2259          * changes.
2260          */
2261         if (level > 0) {
2262                 /* It's a non-leaf.  Move keys and pointers. */
2263                 union xfs_btree_key     *lkp;   /* left btree key */
2264                 union xfs_btree_ptr     *lpp;   /* left address pointer */
2265                 union xfs_btree_key     *rkp;   /* right btree key */
2266                 union xfs_btree_ptr     *rpp;   /* right address pointer */
2267
2268                 lkp = xfs_btree_key_addr(cur, src_index, left);
2269                 lpp = xfs_btree_ptr_addr(cur, src_index, left);
2270                 rkp = xfs_btree_key_addr(cur, 1, right);
2271                 rpp = xfs_btree_ptr_addr(cur, 1, right);
2272
2273 #ifdef DEBUG
2274                 for (i = src_index; i < rrecs; i++) {
2275                         error = xfs_btree_check_ptr(cur, lpp, i, level);
2276                         if (error)
2277                                 goto error0;
2278                 }
2279 #endif
2280
2281                 xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
2282                 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
2283
2284                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
2285                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2286
2287                 /* Grab the keys to the entries moved to the right block */
2288                 xfs_btree_copy_keys(cur, key, rkp, 1);
2289         } else {
2290                 /* It's a leaf.  Move records.  */
2291                 union xfs_btree_rec     *lrp;   /* left record pointer */
2292                 union xfs_btree_rec     *rrp;   /* right record pointer */
2293
2294                 lrp = xfs_btree_rec_addr(cur, src_index, left);
2295                 rrp = xfs_btree_rec_addr(cur, 1, right);
2296
2297                 xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
2298                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
2299
2300                 cur->bc_ops->init_key_from_rec(key,
2301                         xfs_btree_rec_addr(cur, 1, right));
2302         }
2303
2304
2305         /*
2306          * Find the left block number by looking in the buffer.
2307          * Adjust numrecs, sibling pointers.
2308          */
2309         xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
2310         xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
2311         xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2312         xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2313
2314         lrecs -= rrecs;
2315         xfs_btree_set_numrecs(left, lrecs);
2316         xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2317
2318         xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
2319         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
2320
2321         /*
2322          * If there's a block to the new block's right, make that block
2323          * point back to right instead of to left.
2324          */
2325         if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
2326                 error = xfs_btree_read_buf_block(cur, &rrptr, level,
2327                                                         0, &rrblock, &rrbp);
2328                 if (error)
2329                         goto error0;
2330                 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
2331                 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
2332         }
2333         /*
2334          * If the cursor is really in the right block, move it there.
2335          * If it's just pointing past the last entry in left, then we'll
2336          * insert there, so don't change anything in that case.
2337          */
2338         if (cur->bc_ptrs[level] > lrecs + 1) {
2339                 xfs_btree_setbuf(cur, level, rbp);
2340                 cur->bc_ptrs[level] -= lrecs;
2341         }
2342         /*
2343          * If there are more levels, we'll need another cursor which refers
2344          * the right block, no matter where this cursor was.
2345          */
2346         if (level + 1 < cur->bc_nlevels) {
2347                 error = xfs_btree_dup_cursor(cur, curp);
2348                 if (error)
2349                         goto error0;
2350                 (*curp)->bc_ptrs[level + 1]++;
2351         }
2352         *ptrp = rptr;
2353         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2354         *stat = 1;
2355         return 0;
2356 out0:
2357         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2358         *stat = 0;
2359         return 0;
2360
2361 error0:
2362         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2363         return error;
2364 }
2365
2366 /*
2367  * Copy the old inode root contents into a real block and make the
2368  * broot point to it.
2369  */
2370 int                                             /* error */
2371 xfs_btree_new_iroot(
2372         struct xfs_btree_cur    *cur,           /* btree cursor */
2373         int                     *logflags,      /* logging flags for inode */
2374         int                     *stat)          /* return status - 0 fail */
2375 {
2376         struct xfs_buf          *cbp;           /* buffer for cblock */
2377         struct xfs_btree_block  *block;         /* btree block */
2378         struct xfs_btree_block  *cblock;        /* child btree block */
2379         union xfs_btree_key     *ckp;           /* child key pointer */
2380         union xfs_btree_ptr     *cpp;           /* child ptr pointer */
2381         union xfs_btree_key     *kp;            /* pointer to btree key */
2382         union xfs_btree_ptr     *pp;            /* pointer to block addr */
2383         union xfs_btree_ptr     nptr;           /* new block addr */
2384         int                     level;          /* btree level */
2385         int                     error;          /* error return code */
2386 #ifdef DEBUG
2387         int                     i;              /* loop counter */
2388 #endif
2389
2390         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2391         XFS_BTREE_STATS_INC(cur, newroot);
2392
2393         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2394
2395         level = cur->bc_nlevels - 1;
2396
2397         block = xfs_btree_get_iroot(cur);
2398         pp = xfs_btree_ptr_addr(cur, 1, block);
2399
2400         /* Allocate the new block. If we can't do it, we're toast. Give up. */
2401         error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat);
2402         if (error)
2403                 goto error0;
2404         if (*stat == 0) {
2405                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2406                 return 0;
2407         }
2408         XFS_BTREE_STATS_INC(cur, alloc);
2409
2410         /* Copy the root into a real block. */
2411         error = xfs_btree_get_buf_block(cur, &nptr, 0, &cblock, &cbp);
2412         if (error)
2413                 goto error0;
2414
2415         memcpy(cblock, block, xfs_btree_block_len(cur));
2416
2417         be16_add_cpu(&block->bb_level, 1);
2418         xfs_btree_set_numrecs(block, 1);
2419         cur->bc_nlevels++;
2420         cur->bc_ptrs[level + 1] = 1;
2421
2422         kp = xfs_btree_key_addr(cur, 1, block);
2423         ckp = xfs_btree_key_addr(cur, 1, cblock);
2424         xfs_btree_copy_keys(cur, ckp, kp, xfs_btree_get_numrecs(cblock));
2425
2426         cpp = xfs_btree_ptr_addr(cur, 1, cblock);
2427 #ifdef DEBUG
2428         for (i = 0; i < be16_to_cpu(cblock->bb_numrecs); i++) {
2429                 error = xfs_btree_check_ptr(cur, pp, i, level);
2430                 if (error)
2431                         goto error0;
2432         }
2433 #endif
2434         xfs_btree_copy_ptrs(cur, cpp, pp, xfs_btree_get_numrecs(cblock));
2435
2436 #ifdef DEBUG
2437         error = xfs_btree_check_ptr(cur, &nptr, 0, level);
2438         if (error)
2439                 goto error0;
2440 #endif
2441         xfs_btree_copy_ptrs(cur, pp, &nptr, 1);
2442
2443         xfs_iroot_realloc(cur->bc_private.b.ip,
2444                           1 - xfs_btree_get_numrecs(cblock),
2445                           cur->bc_private.b.whichfork);
2446
2447         xfs_btree_setbuf(cur, level, cbp);
2448
2449         /*
2450          * Do all this logging at the end so that
2451          * the root is at the right level.
2452          */
2453         xfs_btree_log_block(cur, cbp, XFS_BB_ALL_BITS);
2454         xfs_btree_log_keys(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2455         xfs_btree_log_ptrs(cur, cbp, 1, be16_to_cpu(cblock->bb_numrecs));
2456
2457         *logflags |=
2458                 XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork);
2459         *stat = 1;
2460         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2461         return 0;
2462 error0:
2463         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2464         return error;
2465 }
2466
2467 /*
2468  * Allocate a new root block, fill it in.
2469  */
2470 STATIC int                              /* error */
2471 xfs_btree_new_root(
2472         struct xfs_btree_cur    *cur,   /* btree cursor */
2473         int                     *stat)  /* success/failure */
2474 {
2475         struct xfs_btree_block  *block; /* one half of the old root block */
2476         struct xfs_buf          *bp;    /* buffer containing block */
2477         int                     error;  /* error return value */
2478         struct xfs_buf          *lbp;   /* left buffer pointer */
2479         struct xfs_btree_block  *left;  /* left btree block */
2480         struct xfs_buf          *nbp;   /* new (root) buffer */
2481         struct xfs_btree_block  *new;   /* new (root) btree block */
2482         int                     nptr;   /* new value for key index, 1 or 2 */
2483         struct xfs_buf          *rbp;   /* right buffer pointer */
2484         struct xfs_btree_block  *right; /* right btree block */
2485         union xfs_btree_ptr     rptr;
2486         union xfs_btree_ptr     lptr;
2487
2488         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2489         XFS_BTREE_STATS_INC(cur, newroot);
2490
2491         /* initialise our start point from the cursor */
2492         cur->bc_ops->init_ptr_from_cur(cur, &rptr);
2493
2494         /* Allocate the new block. If we can't do it, we're toast. Give up. */
2495         error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat);
2496         if (error)
2497                 goto error0;
2498         if (*stat == 0)
2499                 goto out0;
2500         XFS_BTREE_STATS_INC(cur, alloc);
2501
2502         /* Set up the new block. */
2503         error = xfs_btree_get_buf_block(cur, &lptr, 0, &new, &nbp);
2504         if (error)
2505                 goto error0;
2506
2507         /* Set the root in the holding structure  increasing the level by 1. */
2508         cur->bc_ops->set_root(cur, &lptr, 1);
2509
2510         /*
2511          * At the previous root level there are now two blocks: the old root,
2512          * and the new block generated when it was split.  We don't know which
2513          * one the cursor is pointing at, so we set up variables "left" and
2514          * "right" for each case.
2515          */
2516         block = xfs_btree_get_block(cur, cur->bc_nlevels - 1, &bp);
2517
2518 #ifdef DEBUG
2519         error = xfs_btree_check_block(cur, block, cur->bc_nlevels - 1, bp);
2520         if (error)
2521                 goto error0;
2522 #endif
2523
2524         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
2525         if (!xfs_btree_ptr_is_null(cur, &rptr)) {
2526                 /* Our block is left, pick up the right block. */
2527                 lbp = bp;
2528                 xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2529                 left = block;
2530                 error = xfs_btree_read_buf_block(cur, &rptr,
2531                                         cur->bc_nlevels - 1, 0, &right, &rbp);
2532                 if (error)
2533                         goto error0;
2534                 bp = rbp;
2535                 nptr = 1;
2536         } else {
2537                 /* Our block is right, pick up the left block. */
2538                 rbp = bp;
2539                 xfs_btree_buf_to_ptr(cur, rbp, &rptr);
2540                 right = block;
2541                 xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2542                 error = xfs_btree_read_buf_block(cur, &lptr,
2543                                         cur->bc_nlevels - 1, 0, &left, &lbp);
2544                 if (error)
2545                         goto error0;
2546                 bp = lbp;
2547                 nptr = 2;
2548         }
2549         /* Fill in the new block's btree header and log it. */
2550         xfs_btree_init_block(cur, cur->bc_nlevels, 2, new);
2551         xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
2552         ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
2553                         !xfs_btree_ptr_is_null(cur, &rptr));
2554
2555         /* Fill in the key data in the new root. */
2556         if (xfs_btree_get_level(left) > 0) {
2557                 xfs_btree_copy_keys(cur,
2558                                 xfs_btree_key_addr(cur, 1, new),
2559                                 xfs_btree_key_addr(cur, 1, left), 1);
2560                 xfs_btree_copy_keys(cur,
2561                                 xfs_btree_key_addr(cur, 2, new),
2562                                 xfs_btree_key_addr(cur, 1, right), 1);
2563         } else {
2564                 cur->bc_ops->init_key_from_rec(
2565                                 xfs_btree_key_addr(cur, 1, new),
2566                                 xfs_btree_rec_addr(cur, 1, left));
2567                 cur->bc_ops->init_key_from_rec(
2568                                 xfs_btree_key_addr(cur, 2, new),
2569                                 xfs_btree_rec_addr(cur, 1, right));
2570         }
2571         xfs_btree_log_keys(cur, nbp, 1, 2);
2572
2573         /* Fill in the pointer data in the new root. */
2574         xfs_btree_copy_ptrs(cur,
2575                 xfs_btree_ptr_addr(cur, 1, new), &lptr, 1);
2576         xfs_btree_copy_ptrs(cur,
2577                 xfs_btree_ptr_addr(cur, 2, new), &rptr, 1);
2578         xfs_btree_log_ptrs(cur, nbp, 1, 2);
2579
2580         /* Fix up the cursor. */
2581         xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
2582         cur->bc_ptrs[cur->bc_nlevels] = nptr;
2583         cur->bc_nlevels++;
2584         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2585         *stat = 1;
2586         return 0;
2587 error0:
2588         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2589         return error;
2590 out0:
2591         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2592         *stat = 0;
2593         return 0;
2594 }
2595
2596 STATIC int
2597 xfs_btree_make_block_unfull(
2598         struct xfs_btree_cur    *cur,   /* btree cursor */
2599         int                     level,  /* btree level */
2600         int                     numrecs,/* # of recs in block */
2601         int                     *oindex,/* old tree index */
2602         int                     *index, /* new tree index */
2603         union xfs_btree_ptr     *nptr,  /* new btree ptr */
2604         struct xfs_btree_cur    **ncur, /* new btree cursor */
2605         union xfs_btree_rec     *nrec,  /* new record */
2606         int                     *stat)
2607 {
2608         union xfs_btree_key     key;    /* new btree key value */
2609         int                     error = 0;
2610
2611         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2612             level == cur->bc_nlevels - 1) {
2613                 struct xfs_inode *ip = cur->bc_private.b.ip;
2614
2615                 if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
2616                         /* A root block that can be made bigger. */
2617
2618                         xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
2619                 } else {
2620                         /* A root block that needs replacing */
2621                         int     logflags = 0;
2622
2623                         error = xfs_btree_new_iroot(cur, &logflags, stat);
2624                         if (error || *stat == 0)
2625                                 return error;
2626
2627                         xfs_trans_log_inode(cur->bc_tp, ip, logflags);
2628                 }
2629
2630                 return 0;
2631         }
2632
2633         /* First, try shifting an entry to the right neighbor. */
2634         error = xfs_btree_rshift(cur, level, stat);
2635         if (error || *stat)
2636                 return error;
2637
2638         /* Next, try shifting an entry to the left neighbor. */
2639         error = xfs_btree_lshift(cur, level, stat);
2640         if (error)
2641                 return error;
2642
2643         if (*stat) {
2644                 *oindex = *index = cur->bc_ptrs[level];
2645                 return 0;
2646         }
2647
2648         /*
2649          * Next, try splitting the current block in half.
2650          *
2651          * If this works we have to re-set our variables because we
2652          * could be in a different block now.
2653          */
2654         error = xfs_btree_split(cur, level, nptr, &key, ncur, stat);
2655         if (error || *stat == 0)
2656                 return error;
2657
2658
2659         *index = cur->bc_ptrs[level];
2660         cur->bc_ops->init_rec_from_key(&key, nrec);
2661         return 0;
2662 }
2663
2664 /*
2665  * Insert one record/level.  Return information to the caller
2666  * allowing the next level up to proceed if necessary.
2667  */
2668 STATIC int
2669 xfs_btree_insrec(
2670         struct xfs_btree_cur    *cur,   /* btree cursor */
2671         int                     level,  /* level to insert record at */
2672         union xfs_btree_ptr     *ptrp,  /* i/o: block number inserted */
2673         union xfs_btree_rec     *recp,  /* i/o: record data inserted */
2674         struct xfs_btree_cur    **curp, /* output: new cursor replacing cur */
2675         int                     *stat)  /* success/failure */
2676 {
2677         struct xfs_btree_block  *block; /* btree block */
2678         struct xfs_buf          *bp;    /* buffer for block */
2679         union xfs_btree_key     key;    /* btree key */
2680         union xfs_btree_ptr     nptr;   /* new block ptr */
2681         struct xfs_btree_cur    *ncur;  /* new btree cursor */
2682         union xfs_btree_rec     nrec;   /* new record count */
2683         int                     optr;   /* old key/record index */
2684         int                     ptr;    /* key/record index */
2685         int                     numrecs;/* number of records */
2686         int                     error;  /* error return value */
2687 #ifdef DEBUG
2688         int                     i;
2689 #endif
2690
2691         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2692         XFS_BTREE_TRACE_ARGIPR(cur, level, *ptrp, recp);
2693
2694         ncur = NULL;
2695
2696         /*
2697          * If we have an external root pointer, and we've made it to the
2698          * root level, allocate a new root block and we're done.
2699          */
2700         if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2701             (level >= cur->bc_nlevels)) {
2702                 error = xfs_btree_new_root(cur, stat);
2703                 xfs_btree_set_ptr_null(cur, ptrp);
2704
2705                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2706                 return error;
2707         }
2708
2709         /* If we're off the left edge, return failure. */
2710         ptr = cur->bc_ptrs[level];
2711         if (ptr == 0) {
2712                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2713                 *stat = 0;
2714                 return 0;
2715         }
2716
2717         /* Make a key out of the record data to be inserted, and save it. */
2718         cur->bc_ops->init_key_from_rec(&key, recp);
2719
2720         optr = ptr;
2721
2722         XFS_BTREE_STATS_INC(cur, insrec);
2723
2724         /* Get pointers to the btree buffer and block. */
2725         block = xfs_btree_get_block(cur, level, &bp);
2726         numrecs = xfs_btree_get_numrecs(block);
2727
2728 #ifdef DEBUG
2729         error = xfs_btree_check_block(cur, block, level, bp);
2730         if (error)
2731                 goto error0;
2732
2733         /* Check that the new entry is being inserted in the right place. */
2734         if (ptr <= numrecs) {
2735                 if (level == 0) {
2736                         ASSERT(cur->bc_ops->recs_inorder(cur, recp,
2737                                 xfs_btree_rec_addr(cur, ptr, block)));
2738                 } else {
2739                         ASSERT(cur->bc_ops->keys_inorder(cur, &key,
2740                                 xfs_btree_key_addr(cur, ptr, block)));
2741                 }
2742         }
2743 #endif
2744
2745         /*
2746          * If the block is full, we can't insert the new entry until we
2747          * make the block un-full.
2748          */
2749         xfs_btree_set_ptr_null(cur, &nptr);
2750         if (numrecs == cur->bc_ops->get_maxrecs(cur, level)) {
2751                 error = xfs_btree_make_block_unfull(cur, level, numrecs,
2752                                         &optr, &ptr, &nptr, &ncur, &nrec, stat);
2753                 if (error || *stat == 0)
2754                         goto error0;
2755         }
2756
2757         /*
2758          * The current block may have changed if the block was
2759          * previously full and we have just made space in it.
2760          */
2761         block = xfs_btree_get_block(cur, level, &bp);
2762         numrecs = xfs_btree_get_numrecs(block);
2763
2764 #ifdef DEBUG
2765         error = xfs_btree_check_block(cur, block, level, bp);
2766         if (error)
2767                 return error;
2768 #endif
2769
2770         /*
2771          * At this point we know there's room for our new entry in the block
2772          * we're pointing at.
2773          */
2774         XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr + 1);
2775
2776         if (level > 0) {
2777                 /* It's a nonleaf. make a hole in the keys and ptrs */
2778                 union xfs_btree_key     *kp;
2779                 union xfs_btree_ptr     *pp;
2780
2781                 kp = xfs_btree_key_addr(cur, ptr, block);
2782                 pp = xfs_btree_ptr_addr(cur, ptr, block);
2783
2784 #ifdef DEBUG
2785                 for (i = numrecs - ptr; i >= 0; i--) {
2786                         error = xfs_btree_check_ptr(cur, pp, i, level);
2787                         if (error)
2788                                 return error;
2789                 }
2790 #endif
2791
2792                 xfs_btree_shift_keys(cur, kp, 1, numrecs - ptr + 1);
2793                 xfs_btree_shift_ptrs(cur, pp, 1, numrecs - ptr + 1);
2794
2795 #ifdef DEBUG
2796                 error = xfs_btree_check_ptr(cur, ptrp, 0, level);
2797                 if (error)
2798                         goto error0;
2799 #endif
2800
2801                 /* Now put the new data in, bump numrecs and log it. */
2802                 xfs_btree_copy_keys(cur, kp, &key, 1);
2803                 xfs_btree_copy_ptrs(cur, pp, ptrp, 1);
2804                 numrecs++;
2805                 xfs_btree_set_numrecs(block, numrecs);
2806                 xfs_btree_log_ptrs(cur, bp, ptr, numrecs);
2807                 xfs_btree_log_keys(cur, bp, ptr, numrecs);
2808 #ifdef DEBUG
2809                 if (ptr < numrecs) {
2810                         ASSERT(cur->bc_ops->keys_inorder(cur, kp,
2811                                 xfs_btree_key_addr(cur, ptr + 1, block)));
2812                 }
2813 #endif
2814         } else {
2815                 /* It's a leaf. make a hole in the records */
2816                 union xfs_btree_rec             *rp;
2817
2818                 rp = xfs_btree_rec_addr(cur, ptr, block);
2819
2820                 xfs_btree_shift_recs(cur, rp, 1, numrecs - ptr + 1);
2821
2822                 /* Now put the new data in, bump numrecs and log it. */
2823                 xfs_btree_copy_recs(cur, rp, recp, 1);
2824                 xfs_btree_set_numrecs(block, ++numrecs);
2825                 xfs_btree_log_recs(cur, bp, ptr, numrecs);
2826 #ifdef DEBUG
2827                 if (ptr < numrecs) {
2828                         ASSERT(cur->bc_ops->recs_inorder(cur, rp,
2829                                 xfs_btree_rec_addr(cur, ptr + 1, block)));
2830                 }
2831 #endif
2832         }
2833
2834         /* Log the new number of records in the btree header. */
2835         xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
2836
2837         /* If we inserted at the start of a block, update the parents' keys. */
2838         if (optr == 1) {
2839                 error = xfs_btree_updkey(cur, &key, level + 1);
2840                 if (error)
2841                         goto error0;
2842         }
2843
2844         /*
2845          * If we are tracking the last record in the tree and
2846          * we are at the far right edge of the tree, update it.
2847          */
2848         if (xfs_btree_is_lastrec(cur, block, level)) {
2849                 cur->bc_ops->update_lastrec(cur, block, recp,
2850                                             ptr, LASTREC_INSREC);
2851         }
2852
2853         /*
2854          * Return the new block number, if any.
2855          * If there is one, give back a record value and a cursor too.
2856          */
2857         *ptrp = nptr;
2858         if (!xfs_btree_ptr_is_null(cur, &nptr)) {
2859                 *recp = nrec;
2860                 *curp = ncur;
2861         }
2862
2863         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2864         *stat = 1;
2865         return 0;
2866
2867 error0:
2868         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2869         return error;
2870 }
2871
2872 /*
2873  * Insert the record at the point referenced by cur.
2874  *
2875  * A multi-level split of the tree on insert will invalidate the original
2876  * cursor.  All callers of this function should assume that the cursor is
2877  * no longer valid and revalidate it.
2878  */
2879 int
2880 xfs_btree_insert(
2881         struct xfs_btree_cur    *cur,
2882         int                     *stat)
2883 {
2884         int                     error;  /* error return value */
2885         int                     i;      /* result value, 0 for failure */
2886         int                     level;  /* current level number in btree */
2887         union xfs_btree_ptr     nptr;   /* new block number (split result) */
2888         struct xfs_btree_cur    *ncur;  /* new cursor (split result) */
2889         struct xfs_btree_cur    *pcur;  /* previous level's cursor */
2890         union xfs_btree_rec     rec;    /* record to insert */
2891
2892         level = 0;
2893         ncur = NULL;
2894         pcur = cur;
2895
2896         xfs_btree_set_ptr_null(cur, &nptr);
2897         cur->bc_ops->init_rec_from_cur(cur, &rec);
2898
2899         /*
2900          * Loop going up the tree, starting at the leaf level.
2901          * Stop when we don't get a split block, that must mean that
2902          * the insert is finished with this level.
2903          */
2904         do {
2905                 /*
2906                  * Insert nrec/nptr into this level of the tree.
2907                  * Note if we fail, nptr will be null.
2908                  */
2909                 error = xfs_btree_insrec(pcur, level, &nptr, &rec, &ncur, &i);
2910                 if (error) {
2911                         if (pcur != cur)
2912                                 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
2913                         goto error0;
2914                 }
2915
2916                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2917                 level++;
2918
2919                 /*
2920                  * See if the cursor we just used is trash.
2921                  * Can't trash the caller's cursor, but otherwise we should
2922                  * if ncur is a new cursor or we're about to be done.
2923                  */
2924                 if (pcur != cur &&
2925                     (ncur || xfs_btree_ptr_is_null(cur, &nptr))) {
2926                         /* Save the state from the cursor before we trash it */
2927                         if (cur->bc_ops->update_cursor)
2928                                 cur->bc_ops->update_cursor(pcur, cur);
2929                         cur->bc_nlevels = pcur->bc_nlevels;
2930                         xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
2931                 }
2932                 /* If we got a new cursor, switch to it. */
2933                 if (ncur) {
2934                         pcur = ncur;
2935                         ncur = NULL;
2936                 }
2937         } while (!xfs_btree_ptr_is_null(cur, &nptr));
2938
2939         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2940         *stat = i;
2941         return 0;
2942 error0:
2943         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2944         return error;
2945 }
2946
2947 /*
2948  * Try to merge a non-leaf block back into the inode root.
2949  *
2950  * Note: the killroot names comes from the fact that we're effectively
2951  * killing the old root block.  But because we can't just delete the
2952  * inode we have to copy the single block it was pointing to into the
2953  * inode.
2954  */
2955 int
2956 xfs_btree_kill_iroot(
2957         struct xfs_btree_cur    *cur)
2958 {
2959         int                     whichfork = cur->bc_private.b.whichfork;
2960         struct xfs_inode        *ip = cur->bc_private.b.ip;
2961         struct xfs_ifork        *ifp = XFS_IFORK_PTR(ip, whichfork);
2962         struct xfs_btree_block  *block;
2963         struct xfs_btree_block  *cblock;
2964         union xfs_btree_key     *kp;
2965         union xfs_btree_key     *ckp;
2966         union xfs_btree_ptr     *pp;
2967         union xfs_btree_ptr     *cpp;
2968         struct xfs_buf          *cbp;
2969         int                     level;
2970         int                     index;
2971         int                     numrecs;
2972 #ifdef DEBUG
2973         union xfs_btree_ptr     ptr;
2974         int                     i;
2975 #endif
2976
2977         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2978
2979         ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
2980         ASSERT(cur->bc_nlevels > 1);
2981
2982         /*
2983          * Don't deal with the root block needs to be a leaf case.
2984          * We're just going to turn the thing back into extents anyway.
2985          */
2986         level = cur->bc_nlevels - 1;
2987         if (level == 1)
2988                 goto out0;
2989
2990         /*
2991          * Give up if the root has multiple children.
2992          */
2993         block = xfs_btree_get_iroot(cur);
2994         if (xfs_btree_get_numrecs(block) != 1)
2995                 goto out0;
2996
2997         cblock = xfs_btree_get_block(cur, level - 1, &cbp);
2998         numrecs = xfs_btree_get_numrecs(cblock);
2999
3000         /*
3001          * Only do this if the next level will fit.
3002          * Then the data must be copied up to the inode,
3003          * instead of freeing the root you free the next level.
3004          */
3005         if (numrecs > cur->bc_ops->get_dmaxrecs(cur, level))
3006                 goto out0;
3007
3008         XFS_BTREE_STATS_INC(cur, killroot);
3009
3010 #ifdef DEBUG
3011         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
3012         ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3013         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
3014         ASSERT(xfs_btree_ptr_is_null(cur, &ptr));
3015 #endif
3016
3017         index = numrecs - cur->bc_ops->get_maxrecs(cur, level);
3018         if (index) {
3019                 xfs_iroot_realloc(cur->bc_private.b.ip, index,
3020                                   cur->bc_private.b.whichfork);
3021                 block = (struct xfs_btree_block *)ifp->if_broot;
3022         }
3023
3024         be16_add_cpu(&block->bb_numrecs, index);
3025         ASSERT(block->bb_numrecs == cblock->bb_numrecs);
3026
3027         kp = xfs_btree_key_addr(cur, 1, block);
3028         ckp = xfs_btree_key_addr(cur, 1, cblock);
3029         xfs_btree_copy_keys(cur, kp, ckp, numrecs);
3030
3031         pp = xfs_btree_ptr_addr(cur, 1, block);
3032         cpp = xfs_btree_ptr_addr(cur, 1, cblock);
3033 #ifdef DEBUG
3034         for (i = 0; i < numrecs; i++) {
3035                 int             error;
3036
3037                 error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
3038                 if (error) {
3039                         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3040                         return error;
3041                 }
3042         }
3043 #endif
3044         xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
3045
3046         cur->bc_ops->free_block(cur, cbp);
3047         XFS_BTREE_STATS_INC(cur, free);
3048
3049         cur->bc_bufs[level - 1] = NULL;
3050         be16_add_cpu(&block->bb_level, -1);
3051         xfs_trans_log_inode(cur->bc_tp, ip,
3052                 XFS_ILOG_CORE | XFS_ILOG_FBROOT(cur->bc_private.b.whichfork));
3053         cur->bc_nlevels--;
3054 out0:
3055         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3056         return 0;
3057 }
3058
3059 STATIC int
3060 xfs_btree_dec_cursor(
3061         struct xfs_btree_cur    *cur,
3062         int                     level,
3063         int                     *stat)
3064 {
3065         int                     error;
3066         int                     i;
3067
3068         if (level > 0) {
3069                 error = xfs_btree_decrement(cur, level, &i);
3070                 if (error)
3071                         return error;
3072         }
3073
3074         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3075         *stat = 1;
3076         return 0;
3077 }
3078
3079 /*
3080  * Single level of the btree record deletion routine.
3081  * Delete record pointed to by cur/level.
3082  * Remove the record from its block then rebalance the tree.
3083  * Return 0 for error, 1 for done, 2 to go on to the next level.
3084  */
3085 STATIC int                                      /* error */
3086 xfs_btree_delrec(
3087         struct xfs_btree_cur    *cur,           /* btree cursor */
3088         int                     level,          /* level removing record from */
3089         int                     *stat)          /* fail/done/go-on */
3090 {
3091         struct xfs_btree_block  *block;         /* btree block */
3092         union xfs_btree_ptr     cptr;           /* current block ptr */
3093         struct xfs_buf          *bp;            /* buffer for block */
3094         int                     error;          /* error return value */
3095         int                     i;              /* loop counter */
3096         union xfs_btree_key     key;            /* storage for keyp */
3097         union xfs_btree_key     *keyp = &key;   /* passed to the next level */
3098         union xfs_btree_ptr     lptr;           /* left sibling block ptr */
3099         struct xfs_buf          *lbp;           /* left buffer pointer */
3100         struct xfs_btree_block  *left;          /* left btree block */
3101         int                     lrecs = 0;      /* left record count */
3102         int                     ptr;            /* key/record index */
3103         union xfs_btree_ptr     rptr;           /* right sibling block ptr */
3104         struct xfs_buf          *rbp;           /* right buffer pointer */
3105         struct xfs_btree_block  *right;         /* right btree block */
3106         struct xfs_btree_block  *rrblock;       /* right-right btree block */
3107         struct xfs_buf          *rrbp;          /* right-right buffer pointer */
3108         int                     rrecs = 0;      /* right record count */
3109         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
3110         int                     numrecs;        /* temporary numrec count */
3111
3112         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3113         XFS_BTREE_TRACE_ARGI(cur, level);
3114
3115         tcur = NULL;
3116
3117         /* Get the index of the entry being deleted, check for nothing there. */
3118         ptr = cur->bc_ptrs[level];
3119         if (ptr == 0) {
3120                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3121                 *stat = 0;
3122                 return 0;
3123         }
3124
3125         /* Get the buffer & block containing the record or key/ptr. */
3126         block = xfs_btree_get_block(cur, level, &bp);
3127         numrecs = xfs_btree_get_numrecs(block);
3128
3129 #ifdef DEBUG
3130         error = xfs_btree_check_block(cur, block, level, bp);
3131         if (error)
3132                 goto error0;
3133 #endif
3134
3135         /* Fail if we're off the end of the block. */
3136         if (ptr > numrecs) {
3137                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3138                 *stat = 0;
3139                 return 0;
3140         }
3141
3142         XFS_BTREE_STATS_INC(cur, delrec);
3143         XFS_BTREE_STATS_ADD(cur, moves, numrecs - ptr);
3144
3145         /* Excise the entries being deleted. */
3146         if (level > 0) {
3147                 /* It's a nonleaf. operate on keys and ptrs */
3148                 union xfs_btree_key     *lkp;
3149                 union xfs_btree_ptr     *lpp;
3150
3151                 lkp = xfs_btree_key_addr(cur, ptr + 1, block);
3152                 lpp = xfs_btree_ptr_addr(cur, ptr + 1, block);
3153
3154 #ifdef DEBUG
3155                 for (i = 0; i < numrecs - ptr; i++) {
3156                         error = xfs_btree_check_ptr(cur, lpp, i, level);
3157                         if (error)
3158                                 goto error0;
3159                 }
3160 #endif
3161
3162                 if (ptr < numrecs) {
3163                         xfs_btree_shift_keys(cur, lkp, -1, numrecs - ptr);
3164                         xfs_btree_shift_ptrs(cur, lpp, -1, numrecs - ptr);
3165                         xfs_btree_log_keys(cur, bp, ptr, numrecs - 1);
3166                         xfs_btree_log_ptrs(cur, bp, ptr, numrecs - 1);
3167                 }
3168
3169                 /*
3170                  * If it's the first record in the block, we'll need to pass a
3171                  * key up to the next level (updkey).
3172                  */
3173                 if (ptr == 1)
3174                         keyp = xfs_btree_key_addr(cur, 1, block);
3175         } else {
3176                 /* It's a leaf. operate on records */
3177                 if (ptr < numrecs) {
3178                         xfs_btree_shift_recs(cur,
3179                                 xfs_btree_rec_addr(cur, ptr + 1, block),
3180                                 -1, numrecs - ptr);
3181                         xfs_btree_log_recs(cur, bp, ptr, numrecs - 1);
3182                 }
3183
3184                 /*
3185                  * If it's the first record in the block, we'll need a key
3186                  * structure to pass up to the next level (updkey).
3187                  */
3188                 if (ptr == 1) {
3189                         cur->bc_ops->init_key_from_rec(&key,
3190                                         xfs_btree_rec_addr(cur, 1, block));
3191                         keyp = &key;
3192                 }
3193         }
3194
3195         /*
3196          * Decrement and log the number of entries in the block.
3197          */
3198         xfs_btree_set_numrecs(block, --numrecs);
3199         xfs_btree_log_block(cur, bp, XFS_BB_NUMRECS);
3200
3201         /*
3202          * If we are tracking the last record in the tree and
3203          * we are at the far right edge of the tree, update it.
3204          */
3205         if (xfs_btree_is_lastrec(cur, block, level)) {
3206                 cur->bc_ops->update_lastrec(cur, block, NULL,
3207                                             ptr, LASTREC_DELREC);
3208         }
3209
3210         /*
3211          * We're at the root level.  First, shrink the root block in-memory.
3212          * Try to get rid of the next level down.  If we can't then there's
3213          * nothing left to do.
3214          */
3215         if (level == cur->bc_nlevels - 1) {
3216                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3217                         xfs_iroot_realloc(cur->bc_private.b.ip, -1,
3218                                           cur->bc_private.b.whichfork);
3219
3220                         error = xfs_btree_kill_iroot(cur);
3221                         if (error)
3222                                 goto error0;
3223
3224                         error = xfs_btree_dec_cursor(cur, level, stat);
3225                         if (error)
3226                                 goto error0;
3227                         *stat = 1;
3228                         return 0;
3229                 }
3230
3231                 /*
3232                  * If this is the root level, and there's only one entry left,
3233                  * and it's NOT the leaf level, then we can get rid of this
3234                  * level.
3235                  */
3236                 if (numrecs == 1 && level > 0) {
3237                         union xfs_btree_ptr     *pp;
3238                         /*
3239                          * pp is still set to the first pointer in the block.
3240                          * Make it the new root of the btree.
3241                          */
3242                         pp = xfs_btree_ptr_addr(cur, 1, block);
3243                         error = cur->bc_ops->kill_root(cur, bp, level, pp);
3244                         if (error)
3245                                 goto error0;
3246                 } else if (level > 0) {
3247                         error = xfs_btree_dec_cursor(cur, level, stat);
3248                         if (error)
3249                                 goto error0;
3250                 }
3251                 *stat = 1;
3252                 return 0;
3253         }
3254
3255         /*
3256          * If we deleted the leftmost entry in the block, update the
3257          * key values above us in the tree.
3258          */
3259         if (ptr == 1) {
3260                 error = xfs_btree_updkey(cur, keyp, level + 1);
3261                 if (error)
3262                         goto error0;
3263         }
3264
3265         /*
3266          * If the number of records remaining in the block is at least
3267          * the minimum, we're done.
3268          */
3269         if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) {
3270                 error = xfs_btree_dec_cursor(cur, level, stat);
3271                 if (error)
3272                         goto error0;
3273                 return 0;
3274         }
3275
3276         /*
3277          * Otherwise, we have to move some records around to keep the
3278          * tree balanced.  Look at the left and right sibling blocks to
3279          * see if we can re-balance by moving only one record.
3280          */
3281         xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
3282         xfs_btree_get_sibling(cur, block, &lptr, XFS_BB_LEFTSIB);
3283
3284         if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) {
3285                 /*
3286                  * One child of root, need to get a chance to copy its contents
3287                  * into the root and delete it. Can't go up to next level,
3288                  * there's nothing to delete there.
3289                  */
3290                 if (xfs_btree_ptr_is_null(cur, &rptr) &&
3291                     xfs_btree_ptr_is_null(cur, &lptr) &&
3292                     level == cur->bc_nlevels - 2) {
3293                         error = xfs_btree_kill_iroot(cur);
3294                         if (!error)
3295                                 error = xfs_btree_dec_cursor(cur, level, stat);
3296                         if (error)
3297                                 goto error0;
3298                         return 0;
3299                 }
3300         }
3301
3302         ASSERT(!xfs_btree_ptr_is_null(cur, &rptr) ||
3303                !xfs_btree_ptr_is_null(cur, &lptr));
3304
3305         /*
3306          * Duplicate the cursor so our btree manipulations here won't
3307          * disrupt the next level up.
3308          */
3309         error = xfs_btree_dup_cursor(cur, &tcur);
3310         if (error)
3311                 goto error0;
3312
3313         /*
3314          * If there's a right sibling, see if it's ok to shift an entry
3315          * out of it.
3316          */
3317         if (!xfs_btree_ptr_is_null(cur, &rptr)) {
3318                 /*
3319                  * Move the temp cursor to the last entry in the next block.
3320                  * Actually any entry but the first would suffice.
3321                  */
3322                 i = xfs_btree_lastrec(tcur, level);
3323                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3324
3325                 error = xfs_btree_increment(tcur, level, &i);
3326                 if (error)
3327                         goto error0;
3328                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3329
3330                 i = xfs_btree_lastrec(tcur, level);
3331                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3332
3333                 /* Grab a pointer to the block. */
3334                 right = xfs_btree_get_block(tcur, level, &rbp);
3335 #ifdef DEBUG
3336                 error = xfs_btree_check_block(tcur, right, level, rbp);
3337                 if (error)
3338                         goto error0;
3339 #endif
3340                 /* Grab the current block number, for future use. */
3341                 xfs_btree_get_sibling(tcur, right, &cptr, XFS_BB_LEFTSIB);
3342
3343                 /*
3344                  * If right block is full enough so that removing one entry
3345                  * won't make it too empty, and left-shifting an entry out
3346                  * of right to us works, we're done.
3347                  */
3348                 if (xfs_btree_get_numrecs(right) - 1 >=
3349                     cur->bc_ops->get_minrecs(tcur, level)) {
3350                         error = xfs_btree_lshift(tcur, level, &i);
3351                         if (error)
3352                                 goto error0;
3353                         if (i) {
3354                                 ASSERT(xfs_btree_get_numrecs(block) >=
3355                                        cur->bc_ops->get_minrecs(tcur, level));
3356
3357                                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3358                                 tcur = NULL;
3359
3360                                 error = xfs_btree_dec_cursor(cur, level, stat);
3361                                 if (error)
3362                                         goto error0;
3363                                 return 0;
3364                         }
3365                 }
3366
3367                 /*
3368                  * Otherwise, grab the number of records in right for
3369                  * future reference, and fix up the temp cursor to point
3370                  * to our block again (last record).
3371                  */
3372                 rrecs = xfs_btree_get_numrecs(right);
3373                 if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3374                         i = xfs_btree_firstrec(tcur, level);
3375                         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3376
3377                         error = xfs_btree_decrement(tcur, level, &i);
3378                         if (error)
3379                                 goto error0;
3380                         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3381                 }
3382         }
3383
3384         /*
3385          * If there's a left sibling, see if it's ok to shift an entry
3386          * out of it.
3387          */
3388         if (!xfs_btree_ptr_is_null(cur, &lptr)) {
3389                 /*
3390                  * Move the temp cursor to the first entry in the
3391                  * previous block.
3392                  */
3393                 i = xfs_btree_firstrec(tcur, level);
3394                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3395
3396                 error = xfs_btree_decrement(tcur, level, &i);
3397                 if (error)
3398                         goto error0;
3399                 i = xfs_btree_firstrec(tcur, level);
3400                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
3401
3402                 /* Grab a pointer to the block. */
3403                 left = xfs_btree_get_block(tcur, level, &lbp);
3404 #ifdef DEBUG
3405                 error = xfs_btree_check_block(cur, left, level, lbp);
3406                 if (error)
3407                         goto error0;
3408 #endif
3409                 /* Grab the current block number, for future use. */
3410                 xfs_btree_get_sibling(tcur, left, &cptr, XFS_BB_RIGHTSIB);
3411
3412                 /*
3413                  * If left block is full enough so that removing one entry
3414                  * won't make it too empty, and right-shifting an entry out
3415                  * of left to us works, we're done.
3416                  */
3417                 if (xfs_btree_get_numrecs(left) - 1 >=
3418                     cur->bc_ops->get_minrecs(tcur, level)) {
3419                         error = xfs_btree_rshift(tcur, level, &i);
3420                         if (error)
3421                                 goto error0;
3422                         if (i) {
3423                                 ASSERT(xfs_btree_get_numrecs(block) >=
3424                                        cur->bc_ops->get_minrecs(tcur, level));
3425                                 xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3426                                 tcur = NULL;
3427                                 if (level == 0)
3428                                         cur->bc_ptrs[0]++;
3429                                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3430                                 *stat = 1;
3431                                 return 0;
3432                         }
3433                 }
3434
3435                 /*
3436                  * Otherwise, grab the number of records in right for
3437                  * future reference.
3438                  */
3439                 lrecs = xfs_btree_get_numrecs(left);
3440         }
3441
3442         /* Delete the temp cursor, we're done with it. */
3443         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
3444         tcur = NULL;
3445
3446         /* If here, we need to do a join to keep the tree balanced. */
3447         ASSERT(!xfs_btree_ptr_is_null(cur, &cptr));
3448
3449         if (!xfs_btree_ptr_is_null(cur, &lptr) &&
3450             lrecs + xfs_btree_get_numrecs(block) <=
3451                         cur->bc_ops->get_maxrecs(cur, level)) {
3452                 /*
3453                  * Set "right" to be the starting block,
3454                  * "left" to be the left neighbor.
3455                  */
3456                 rptr = cptr;
3457                 right = block;
3458                 rbp = bp;
3459                 error = xfs_btree_read_buf_block(cur, &lptr, level,
3460                                                         0, &left, &lbp);
3461                 if (error)
3462                         goto error0;
3463
3464         /*
3465          * If that won't work, see if we can join with the right neighbor block.
3466          */
3467         } else if (!xfs_btree_ptr_is_null(cur, &rptr) &&
3468                    rrecs + xfs_btree_get_numrecs(block) <=
3469                         cur->bc_ops->get_maxrecs(cur, level)) {
3470                 /*
3471                  * Set "left" to be the starting block,
3472                  * "right" to be the right neighbor.
3473                  */
3474                 lptr = cptr;
3475                 left = block;
3476                 lbp = bp;
3477                 error = xfs_btree_read_buf_block(cur, &rptr, level,
3478                                                         0, &right, &rbp);
3479                 if (error)
3480                         goto error0;
3481
3482         /*
3483          * Otherwise, we can't fix the imbalance.
3484          * Just return.  This is probably a logic error, but it's not fatal.
3485          */
3486         } else {
3487                 error = xfs_btree_dec_cursor(cur, level, stat);
3488                 if (error)
3489                         goto error0;
3490                 return 0;
3491         }
3492
3493         rrecs = xfs_btree_get_numrecs(right);
3494         lrecs = xfs_btree_get_numrecs(left);
3495
3496         /*
3497          * We're now going to join "left" and "right" by moving all the stuff
3498          * in "right" to "left" and deleting "right".
3499          */
3500         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
3501         if (level > 0) {
3502                 /* It's a non-leaf.  Move keys and pointers. */
3503                 union xfs_btree_key     *lkp;   /* left btree key */
3504                 union xfs_btree_ptr     *lpp;   /* left address pointer */
3505                 union xfs_btree_key     *rkp;   /* right btree key */
3506                 union xfs_btree_ptr     *rpp;   /* right address pointer */
3507
3508                 lkp = xfs_btree_key_addr(cur, lrecs + 1, left);
3509                 lpp = xfs_btree_ptr_addr(cur, lrecs + 1, left);
3510                 rkp = xfs_btree_key_addr(cur, 1, right);
3511                 rpp = xfs_btree_ptr_addr(cur, 1, right);
3512 #ifdef DEBUG
3513                 for (i = 1; i < rrecs; i++) {
3514                         error = xfs_btree_check_ptr(cur, rpp, i, level);
3515                         if (error)
3516                                 goto error0;
3517                 }
3518 #endif
3519                 xfs_btree_copy_keys(cur, lkp, rkp, rrecs);
3520                 xfs_btree_copy_ptrs(cur, lpp, rpp, rrecs);
3521
3522                 xfs_btree_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
3523                 xfs_btree_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
3524         } else {
3525                 /* It's a leaf.  Move records.  */
3526                 union xfs_btree_rec     *lrp;   /* left record pointer */
3527                 union xfs_btree_rec     *rrp;   /* right record pointer */
3528
3529                 lrp = xfs_btree_rec_addr(cur, lrecs + 1, left);
3530                 rrp = xfs_btree_rec_addr(cur, 1, right);
3531
3532                 xfs_btree_copy_recs(cur, lrp, rrp, rrecs);
3533                 xfs_btree_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
3534         }
3535
3536         XFS_BTREE_STATS_INC(cur, join);
3537
3538         /*
3539          * Fix up the the number of records and right block pointer in the
3540          * surviving block, and log it.
3541          */
3542         xfs_btree_set_numrecs(left, lrecs + rrecs);
3543         xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB),
3544         xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3545         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
3546
3547         /* If there is a right sibling, point it to the remaining block. */
3548         xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
3549         if (!xfs_btree_ptr_is_null(cur, &cptr)) {
3550                 error = xfs_btree_read_buf_block(cur, &cptr, level,
3551                                                         0, &rrblock, &rrbp);
3552                 if (error)
3553                         goto error0;
3554                 xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
3555                 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
3556         }
3557
3558         /* Free the deleted block. */
3559         error = cur->bc_ops->free_block(cur, rbp);
3560         if (error)
3561                 goto error0;
3562         XFS_BTREE_STATS_INC(cur, free);
3563
3564         /*
3565          * If we joined with the left neighbor, set the buffer in the
3566          * cursor to the left block, and fix up the index.
3567          */
3568         if (bp != lbp) {
3569                 cur->bc_bufs[level] = lbp;
3570                 cur->bc_ptrs[level] += lrecs;
3571                 cur->bc_ra[level] = 0;
3572         }
3573         /*
3574          * If we joined with the right neighbor and there's a level above
3575          * us, increment the cursor at that level.
3576          */
3577         else if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) ||
3578                    (level + 1 < cur->bc_nlevels)) {
3579                 error = xfs_btree_increment(cur, level + 1, &i);
3580                 if (error)
3581                         goto error0;
3582         }
3583
3584         /*
3585          * Readjust the ptr at this level if it's not a leaf, since it's
3586          * still pointing at the deletion point, which makes the cursor
3587          * inconsistent.  If this makes the ptr 0, the caller fixes it up.
3588          * We can't use decrement because it would change the next level up.
3589          */
3590         if (level > 0)
3591                 cur->bc_ptrs[level]--;
3592
3593         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3594         /* Return value means the next level up has something to do. */
3595         *stat = 2;
3596         return 0;
3597
3598 error0:
3599         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3600         if (tcur)
3601                 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
3602         return error;
3603 }
3604
3605 /*
3606  * Delete the record pointed to by cur.
3607  * The cursor refers to the place where the record was (could be inserted)
3608  * when the operation returns.
3609  */
3610 int                                     /* error */
3611 xfs_btree_delete(
3612         struct xfs_btree_cur    *cur,
3613         int                     *stat)  /* success/failure */
3614 {
3615         int                     error;  /* error return value */
3616         int                     level;
3617         int                     i;
3618
3619         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
3620
3621         /*
3622          * Go up the tree, starting at leaf level.
3623          *
3624          * If 2 is returned then a join was done; go to the next level.
3625          * Otherwise we are done.
3626          */
3627         for (level = 0, i = 2; i == 2; level++) {
3628                 error = xfs_btree_delrec(cur, level, &i);
3629                 if (error)
3630                         goto error0;
3631         }
3632
3633         if (i == 0) {
3634                 for (level = 1; level < cur->bc_nlevels; level++) {
3635                         if (cur->bc_ptrs[level] == 0) {
3636                                 error = xfs_btree_decrement(cur, level, &i);
3637                                 if (error)
3638                                         goto error0;
3639                                 break;
3640                         }
3641                 }
3642         }
3643
3644         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
3645         *stat = i;
3646         return 0;
3647 error0:
3648         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
3649         return error;
3650 }
3651
3652 /*
3653  * Get the data from the pointed-to record.
3654  */
3655 int                                     /* error */
3656 xfs_btree_get_rec(
3657         struct xfs_btree_cur    *cur,   /* btree cursor */
3658         union xfs_btree_rec     **recp, /* output: btree record */
3659         int                     *stat)  /* output: success/failure */
3660 {
3661         struct xfs_btree_block  *block; /* btree block */
3662         struct xfs_buf          *bp;    /* buffer pointer */
3663         int                     ptr;    /* record number */
3664 #ifdef DEBUG
3665         int                     error;  /* error return value */
3666 #endif
3667
3668         ptr = cur->bc_ptrs[0];
3669         block = xfs_btree_get_block(cur, 0, &bp);
3670
3671 #ifdef DEBUG
3672         error = xfs_btree_check_block(cur, block, 0, bp);
3673         if (error)
3674                 return error;
3675 #endif
3676
3677         /*
3678          * Off the right end or left end, return failure.
3679          */
3680         if (ptr > xfs_btree_get_numrecs(block) || ptr <= 0) {
3681                 *stat = 0;
3682                 return 0;
3683         }
3684
3685         /*
3686          * Point to the record and extract its data.
3687          */
3688         *recp = xfs_btree_rec_addr(cur, ptr, block);
3689         *stat = 1;
3690         return 0;
3691 }