[XFS] implement generic xfs_btree_split
[linux-2.6] / fs / xfs / xfs_btree.c
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir2.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h"
30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir2_sf.h"
34 #include "xfs_attr_sf.h"
35 #include "xfs_dinode.h"
36 #include "xfs_inode.h"
37 #include "xfs_inode_item.h"
38 #include "xfs_btree.h"
39 #include "xfs_btree_trace.h"
40 #include "xfs_ialloc.h"
41 #include "xfs_error.h"
42
43 /*
44  * Cursor allocation zone.
45  */
46 kmem_zone_t     *xfs_btree_cur_zone;
47
48 /*
49  * Btree magic numbers.
50  */
51 const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
52         XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
53 };
54
55 /*
56  * External routines.
57  */
58
59 #ifdef DEBUG
60 /*
61  * Debug routine: check that keys are in the right order.
62  */
63 void
64 xfs_btree_check_key(
65         xfs_btnum_t     btnum,          /* btree identifier */
66         void            *ak1,           /* pointer to left (lower) key */
67         void            *ak2)           /* pointer to right (higher) key */
68 {
69         switch (btnum) {
70         case XFS_BTNUM_BNO: {
71                 xfs_alloc_key_t *k1;
72                 xfs_alloc_key_t *k2;
73
74                 k1 = ak1;
75                 k2 = ak2;
76                 ASSERT(be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock));
77                 break;
78             }
79         case XFS_BTNUM_CNT: {
80                 xfs_alloc_key_t *k1;
81                 xfs_alloc_key_t *k2;
82
83                 k1 = ak1;
84                 k2 = ak2;
85                 ASSERT(be32_to_cpu(k1->ar_blockcount) < be32_to_cpu(k2->ar_blockcount) ||
86                        (k1->ar_blockcount == k2->ar_blockcount &&
87                         be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock)));
88                 break;
89             }
90         case XFS_BTNUM_BMAP: {
91                 xfs_bmbt_key_t  *k1;
92                 xfs_bmbt_key_t  *k2;
93
94                 k1 = ak1;
95                 k2 = ak2;
96                 ASSERT(be64_to_cpu(k1->br_startoff) < be64_to_cpu(k2->br_startoff));
97                 break;
98             }
99         case XFS_BTNUM_INO: {
100                 xfs_inobt_key_t *k1;
101                 xfs_inobt_key_t *k2;
102
103                 k1 = ak1;
104                 k2 = ak2;
105                 ASSERT(be32_to_cpu(k1->ir_startino) < be32_to_cpu(k2->ir_startino));
106                 break;
107             }
108         default:
109                 ASSERT(0);
110         }
111 }
112
113 /*
114  * Debug routine: check that records are in the right order.
115  */
116 void
117 xfs_btree_check_rec(
118         xfs_btnum_t     btnum,          /* btree identifier */
119         void            *ar1,           /* pointer to left (lower) record */
120         void            *ar2)           /* pointer to right (higher) record */
121 {
122         switch (btnum) {
123         case XFS_BTNUM_BNO: {
124                 xfs_alloc_rec_t *r1;
125                 xfs_alloc_rec_t *r2;
126
127                 r1 = ar1;
128                 r2 = ar2;
129                 ASSERT(be32_to_cpu(r1->ar_startblock) +
130                        be32_to_cpu(r1->ar_blockcount) <=
131                        be32_to_cpu(r2->ar_startblock));
132                 break;
133             }
134         case XFS_BTNUM_CNT: {
135                 xfs_alloc_rec_t *r1;
136                 xfs_alloc_rec_t *r2;
137
138                 r1 = ar1;
139                 r2 = ar2;
140                 ASSERT(be32_to_cpu(r1->ar_blockcount) < be32_to_cpu(r2->ar_blockcount) ||
141                        (r1->ar_blockcount == r2->ar_blockcount &&
142                         be32_to_cpu(r1->ar_startblock) < be32_to_cpu(r2->ar_startblock)));
143                 break;
144             }
145         case XFS_BTNUM_BMAP: {
146                 xfs_bmbt_rec_t  *r1;
147                 xfs_bmbt_rec_t  *r2;
148
149                 r1 = ar1;
150                 r2 = ar2;
151                 ASSERT(xfs_bmbt_disk_get_startoff(r1) +
152                        xfs_bmbt_disk_get_blockcount(r1) <=
153                        xfs_bmbt_disk_get_startoff(r2));
154                 break;
155             }
156         case XFS_BTNUM_INO: {
157                 xfs_inobt_rec_t *r1;
158                 xfs_inobt_rec_t *r2;
159
160                 r1 = ar1;
161                 r2 = ar2;
162                 ASSERT(be32_to_cpu(r1->ir_startino) + XFS_INODES_PER_CHUNK <=
163                        be32_to_cpu(r2->ir_startino));
164                 break;
165             }
166         default:
167                 ASSERT(0);
168         }
169 }
170 #endif  /* DEBUG */
171
172 int                                     /* error (0 or EFSCORRUPTED) */
173 xfs_btree_check_lblock(
174         struct xfs_btree_cur    *cur,   /* btree cursor */
175         struct xfs_btree_lblock *block, /* btree long form block pointer */
176         int                     level,  /* level of the btree block */
177         struct xfs_buf          *bp)    /* buffer for block, if any */
178 {
179         int                     lblock_ok; /* block passes checks */
180         struct xfs_mount        *mp;    /* file system mount point */
181
182         mp = cur->bc_mp;
183         lblock_ok =
184                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
185                 be16_to_cpu(block->bb_level) == level &&
186                 be16_to_cpu(block->bb_numrecs) <=
187                         cur->bc_ops->get_maxrecs(cur, level) &&
188                 block->bb_leftsib &&
189                 (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO ||
190                  XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) &&
191                 block->bb_rightsib &&
192                 (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO ||
193                  XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_rightsib)));
194         if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
195                         XFS_ERRTAG_BTREE_CHECK_LBLOCK,
196                         XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
197                 if (bp)
198                         xfs_buftrace("LBTREE ERROR", bp);
199                 XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
200                                  mp);
201                 return XFS_ERROR(EFSCORRUPTED);
202         }
203         return 0;
204 }
205
206 int                                     /* error (0 or EFSCORRUPTED) */
207 xfs_btree_check_sblock(
208         struct xfs_btree_cur    *cur,   /* btree cursor */
209         struct xfs_btree_sblock *block, /* btree short form block pointer */
210         int                     level,  /* level of the btree block */
211         struct xfs_buf          *bp)    /* buffer containing block */
212 {
213         struct xfs_buf          *agbp;  /* buffer for ag. freespace struct */
214         struct xfs_agf          *agf;   /* ag. freespace structure */
215         xfs_agblock_t           agflen; /* native ag. freespace length */
216         int                     sblock_ok; /* block passes checks */
217
218         agbp = cur->bc_private.a.agbp;
219         agf = XFS_BUF_TO_AGF(agbp);
220         agflen = be32_to_cpu(agf->agf_length);
221         sblock_ok =
222                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
223                 be16_to_cpu(block->bb_level) == level &&
224                 be16_to_cpu(block->bb_numrecs) <=
225                         cur->bc_ops->get_maxrecs(cur, level) &&
226                 (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK ||
227                  be32_to_cpu(block->bb_leftsib) < agflen) &&
228                 block->bb_leftsib &&
229                 (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK ||
230                  be32_to_cpu(block->bb_rightsib) < agflen) &&
231                 block->bb_rightsib;
232         if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
233                         XFS_ERRTAG_BTREE_CHECK_SBLOCK,
234                         XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
235                 if (bp)
236                         xfs_buftrace("SBTREE ERROR", bp);
237                 XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW,
238                                  cur->bc_mp);
239                 return XFS_ERROR(EFSCORRUPTED);
240         }
241         return 0;
242 }
243
244 /*
245  * Debug routine: check that block header is ok.
246  */
247 int
248 xfs_btree_check_block(
249         struct xfs_btree_cur    *cur,   /* btree cursor */
250         struct xfs_btree_block  *block, /* generic btree block pointer */
251         int                     level,  /* level of the btree block */
252         struct xfs_buf          *bp)    /* buffer containing block, if any */
253 {
254         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
255                 return xfs_btree_check_lblock(cur,
256                                 (struct xfs_btree_lblock *)block, level, bp);
257         } else {
258                 return xfs_btree_check_sblock(cur,
259                                 (struct xfs_btree_sblock *)block, level, bp);
260         }
261 }
262
263 /*
264  * Check that (long) pointer is ok.
265  */
266 int                                     /* error (0 or EFSCORRUPTED) */
267 xfs_btree_check_lptr(
268         struct xfs_btree_cur    *cur,   /* btree cursor */
269         xfs_dfsbno_t            bno,    /* btree block disk address */
270         int                     level)  /* btree block level */
271 {
272         XFS_WANT_CORRUPTED_RETURN(
273                 level > 0 &&
274                 bno != NULLDFSBNO &&
275                 XFS_FSB_SANITY_CHECK(cur->bc_mp, bno));
276         return 0;
277 }
278
279 /*
280  * Check that (short) pointer is ok.
281  */
282 int                                     /* error (0 or EFSCORRUPTED) */
283 xfs_btree_check_sptr(
284         struct xfs_btree_cur    *cur,   /* btree cursor */
285         xfs_agblock_t           bno,    /* btree block disk address */
286         int                     level)  /* btree block level */
287 {
288         xfs_agblock_t           agblocks = cur->bc_mp->m_sb.sb_agblocks;
289
290         XFS_WANT_CORRUPTED_RETURN(
291                 level > 0 &&
292                 bno != NULLAGBLOCK &&
293                 bno != 0 &&
294                 bno < agblocks);
295         return 0;
296 }
297
298 /*
299  * Check that block ptr is ok.
300  */
301 int                                     /* error (0 or EFSCORRUPTED) */
302 xfs_btree_check_ptr(
303         struct xfs_btree_cur    *cur,   /* btree cursor */
304         union xfs_btree_ptr     *ptr,   /* btree block disk address */
305         int                     index,  /* offset from ptr to check */
306         int                     level)  /* btree block level */
307 {
308         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
309                 return xfs_btree_check_lptr(cur,
310                                 be64_to_cpu((&ptr->l)[index]), level);
311         } else {
312                 return xfs_btree_check_sptr(cur,
313                                 be32_to_cpu((&ptr->s)[index]), level);
314         }
315 }
316
317 /*
318  * Delete the btree cursor.
319  */
320 void
321 xfs_btree_del_cursor(
322         xfs_btree_cur_t *cur,           /* btree cursor */
323         int             error)          /* del because of error */
324 {
325         int             i;              /* btree level */
326
327         /*
328          * Clear the buffer pointers, and release the buffers.
329          * If we're doing this in the face of an error, we
330          * need to make sure to inspect all of the entries
331          * in the bc_bufs array for buffers to be unlocked.
332          * This is because some of the btree code works from
333          * level n down to 0, and if we get an error along
334          * the way we won't have initialized all the entries
335          * down to 0.
336          */
337         for (i = 0; i < cur->bc_nlevels; i++) {
338                 if (cur->bc_bufs[i])
339                         xfs_btree_setbuf(cur, i, NULL);
340                 else if (!error)
341                         break;
342         }
343         /*
344          * Can't free a bmap cursor without having dealt with the
345          * allocated indirect blocks' accounting.
346          */
347         ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
348                cur->bc_private.b.allocated == 0);
349         /*
350          * Free the cursor.
351          */
352         kmem_zone_free(xfs_btree_cur_zone, cur);
353 }
354
355 /*
356  * Duplicate the btree cursor.
357  * Allocate a new one, copy the record, re-get the buffers.
358  */
359 int                                     /* error */
360 xfs_btree_dup_cursor(
361         xfs_btree_cur_t *cur,           /* input cursor */
362         xfs_btree_cur_t **ncur)         /* output cursor */
363 {
364         xfs_buf_t       *bp;            /* btree block's buffer pointer */
365         int             error;          /* error return value */
366         int             i;              /* level number of btree block */
367         xfs_mount_t     *mp;            /* mount structure for filesystem */
368         xfs_btree_cur_t *new;           /* new cursor value */
369         xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
370
371         tp = cur->bc_tp;
372         mp = cur->bc_mp;
373
374         /*
375          * Allocate a new cursor like the old one.
376          */
377         new = cur->bc_ops->dup_cursor(cur);
378
379         /*
380          * Copy the record currently in the cursor.
381          */
382         new->bc_rec = cur->bc_rec;
383
384         /*
385          * For each level current, re-get the buffer and copy the ptr value.
386          */
387         for (i = 0; i < new->bc_nlevels; i++) {
388                 new->bc_ptrs[i] = cur->bc_ptrs[i];
389                 new->bc_ra[i] = cur->bc_ra[i];
390                 if ((bp = cur->bc_bufs[i])) {
391                         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
392                                 XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) {
393                                 xfs_btree_del_cursor(new, error);
394                                 *ncur = NULL;
395                                 return error;
396                         }
397                         new->bc_bufs[i] = bp;
398                         ASSERT(bp);
399                         ASSERT(!XFS_BUF_GETERROR(bp));
400                 } else
401                         new->bc_bufs[i] = NULL;
402         }
403         *ncur = new;
404         return 0;
405 }
406
407 /*
408  * XFS btree block layout and addressing:
409  *
410  * There are two types of blocks in the btree: leaf and non-leaf blocks.
411  *
412  * The leaf record start with a header then followed by records containing
413  * the values.  A non-leaf block also starts with the same header, and
414  * then first contains lookup keys followed by an equal number of pointers
415  * to the btree blocks at the previous level.
416  *
417  *              +--------+-------+-------+-------+-------+-------+-------+
418  * Leaf:        | header | rec 1 | rec 2 | rec 3 | rec 4 | rec 5 | rec N |
419  *              +--------+-------+-------+-------+-------+-------+-------+
420  *
421  *              +--------+-------+-------+-------+-------+-------+-------+
422  * Non-Leaf:    | header | key 1 | key 2 | key N | ptr 1 | ptr 2 | ptr N |
423  *              +--------+-------+-------+-------+-------+-------+-------+
424  *
425  * The header is called struct xfs_btree_block for reasons better left unknown
426  * and comes in different versions for short (32bit) and long (64bit) block
427  * pointers.  The record and key structures are defined by the btree instances
428  * and opaque to the btree core.  The block pointers are simple disk endian
429  * integers, available in a short (32bit) and long (64bit) variant.
430  *
431  * The helpers below calculate the offset of a given record, key or pointer
432  * into a btree block (xfs_btree_*_offset) or return a pointer to the given
433  * record, key or pointer (xfs_btree_*_addr).  Note that all addressing
434  * inside the btree block is done using indices starting at one, not zero!
435  */
436
437 /*
438  * Return size of the btree block header for this btree instance.
439  */
440 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
441 {
442         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
443                 sizeof(struct xfs_btree_lblock) :
444                 sizeof(struct xfs_btree_sblock);
445 }
446
447 /*
448  * Return size of btree block pointers for this btree instance.
449  */
450 static inline size_t xfs_btree_ptr_len(struct xfs_btree_cur *cur)
451 {
452         return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
453                 sizeof(__be64) : sizeof(__be32);
454 }
455
456 /*
457  * Calculate offset of the n-th record in a btree block.
458  */
459 STATIC size_t
460 xfs_btree_rec_offset(
461         struct xfs_btree_cur    *cur,
462         int                     n)
463 {
464         return xfs_btree_block_len(cur) +
465                 (n - 1) * cur->bc_ops->rec_len;
466 }
467
468 /*
469  * Calculate offset of the n-th key in a btree block.
470  */
471 STATIC size_t
472 xfs_btree_key_offset(
473         struct xfs_btree_cur    *cur,
474         int                     n)
475 {
476         return xfs_btree_block_len(cur) +
477                 (n - 1) * cur->bc_ops->key_len;
478 }
479
480 /*
481  * Calculate offset of the n-th block pointer in a btree block.
482  */
483 STATIC size_t
484 xfs_btree_ptr_offset(
485         struct xfs_btree_cur    *cur,
486         int                     n,
487         int                     level)
488 {
489         return xfs_btree_block_len(cur) +
490                 cur->bc_ops->get_maxrecs(cur, level) * cur->bc_ops->key_len +
491                 (n - 1) * xfs_btree_ptr_len(cur);
492 }
493
494 /*
495  * Return a pointer to the n-th record in the btree block.
496  */
497 STATIC union xfs_btree_rec *
498 xfs_btree_rec_addr(
499         struct xfs_btree_cur    *cur,
500         int                     n,
501         struct xfs_btree_block  *block)
502 {
503         return (union xfs_btree_rec *)
504                 ((char *)block + xfs_btree_rec_offset(cur, n));
505 }
506
507 /*
508  * Return a pointer to the n-th key in the btree block.
509  */
510 STATIC union xfs_btree_key *
511 xfs_btree_key_addr(
512         struct xfs_btree_cur    *cur,
513         int                     n,
514         struct xfs_btree_block  *block)
515 {
516         return (union xfs_btree_key *)
517                 ((char *)block + xfs_btree_key_offset(cur, n));
518 }
519
520 /*
521  * Return a pointer to the n-th block pointer in the btree block.
522  */
523 STATIC union xfs_btree_ptr *
524 xfs_btree_ptr_addr(
525         struct xfs_btree_cur    *cur,
526         int                     n,
527         struct xfs_btree_block  *block)
528 {
529         int                     level = xfs_btree_get_level(block);
530
531         ASSERT(block->bb_level != 0);
532
533         return (union xfs_btree_ptr *)
534                 ((char *)block + xfs_btree_ptr_offset(cur, n, level));
535 }
536
537 /*
538  * Get a the root block which is stored in the inode.
539  *
540  * For now this btree implementation assumes the btree root is always
541  * stored in the if_broot field of an inode fork.
542  */
543 STATIC struct xfs_btree_block *
544 xfs_btree_get_iroot(
545        struct xfs_btree_cur    *cur)
546 {
547        struct xfs_ifork        *ifp;
548
549        ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, cur->bc_private.b.whichfork);
550        return (struct xfs_btree_block *)ifp->if_broot;
551 }
552
553 /*
554  * Retrieve the block pointer from the cursor at the given level.
555  * This may be an inode btree root or from a buffer.
556  */
557 STATIC struct xfs_btree_block *         /* generic btree block pointer */
558 xfs_btree_get_block(
559         struct xfs_btree_cur    *cur,   /* btree cursor */
560         int                     level,  /* level in btree */
561         struct xfs_buf          **bpp)  /* buffer containing the block */
562 {
563         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
564             (level == cur->bc_nlevels - 1)) {
565                 *bpp = NULL;
566                 return xfs_btree_get_iroot(cur);
567         }
568
569         *bpp = cur->bc_bufs[level];
570         return XFS_BUF_TO_BLOCK(*bpp);
571 }
572
573 /*
574  * Get a buffer for the block, return it with no data read.
575  * Long-form addressing.
576  */
577 xfs_buf_t *                             /* buffer for fsbno */
578 xfs_btree_get_bufl(
579         xfs_mount_t     *mp,            /* file system mount point */
580         xfs_trans_t     *tp,            /* transaction pointer */
581         xfs_fsblock_t   fsbno,          /* file system block number */
582         uint            lock)           /* lock flags for get_buf */
583 {
584         xfs_buf_t       *bp;            /* buffer pointer (return value) */
585         xfs_daddr_t             d;              /* real disk block address */
586
587         ASSERT(fsbno != NULLFSBLOCK);
588         d = XFS_FSB_TO_DADDR(mp, fsbno);
589         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
590         ASSERT(bp);
591         ASSERT(!XFS_BUF_GETERROR(bp));
592         return bp;
593 }
594
595 /*
596  * Get a buffer for the block, return it with no data read.
597  * Short-form addressing.
598  */
599 xfs_buf_t *                             /* buffer for agno/agbno */
600 xfs_btree_get_bufs(
601         xfs_mount_t     *mp,            /* file system mount point */
602         xfs_trans_t     *tp,            /* transaction pointer */
603         xfs_agnumber_t  agno,           /* allocation group number */
604         xfs_agblock_t   agbno,          /* allocation group block number */
605         uint            lock)           /* lock flags for get_buf */
606 {
607         xfs_buf_t       *bp;            /* buffer pointer (return value) */
608         xfs_daddr_t             d;              /* real disk block address */
609
610         ASSERT(agno != NULLAGNUMBER);
611         ASSERT(agbno != NULLAGBLOCK);
612         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
613         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
614         ASSERT(bp);
615         ASSERT(!XFS_BUF_GETERROR(bp));
616         return bp;
617 }
618
619 /*
620  * Check for the cursor referring to the last block at the given level.
621  */
622 int                                     /* 1=is last block, 0=not last block */
623 xfs_btree_islastblock(
624         xfs_btree_cur_t         *cur,   /* btree cursor */
625         int                     level)  /* level to check */
626 {
627         xfs_btree_block_t       *block; /* generic btree block pointer */
628         xfs_buf_t               *bp;    /* buffer containing block */
629
630         block = xfs_btree_get_block(cur, level, &bp);
631         xfs_btree_check_block(cur, block, level, bp);
632         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
633                 return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
634         else
635                 return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
636 }
637
638 /*
639  * Change the cursor to point to the first record at the given level.
640  * Other levels are unaffected.
641  */
642 int                                     /* success=1, failure=0 */
643 xfs_btree_firstrec(
644         xfs_btree_cur_t         *cur,   /* btree cursor */
645         int                     level)  /* level to change */
646 {
647         xfs_btree_block_t       *block; /* generic btree block pointer */
648         xfs_buf_t               *bp;    /* buffer containing block */
649
650         /*
651          * Get the block pointer for this level.
652          */
653         block = xfs_btree_get_block(cur, level, &bp);
654         xfs_btree_check_block(cur, block, level, bp);
655         /*
656          * It's empty, there is no such record.
657          */
658         if (!block->bb_numrecs)
659                 return 0;
660         /*
661          * Set the ptr value to 1, that's the first record/key.
662          */
663         cur->bc_ptrs[level] = 1;
664         return 1;
665 }
666
667 /*
668  * Change the cursor to point to the last record in the current block
669  * at the given level.  Other levels are unaffected.
670  */
671 int                                     /* success=1, failure=0 */
672 xfs_btree_lastrec(
673         xfs_btree_cur_t         *cur,   /* btree cursor */
674         int                     level)  /* level to change */
675 {
676         xfs_btree_block_t       *block; /* generic btree block pointer */
677         xfs_buf_t               *bp;    /* buffer containing block */
678
679         /*
680          * Get the block pointer for this level.
681          */
682         block = xfs_btree_get_block(cur, level, &bp);
683         xfs_btree_check_block(cur, block, level, bp);
684         /*
685          * It's empty, there is no such record.
686          */
687         if (!block->bb_numrecs)
688                 return 0;
689         /*
690          * Set the ptr value to numrecs, that's the last record/key.
691          */
692         cur->bc_ptrs[level] = be16_to_cpu(block->bb_numrecs);
693         return 1;
694 }
695
696 /*
697  * Compute first and last byte offsets for the fields given.
698  * Interprets the offsets table, which contains struct field offsets.
699  */
700 void
701 xfs_btree_offsets(
702         __int64_t       fields,         /* bitmask of fields */
703         const short     *offsets,       /* table of field offsets */
704         int             nbits,          /* number of bits to inspect */
705         int             *first,         /* output: first byte offset */
706         int             *last)          /* output: last byte offset */
707 {
708         int             i;              /* current bit number */
709         __int64_t       imask;          /* mask for current bit number */
710
711         ASSERT(fields != 0);
712         /*
713          * Find the lowest bit, so the first byte offset.
714          */
715         for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
716                 if (imask & fields) {
717                         *first = offsets[i];
718                         break;
719                 }
720         }
721         /*
722          * Find the highest bit, so the last byte offset.
723          */
724         for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
725                 if (imask & fields) {
726                         *last = offsets[i + 1] - 1;
727                         break;
728                 }
729         }
730 }
731
732 /*
733  * Get a buffer for the block, return it read in.
734  * Long-form addressing.
735  */
736 int                                     /* error */
737 xfs_btree_read_bufl(
738         xfs_mount_t     *mp,            /* file system mount point */
739         xfs_trans_t     *tp,            /* transaction pointer */
740         xfs_fsblock_t   fsbno,          /* file system block number */
741         uint            lock,           /* lock flags for read_buf */
742         xfs_buf_t       **bpp,          /* buffer for fsbno */
743         int             refval)         /* ref count value for buffer */
744 {
745         xfs_buf_t       *bp;            /* return value */
746         xfs_daddr_t             d;              /* real disk block address */
747         int             error;
748
749         ASSERT(fsbno != NULLFSBLOCK);
750         d = XFS_FSB_TO_DADDR(mp, fsbno);
751         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
752                         mp->m_bsize, lock, &bp))) {
753                 return error;
754         }
755         ASSERT(!bp || !XFS_BUF_GETERROR(bp));
756         if (bp != NULL) {
757                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
758         }
759         *bpp = bp;
760         return 0;
761 }
762
763 /*
764  * Get a buffer for the block, return it read in.
765  * Short-form addressing.
766  */
767 int                                     /* error */
768 xfs_btree_read_bufs(
769         xfs_mount_t     *mp,            /* file system mount point */
770         xfs_trans_t     *tp,            /* transaction pointer */
771         xfs_agnumber_t  agno,           /* allocation group number */
772         xfs_agblock_t   agbno,          /* allocation group block number */
773         uint            lock,           /* lock flags for read_buf */
774         xfs_buf_t       **bpp,          /* buffer for agno/agbno */
775         int             refval)         /* ref count value for buffer */
776 {
777         xfs_buf_t       *bp;            /* return value */
778         xfs_daddr_t     d;              /* real disk block address */
779         int             error;
780
781         ASSERT(agno != NULLAGNUMBER);
782         ASSERT(agbno != NULLAGBLOCK);
783         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
784         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
785                                         mp->m_bsize, lock, &bp))) {
786                 return error;
787         }
788         ASSERT(!bp || !XFS_BUF_GETERROR(bp));
789         if (bp != NULL) {
790                 switch (refval) {
791                 case XFS_ALLOC_BTREE_REF:
792                         XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
793                         break;
794                 case XFS_INO_BTREE_REF:
795                         XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval);
796                         break;
797                 }
798         }
799         *bpp = bp;
800         return 0;
801 }
802
803 /*
804  * Read-ahead the block, don't wait for it, don't return a buffer.
805  * Long-form addressing.
806  */
807 /* ARGSUSED */
808 void
809 xfs_btree_reada_bufl(
810         xfs_mount_t     *mp,            /* file system mount point */
811         xfs_fsblock_t   fsbno,          /* file system block number */
812         xfs_extlen_t    count)          /* count of filesystem blocks */
813 {
814         xfs_daddr_t             d;
815
816         ASSERT(fsbno != NULLFSBLOCK);
817         d = XFS_FSB_TO_DADDR(mp, fsbno);
818         xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
819 }
820
821 /*
822  * Read-ahead the block, don't wait for it, don't return a buffer.
823  * Short-form addressing.
824  */
825 /* ARGSUSED */
826 void
827 xfs_btree_reada_bufs(
828         xfs_mount_t     *mp,            /* file system mount point */
829         xfs_agnumber_t  agno,           /* allocation group number */
830         xfs_agblock_t   agbno,          /* allocation group block number */
831         xfs_extlen_t    count)          /* count of filesystem blocks */
832 {
833         xfs_daddr_t             d;
834
835         ASSERT(agno != NULLAGNUMBER);
836         ASSERT(agbno != NULLAGBLOCK);
837         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
838         xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
839 }
840
841 STATIC int
842 xfs_btree_readahead_lblock(
843         struct xfs_btree_cur    *cur,
844         int                     lr,
845         struct xfs_btree_block  *block)
846 {
847         int                     rval = 0;
848         xfs_fsblock_t           left = be64_to_cpu(block->bb_u.l.bb_leftsib);
849         xfs_fsblock_t           right = be64_to_cpu(block->bb_u.l.bb_rightsib);
850
851         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
852                 xfs_btree_reada_bufl(cur->bc_mp, left, 1);
853                 rval++;
854         }
855
856         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
857                 xfs_btree_reada_bufl(cur->bc_mp, right, 1);
858                 rval++;
859         }
860
861         return rval;
862 }
863
864 STATIC int
865 xfs_btree_readahead_sblock(
866         struct xfs_btree_cur    *cur,
867         int                     lr,
868         struct xfs_btree_block *block)
869 {
870         int                     rval = 0;
871         xfs_agblock_t           left = be32_to_cpu(block->bb_u.s.bb_leftsib);
872         xfs_agblock_t           right = be32_to_cpu(block->bb_u.s.bb_rightsib);
873
874
875         if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
876                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
877                                      left, 1);
878                 rval++;
879         }
880
881         if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
882                 xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
883                                      right, 1);
884                 rval++;
885         }
886
887         return rval;
888 }
889
890 /*
891  * Read-ahead btree blocks, at the given level.
892  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
893  */
894 int
895 xfs_btree_readahead(
896         struct xfs_btree_cur    *cur,           /* btree cursor */
897         int                     lev,            /* level in btree */
898         int                     lr)             /* left/right bits */
899 {
900         struct xfs_btree_block  *block;
901
902         /*
903          * No readahead needed if we are at the root level and the
904          * btree root is stored in the inode.
905          */
906         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
907             (lev == cur->bc_nlevels - 1))
908                 return 0;
909
910         if ((cur->bc_ra[lev] | lr) == cur->bc_ra[lev])
911                 return 0;
912
913         cur->bc_ra[lev] |= lr;
914         block = XFS_BUF_TO_BLOCK(cur->bc_bufs[lev]);
915
916         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
917                 return xfs_btree_readahead_lblock(cur, lr, block);
918         return xfs_btree_readahead_sblock(cur, lr, block);
919 }
920
921 /*
922  * Set the buffer for level "lev" in the cursor to bp, releasing
923  * any previous buffer.
924  */
925 void
926 xfs_btree_setbuf(
927         xfs_btree_cur_t         *cur,   /* btree cursor */
928         int                     lev,    /* level in btree */
929         xfs_buf_t               *bp)    /* new buffer to set */
930 {
931         xfs_btree_block_t       *b;     /* btree block */
932         xfs_buf_t               *obp;   /* old buffer pointer */
933
934         obp = cur->bc_bufs[lev];
935         if (obp)
936                 xfs_trans_brelse(cur->bc_tp, obp);
937         cur->bc_bufs[lev] = bp;
938         cur->bc_ra[lev] = 0;
939         if (!bp)
940                 return;
941         b = XFS_BUF_TO_BLOCK(bp);
942         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
943                 if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
944                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
945                 if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
946                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
947         } else {
948                 if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK)
949                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
950                 if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK)
951                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
952         }
953 }
954
955 STATIC int
956 xfs_btree_ptr_is_null(
957         struct xfs_btree_cur    *cur,
958         union xfs_btree_ptr     *ptr)
959 {
960         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
961                 return be64_to_cpu(ptr->l) == NULLFSBLOCK;
962         else
963                 return be32_to_cpu(ptr->s) == NULLAGBLOCK;
964 }
965
966 /*
967  * Get/set/init sibling pointers
968  */
969 STATIC void
970 xfs_btree_get_sibling(
971         struct xfs_btree_cur    *cur,
972         struct xfs_btree_block  *block,
973         union xfs_btree_ptr     *ptr,
974         int                     lr)
975 {
976         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
977
978         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
979                 if (lr == XFS_BB_RIGHTSIB)
980                         ptr->l = block->bb_u.l.bb_rightsib;
981                 else
982                         ptr->l = block->bb_u.l.bb_leftsib;
983         } else {
984                 if (lr == XFS_BB_RIGHTSIB)
985                         ptr->s = block->bb_u.s.bb_rightsib;
986                 else
987                         ptr->s = block->bb_u.s.bb_leftsib;
988         }
989 }
990
991 STATIC void
992 xfs_btree_set_sibling(
993         struct xfs_btree_cur    *cur,
994         struct xfs_btree_block  *block,
995         union xfs_btree_ptr     *ptr,
996         int                     lr)
997 {
998         ASSERT(lr == XFS_BB_LEFTSIB || lr == XFS_BB_RIGHTSIB);
999
1000         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1001                 if (lr == XFS_BB_RIGHTSIB)
1002                         block->bb_u.l.bb_rightsib = ptr->l;
1003                 else
1004                         block->bb_u.l.bb_leftsib = ptr->l;
1005         } else {
1006                 if (lr == XFS_BB_RIGHTSIB)
1007                         block->bb_u.s.bb_rightsib = ptr->s;
1008                 else
1009                         block->bb_u.s.bb_leftsib = ptr->s;
1010         }
1011 }
1012
1013 STATIC void
1014 xfs_btree_init_block(
1015         struct xfs_btree_cur    *cur,
1016         int                     level,
1017         int                     numrecs,
1018         struct xfs_btree_block  *new)   /* new block */
1019 {
1020         new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1021         new->bb_level = cpu_to_be16(level);
1022         new->bb_numrecs = cpu_to_be16(numrecs);
1023
1024         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1025                 new->bb_u.l.bb_leftsib = cpu_to_be64(NULLFSBLOCK);
1026                 new->bb_u.l.bb_rightsib = cpu_to_be64(NULLFSBLOCK);
1027         } else {
1028                 new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1029                 new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
1030         }
1031 }
1032
1033 /*
1034  * Return true if ptr is the last record in the btree and
1035  * we need to track updateÑ• to this record.  The decision
1036  * will be further refined in the update_lastrec method.
1037  */
1038 STATIC int
1039 xfs_btree_is_lastrec(
1040         struct xfs_btree_cur    *cur,
1041         struct xfs_btree_block  *block,
1042         int                     level)
1043 {
1044         union xfs_btree_ptr     ptr;
1045
1046         if (level > 0)
1047                 return 0;
1048         if (!(cur->bc_flags & XFS_BTREE_LASTREC_UPDATE))
1049                 return 0;
1050
1051         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1052         if (!xfs_btree_ptr_is_null(cur, &ptr))
1053                 return 0;
1054         return 1;
1055 }
1056
1057 STATIC void
1058 xfs_btree_buf_to_ptr(
1059         struct xfs_btree_cur    *cur,
1060         struct xfs_buf          *bp,
1061         union xfs_btree_ptr     *ptr)
1062 {
1063         if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
1064                 ptr->l = cpu_to_be64(XFS_DADDR_TO_FSB(cur->bc_mp,
1065                                         XFS_BUF_ADDR(bp)));
1066         else {
1067                 ptr->s = cpu_to_be32(XFS_DADDR_TO_AGBNO(cur->bc_mp,
1068                                         XFS_BUF_ADDR(bp)));
1069         }
1070 }
1071
1072 STATIC xfs_daddr_t
1073 xfs_btree_ptr_to_daddr(
1074         struct xfs_btree_cur    *cur,
1075         union xfs_btree_ptr     *ptr)
1076 {
1077         if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
1078                 ASSERT(be64_to_cpu(ptr->l) != NULLFSBLOCK);
1079
1080                 return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
1081         } else {
1082                 ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
1083                 ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK);
1084
1085                 return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
1086                                         be32_to_cpu(ptr->s));
1087         }
1088 }
1089
1090 STATIC void
1091 xfs_btree_set_refs(
1092         struct xfs_btree_cur    *cur,
1093         struct xfs_buf          *bp)
1094 {
1095         switch (cur->bc_btnum) {
1096         case XFS_BTNUM_BNO:
1097         case XFS_BTNUM_CNT:
1098                 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
1099                 break;
1100         case XFS_BTNUM_INO:
1101                 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF);
1102                 break;
1103         case XFS_BTNUM_BMAP:
1104                 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF);
1105                 break;
1106         default:
1107                 ASSERT(0);
1108         }
1109 }
1110
1111 STATIC int
1112 xfs_btree_get_buf_block(
1113         struct xfs_btree_cur    *cur,
1114         union xfs_btree_ptr     *ptr,
1115         int                     flags,
1116         struct xfs_btree_block  **block,
1117         struct xfs_buf          **bpp)
1118 {
1119         struct xfs_mount        *mp = cur->bc_mp;
1120         xfs_daddr_t             d;
1121
1122         /* need to sort out how callers deal with failures first */
1123         ASSERT(!(flags & XFS_BUF_TRYLOCK));
1124
1125         d = xfs_btree_ptr_to_daddr(cur, ptr);
1126         *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
1127                                  mp->m_bsize, flags);
1128
1129         ASSERT(*bpp);
1130         ASSERT(!XFS_BUF_GETERROR(*bpp));
1131
1132         *block = XFS_BUF_TO_BLOCK(*bpp);
1133         return 0;
1134 }
1135
1136 /*
1137  * Read in the buffer at the given ptr and return the buffer and
1138  * the block pointer within the buffer.
1139  */
1140 STATIC int
1141 xfs_btree_read_buf_block(
1142         struct xfs_btree_cur    *cur,
1143         union xfs_btree_ptr     *ptr,
1144         int                     level,
1145         int                     flags,
1146         struct xfs_btree_block  **block,
1147         struct xfs_buf          **bpp)
1148 {
1149         struct xfs_mount        *mp = cur->bc_mp;
1150         xfs_daddr_t             d;
1151         int                     error;
1152
1153         /* need to sort out how callers deal with failures first */
1154         ASSERT(!(flags & XFS_BUF_TRYLOCK));
1155
1156         d = xfs_btree_ptr_to_daddr(cur, ptr);
1157         error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
1158                                    mp->m_bsize, flags, bpp);
1159         if (error)
1160                 return error;
1161
1162         ASSERT(*bpp != NULL);
1163         ASSERT(!XFS_BUF_GETERROR(*bpp));
1164
1165         xfs_btree_set_refs(cur, *bpp);
1166         *block = XFS_BUF_TO_BLOCK(*bpp);
1167
1168         error = xfs_btree_check_block(cur, *block, level, *bpp);
1169         if (error)
1170                 xfs_trans_brelse(cur->bc_tp, *bpp);
1171         return error;
1172 }
1173
1174 /*
1175  * Copy keys from one btree block to another.
1176  */
1177 STATIC void
1178 xfs_btree_copy_keys(
1179         struct xfs_btree_cur    *cur,
1180         union xfs_btree_key     *dst_key,
1181         union xfs_btree_key     *src_key,
1182         int                     numkeys)
1183 {
1184         ASSERT(numkeys >= 0);
1185         memcpy(dst_key, src_key, numkeys * cur->bc_ops->key_len);
1186 }
1187
1188 /*
1189  * Copy records from one btree block to another.
1190  */
1191 STATIC void
1192 xfs_btree_copy_recs(
1193         struct xfs_btree_cur    *cur,
1194         union xfs_btree_rec     *dst_rec,
1195         union xfs_btree_rec     *src_rec,
1196         int                     numrecs)
1197 {
1198         ASSERT(numrecs >= 0);
1199         memcpy(dst_rec, src_rec, numrecs * cur->bc_ops->rec_len);
1200 }
1201
1202 /*
1203  * Copy block pointers from one btree block to another.
1204  */
1205 STATIC void
1206 xfs_btree_copy_ptrs(
1207         struct xfs_btree_cur    *cur,
1208         union xfs_btree_ptr     *dst_ptr,
1209         union xfs_btree_ptr     *src_ptr,
1210         int                     numptrs)
1211 {
1212         ASSERT(numptrs >= 0);
1213         memcpy(dst_ptr, src_ptr, numptrs * xfs_btree_ptr_len(cur));
1214 }
1215
1216 /*
1217  * Shift keys one index left/right inside a single btree block.
1218  */
1219 STATIC void
1220 xfs_btree_shift_keys(
1221         struct xfs_btree_cur    *cur,
1222         union xfs_btree_key     *key,
1223         int                     dir,
1224         int                     numkeys)
1225 {
1226         char                    *dst_key;
1227
1228         ASSERT(numkeys >= 0);
1229         ASSERT(dir == 1 || dir == -1);
1230
1231         dst_key = (char *)key + (dir * cur->bc_ops->key_len);
1232         memmove(dst_key, key, numkeys * cur->bc_ops->key_len);
1233 }
1234
1235 /*
1236  * Shift records one index left/right inside a single btree block.
1237  */
1238 STATIC void
1239 xfs_btree_shift_recs(
1240         struct xfs_btree_cur    *cur,
1241         union xfs_btree_rec     *rec,
1242         int                     dir,
1243         int                     numrecs)
1244 {
1245         char                    *dst_rec;
1246
1247         ASSERT(numrecs >= 0);
1248         ASSERT(dir == 1 || dir == -1);
1249
1250         dst_rec = (char *)rec + (dir * cur->bc_ops->rec_len);
1251         memmove(dst_rec, rec, numrecs * cur->bc_ops->rec_len);
1252 }
1253
1254 /*
1255  * Shift block pointers one index left/right inside a single btree block.
1256  */
1257 STATIC void
1258 xfs_btree_shift_ptrs(
1259         struct xfs_btree_cur    *cur,
1260         union xfs_btree_ptr     *ptr,
1261         int                     dir,
1262         int                     numptrs)
1263 {
1264         char                    *dst_ptr;
1265
1266         ASSERT(numptrs >= 0);
1267         ASSERT(dir == 1 || dir == -1);
1268
1269         dst_ptr = (char *)ptr + (dir * xfs_btree_ptr_len(cur));
1270         memmove(dst_ptr, ptr, numptrs * xfs_btree_ptr_len(cur));
1271 }
1272
1273 /*
1274  * Log key values from the btree block.
1275  */
1276 STATIC void
1277 xfs_btree_log_keys(
1278         struct xfs_btree_cur    *cur,
1279         struct xfs_buf          *bp,
1280         int                     first,
1281         int                     last)
1282 {
1283         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1284         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1285
1286         if (bp) {
1287                 xfs_trans_log_buf(cur->bc_tp, bp,
1288                                   xfs_btree_key_offset(cur, first),
1289                                   xfs_btree_key_offset(cur, last + 1) - 1);
1290         } else {
1291                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1292                                 xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1293         }
1294
1295         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1296 }
1297
1298 /*
1299  * Log record values from the btree block.
1300  */
1301 STATIC void
1302 xfs_btree_log_recs(
1303         struct xfs_btree_cur    *cur,
1304         struct xfs_buf          *bp,
1305         int                     first,
1306         int                     last)
1307 {
1308         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1309         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1310
1311         xfs_trans_log_buf(cur->bc_tp, bp,
1312                           xfs_btree_rec_offset(cur, first),
1313                           xfs_btree_rec_offset(cur, last + 1) - 1);
1314
1315         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1316 }
1317
1318 /*
1319  * Log block pointer fields from a btree block (nonleaf).
1320  */
1321 STATIC void
1322 xfs_btree_log_ptrs(
1323         struct xfs_btree_cur    *cur,   /* btree cursor */
1324         struct xfs_buf          *bp,    /* buffer containing btree block */
1325         int                     first,  /* index of first pointer to log */
1326         int                     last)   /* index of last pointer to log */
1327 {
1328         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1329         XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
1330
1331         if (bp) {
1332                 struct xfs_btree_block  *block = XFS_BUF_TO_BLOCK(bp);
1333                 int                     level = xfs_btree_get_level(block);
1334
1335                 xfs_trans_log_buf(cur->bc_tp, bp,
1336                                 xfs_btree_ptr_offset(cur, first, level),
1337                                 xfs_btree_ptr_offset(cur, last + 1, level) - 1);
1338         } else {
1339                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1340                         xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1341         }
1342
1343         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1344 }
1345
1346 /*
1347  * Log fields from a btree block header.
1348  */
1349 STATIC void
1350 xfs_btree_log_block(
1351         struct xfs_btree_cur    *cur,   /* btree cursor */
1352         struct xfs_buf          *bp,    /* buffer containing btree block */
1353         int                     fields) /* mask of fields: XFS_BB_... */
1354 {
1355         int                     first;  /* first byte offset logged */
1356         int                     last;   /* last byte offset logged */
1357         static const short      soffsets[] = {  /* table of offsets (short) */
1358                 offsetof(struct xfs_btree_sblock, bb_magic),
1359                 offsetof(struct xfs_btree_sblock, bb_level),
1360                 offsetof(struct xfs_btree_sblock, bb_numrecs),
1361                 offsetof(struct xfs_btree_sblock, bb_leftsib),
1362                 offsetof(struct xfs_btree_sblock, bb_rightsib),
1363                 sizeof(struct xfs_btree_sblock)
1364         };
1365         static const short      loffsets[] = {  /* table of offsets (long) */
1366                 offsetof(struct xfs_btree_lblock, bb_magic),
1367                 offsetof(struct xfs_btree_lblock, bb_level),
1368                 offsetof(struct xfs_btree_lblock, bb_numrecs),
1369                 offsetof(struct xfs_btree_lblock, bb_leftsib),
1370                 offsetof(struct xfs_btree_lblock, bb_rightsib),
1371                 sizeof(struct xfs_btree_lblock)
1372         };
1373
1374         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1375         XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
1376
1377         if (bp) {
1378                 xfs_btree_offsets(fields,
1379                                   (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
1380                                         loffsets : soffsets,
1381                                   XFS_BB_NUM_BITS, &first, &last);
1382                 xfs_trans_log_buf(cur->bc_tp, bp, first, last);
1383         } else {
1384                 xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
1385                         xfs_ilog_fbroot(cur->bc_private.b.whichfork));
1386         }
1387
1388         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1389 }
1390
1391 /*
1392  * Increment cursor by one record at the level.
1393  * For nonzero levels the leaf-ward information is untouched.
1394  */
1395 int                                             /* error */
1396 xfs_btree_increment(
1397         struct xfs_btree_cur    *cur,
1398         int                     level,
1399         int                     *stat)          /* success/failure */
1400 {
1401         struct xfs_btree_block  *block;
1402         union xfs_btree_ptr     ptr;
1403         struct xfs_buf          *bp;
1404         int                     error;          /* error return value */
1405         int                     lev;
1406
1407         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1408         XFS_BTREE_TRACE_ARGI(cur, level);
1409
1410         ASSERT(level < cur->bc_nlevels);
1411
1412         /* Read-ahead to the right at this level. */
1413         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1414
1415         /* Get a pointer to the btree block. */
1416         block = xfs_btree_get_block(cur, level, &bp);
1417
1418 #ifdef DEBUG
1419         error = xfs_btree_check_block(cur, block, level, bp);
1420         if (error)
1421                 goto error0;
1422 #endif
1423
1424         /* We're done if we remain in the block after the increment. */
1425         if (++cur->bc_ptrs[level] <= xfs_btree_get_numrecs(block))
1426                 goto out1;
1427
1428         /* Fail if we just went off the right edge of the tree. */
1429         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1430         if (xfs_btree_ptr_is_null(cur, &ptr))
1431                 goto out0;
1432
1433         XFS_BTREE_STATS_INC(cur, increment);
1434
1435         /*
1436          * March up the tree incrementing pointers.
1437          * Stop when we don't go off the right edge of a block.
1438          */
1439         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1440                 block = xfs_btree_get_block(cur, lev, &bp);
1441
1442 #ifdef DEBUG
1443                 error = xfs_btree_check_block(cur, block, lev, bp);
1444                 if (error)
1445                         goto error0;
1446 #endif
1447
1448                 if (++cur->bc_ptrs[lev] <= xfs_btree_get_numrecs(block))
1449                         break;
1450
1451                 /* Read-ahead the right block for the next loop. */
1452                 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1453         }
1454
1455         /*
1456          * If we went off the root then we are either seriously
1457          * confused or have the tree root in an inode.
1458          */
1459         if (lev == cur->bc_nlevels) {
1460                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1461                         goto out0;
1462                 ASSERT(0);
1463                 error = EFSCORRUPTED;
1464                 goto error0;
1465         }
1466         ASSERT(lev < cur->bc_nlevels);
1467
1468         /*
1469          * Now walk back down the tree, fixing up the cursor's buffer
1470          * pointers and key numbers.
1471          */
1472         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1473                 union xfs_btree_ptr     *ptrp;
1474
1475                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1476                 error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1477                                                         0, &block, &bp);
1478                 if (error)
1479                         goto error0;
1480
1481                 xfs_btree_setbuf(cur, lev, bp);
1482                 cur->bc_ptrs[lev] = 1;
1483         }
1484 out1:
1485         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1486         *stat = 1;
1487         return 0;
1488
1489 out0:
1490         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1491         *stat = 0;
1492         return 0;
1493
1494 error0:
1495         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1496         return error;
1497 }
1498
1499 /*
1500  * Decrement cursor by one record at the level.
1501  * For nonzero levels the leaf-ward information is untouched.
1502  */
1503 int                                             /* error */
1504 xfs_btree_decrement(
1505         struct xfs_btree_cur    *cur,
1506         int                     level,
1507         int                     *stat)          /* success/failure */
1508 {
1509         struct xfs_btree_block  *block;
1510         xfs_buf_t               *bp;
1511         int                     error;          /* error return value */
1512         int                     lev;
1513         union xfs_btree_ptr     ptr;
1514
1515         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1516         XFS_BTREE_TRACE_ARGI(cur, level);
1517
1518         ASSERT(level < cur->bc_nlevels);
1519
1520         /* Read-ahead to the left at this level. */
1521         xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1522
1523         /* We're done if we remain in the block after the decrement. */
1524         if (--cur->bc_ptrs[level] > 0)
1525                 goto out1;
1526
1527         /* Get a pointer to the btree block. */
1528         block = xfs_btree_get_block(cur, level, &bp);
1529
1530 #ifdef DEBUG
1531         error = xfs_btree_check_block(cur, block, level, bp);
1532         if (error)
1533                 goto error0;
1534 #endif
1535
1536         /* Fail if we just went off the left edge of the tree. */
1537         xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_LEFTSIB);
1538         if (xfs_btree_ptr_is_null(cur, &ptr))
1539                 goto out0;
1540
1541         XFS_BTREE_STATS_INC(cur, decrement);
1542
1543         /*
1544          * March up the tree decrementing pointers.
1545          * Stop when we don't go off the left edge of a block.
1546          */
1547         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1548                 if (--cur->bc_ptrs[lev] > 0)
1549                         break;
1550                 /* Read-ahead the left block for the next loop. */
1551                 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1552         }
1553
1554         /*
1555          * If we went off the root then we are seriously confused.
1556          * or the root of the tree is in an inode.
1557          */
1558         if (lev == cur->bc_nlevels) {
1559                 if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)
1560                         goto out0;
1561                 ASSERT(0);
1562                 error = EFSCORRUPTED;
1563                 goto error0;
1564         }
1565         ASSERT(lev < cur->bc_nlevels);
1566
1567         /*
1568          * Now walk back down the tree, fixing up the cursor's buffer
1569          * pointers and key numbers.
1570          */
1571         for (block = xfs_btree_get_block(cur, lev, &bp); lev > level; ) {
1572                 union xfs_btree_ptr     *ptrp;
1573
1574                 ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
1575                 error = xfs_btree_read_buf_block(cur, ptrp, --lev,
1576                                                         0, &block, &bp);
1577                 if (error)
1578                         goto error0;
1579                 xfs_btree_setbuf(cur, lev, bp);
1580                 cur->bc_ptrs[lev] = xfs_btree_get_numrecs(block);
1581         }
1582 out1:
1583         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1584         *stat = 1;
1585         return 0;
1586
1587 out0:
1588         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1589         *stat = 0;
1590         return 0;
1591
1592 error0:
1593         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1594         return error;
1595 }
1596
1597 STATIC int
1598 xfs_btree_lookup_get_block(
1599         struct xfs_btree_cur    *cur,   /* btree cursor */
1600         int                     level,  /* level in the btree */
1601         union xfs_btree_ptr     *pp,    /* ptr to btree block */
1602         struct xfs_btree_block  **blkp) /* return btree block */
1603 {
1604         struct xfs_buf          *bp;    /* buffer pointer for btree block */
1605         int                     error = 0;
1606
1607         /* special case the root block if in an inode */
1608         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1609             (level == cur->bc_nlevels - 1)) {
1610                 *blkp = xfs_btree_get_iroot(cur);
1611                 return 0;
1612         }
1613
1614         /*
1615          * If the old buffer at this level for the disk address we are
1616          * looking for re-use it.
1617          *
1618          * Otherwise throw it away and get a new one.
1619          */
1620         bp = cur->bc_bufs[level];
1621         if (bp && XFS_BUF_ADDR(bp) == xfs_btree_ptr_to_daddr(cur, pp)) {
1622                 *blkp = XFS_BUF_TO_BLOCK(bp);
1623                 return 0;
1624         }
1625
1626         error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp);
1627         if (error)
1628                 return error;
1629
1630         xfs_btree_setbuf(cur, level, bp);
1631         return 0;
1632 }
1633
1634 /*
1635  * Get current search key.  For level 0 we don't actually have a key
1636  * structure so we make one up from the record.  For all other levels
1637  * we just return the right key.
1638  */
1639 STATIC union xfs_btree_key *
1640 xfs_lookup_get_search_key(
1641         struct xfs_btree_cur    *cur,
1642         int                     level,
1643         int                     keyno,
1644         struct xfs_btree_block  *block,
1645         union xfs_btree_key     *kp)
1646 {
1647         if (level == 0) {
1648                 cur->bc_ops->init_key_from_rec(kp,
1649                                 xfs_btree_rec_addr(cur, keyno, block));
1650                 return kp;
1651         }
1652
1653         return xfs_btree_key_addr(cur, keyno, block);
1654 }
1655
1656 /*
1657  * Lookup the record.  The cursor is made to point to it, based on dir.
1658  * Return 0 if can't find any such record, 1 for success.
1659  */
1660 int                                     /* error */
1661 xfs_btree_lookup(
1662         struct xfs_btree_cur    *cur,   /* btree cursor */
1663         xfs_lookup_t            dir,    /* <=, ==, or >= */
1664         int                     *stat)  /* success/failure */
1665 {
1666         struct xfs_btree_block  *block; /* current btree block */
1667         __int64_t               diff;   /* difference for the current key */
1668         int                     error;  /* error return value */
1669         int                     keyno;  /* current key number */
1670         int                     level;  /* level in the btree */
1671         union xfs_btree_ptr     *pp;    /* ptr to btree block */
1672         union xfs_btree_ptr     ptr;    /* ptr to btree block */
1673
1674         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1675         XFS_BTREE_TRACE_ARGI(cur, dir);
1676
1677         XFS_BTREE_STATS_INC(cur, lookup);
1678
1679         block = NULL;
1680         keyno = 0;
1681
1682         /* initialise start pointer from cursor */
1683         cur->bc_ops->init_ptr_from_cur(cur, &ptr);
1684         pp = &ptr;
1685
1686         /*
1687          * Iterate over each level in the btree, starting at the root.
1688          * For each level above the leaves, find the key we need, based
1689          * on the lookup record, then follow the corresponding block
1690          * pointer down to the next level.
1691          */
1692         for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
1693                 /* Get the block we need to do the lookup on. */
1694                 error = xfs_btree_lookup_get_block(cur, level, pp, &block);
1695                 if (error)
1696                         goto error0;
1697
1698                 if (diff == 0) {
1699                         /*
1700                          * If we already had a key match at a higher level, we
1701                          * know we need to use the first entry in this block.
1702                          */
1703                         keyno = 1;
1704                 } else {
1705                         /* Otherwise search this block. Do a binary search. */
1706
1707                         int     high;   /* high entry number */
1708                         int     low;    /* low entry number */
1709
1710                         /* Set low and high entry numbers, 1-based. */
1711                         low = 1;
1712                         high = xfs_btree_get_numrecs(block);
1713                         if (!high) {
1714                                 /* Block is empty, must be an empty leaf. */
1715                                 ASSERT(level == 0 && cur->bc_nlevels == 1);
1716
1717                                 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1718                                 XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1719                                 *stat = 0;
1720                                 return 0;
1721                         }
1722
1723                         /* Binary search the block. */
1724                         while (low <= high) {
1725                                 union xfs_btree_key     key;
1726                                 union xfs_btree_key     *kp;
1727
1728                                 XFS_BTREE_STATS_INC(cur, compare);
1729
1730                                 /* keyno is average of low and high. */
1731                                 keyno = (low + high) >> 1;
1732
1733                                 /* Get current search key */
1734                                 kp = xfs_lookup_get_search_key(cur, level,
1735                                                 keyno, block, &key);
1736
1737                                 /*
1738                                  * Compute difference to get next direction:
1739                                  *  - less than, move right
1740                                  *  - greater than, move left
1741                                  *  - equal, we're done
1742                                  */
1743                                 diff = cur->bc_ops->key_diff(cur, kp);
1744                                 if (diff < 0)
1745                                         low = keyno + 1;
1746                                 else if (diff > 0)
1747                                         high = keyno - 1;
1748                                 else
1749                                         break;
1750                         }
1751                 }
1752
1753                 /*
1754                  * If there are more levels, set up for the next level
1755                  * by getting the block number and filling in the cursor.
1756                  */
1757                 if (level > 0) {
1758                         /*
1759                          * If we moved left, need the previous key number,
1760                          * unless there isn't one.
1761                          */
1762                         if (diff > 0 && --keyno < 1)
1763                                 keyno = 1;
1764                         pp = xfs_btree_ptr_addr(cur, keyno, block);
1765
1766 #ifdef DEBUG
1767                         error = xfs_btree_check_ptr(cur, pp, 0, level);
1768                         if (error)
1769                                 goto error0;
1770 #endif
1771                         cur->bc_ptrs[level] = keyno;
1772                 }
1773         }
1774
1775         /* Done with the search. See if we need to adjust the results. */
1776         if (dir != XFS_LOOKUP_LE && diff < 0) {
1777                 keyno++;
1778                 /*
1779                  * If ge search and we went off the end of the block, but it's
1780                  * not the last block, we're in the wrong block.
1781                  */
1782                 xfs_btree_get_sibling(cur, block, &ptr, XFS_BB_RIGHTSIB);
1783                 if (dir == XFS_LOOKUP_GE &&
1784                     keyno > xfs_btree_get_numrecs(block) &&
1785                     !xfs_btree_ptr_is_null(cur, &ptr)) {
1786                         int     i;
1787
1788                         cur->bc_ptrs[0] = keyno;
1789                         error = xfs_btree_increment(cur, 0, &i);
1790                         if (error)
1791                                 goto error0;
1792                         XFS_WANT_CORRUPTED_RETURN(i == 1);
1793                         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1794                         *stat = 1;
1795                         return 0;
1796                 }
1797         } else if (dir == XFS_LOOKUP_LE && diff > 0)
1798                 keyno--;
1799         cur->bc_ptrs[0] = keyno;
1800
1801         /* Return if we succeeded or not. */
1802         if (keyno == 0 || keyno > xfs_btree_get_numrecs(block))
1803                 *stat = 0;
1804         else if (dir != XFS_LOOKUP_EQ || diff == 0)
1805                 *stat = 1;
1806         else
1807                 *stat = 0;
1808         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1809         return 0;
1810
1811 error0:
1812         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1813         return error;
1814 }
1815
1816 /*
1817  * Update keys at all levels from here to the root along the cursor's path.
1818  */
1819 int
1820 xfs_btree_updkey(
1821         struct xfs_btree_cur    *cur,
1822         union xfs_btree_key     *keyp,
1823         int                     level)
1824 {
1825         struct xfs_btree_block  *block;
1826         struct xfs_buf          *bp;
1827         union xfs_btree_key     *kp;
1828         int                     ptr;
1829
1830         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1831         XFS_BTREE_TRACE_ARGIK(cur, level, keyp);
1832
1833         ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) || level >= 1);
1834
1835         /*
1836          * Go up the tree from this level toward the root.
1837          * At each level, update the key value to the value input.
1838          * Stop when we reach a level where the cursor isn't pointing
1839          * at the first entry in the block.
1840          */
1841         for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1842 #ifdef DEBUG
1843                 int             error;
1844 #endif
1845                 block = xfs_btree_get_block(cur, level, &bp);
1846 #ifdef DEBUG
1847                 error = xfs_btree_check_block(cur, block, level, bp);
1848                 if (error) {
1849                         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1850                         return error;
1851                 }
1852 #endif
1853                 ptr = cur->bc_ptrs[level];
1854                 kp = xfs_btree_key_addr(cur, ptr, block);
1855                 xfs_btree_copy_keys(cur, kp, keyp, 1);
1856                 xfs_btree_log_keys(cur, bp, ptr, ptr);
1857         }
1858
1859         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1860         return 0;
1861 }
1862
1863 /*
1864  * Update the record referred to by cur to the value in the
1865  * given record. This either works (return 0) or gets an
1866  * EFSCORRUPTED error.
1867  */
1868 int
1869 xfs_btree_update(
1870         struct xfs_btree_cur    *cur,
1871         union xfs_btree_rec     *rec)
1872 {
1873         struct xfs_btree_block  *block;
1874         struct xfs_buf          *bp;
1875         int                     error;
1876         int                     ptr;
1877         union xfs_btree_rec     *rp;
1878
1879         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1880         XFS_BTREE_TRACE_ARGR(cur, rec);
1881
1882         /* Pick up the current block. */
1883         block = xfs_btree_get_block(cur, 0, &bp);
1884
1885 #ifdef DEBUG
1886         error = xfs_btree_check_block(cur, block, 0, bp);
1887         if (error)
1888                 goto error0;
1889 #endif
1890         /* Get the address of the rec to be updated. */
1891         ptr = cur->bc_ptrs[0];
1892         rp = xfs_btree_rec_addr(cur, ptr, block);
1893
1894         /* Fill in the new contents and log them. */
1895         xfs_btree_copy_recs(cur, rp, rec, 1);
1896         xfs_btree_log_recs(cur, bp, ptr, ptr);
1897
1898         /*
1899          * If we are tracking the last record in the tree and
1900          * we are at the far right edge of the tree, update it.
1901          */
1902         if (xfs_btree_is_lastrec(cur, block, 0)) {
1903                 cur->bc_ops->update_lastrec(cur, block, rec,
1904                                             ptr, LASTREC_UPDATE);
1905         }
1906
1907         /* Updating first rec in leaf. Pass new key value up to our parent. */
1908         if (ptr == 1) {
1909                 union xfs_btree_key     key;
1910
1911                 cur->bc_ops->init_key_from_rec(&key, rec);
1912                 error = xfs_btree_updkey(cur, &key, 1);
1913                 if (error)
1914                         goto error0;
1915         }
1916
1917         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
1918         return 0;
1919
1920 error0:
1921         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
1922         return error;
1923 }
1924
1925 /*
1926  * Move 1 record left from cur/level if possible.
1927  * Update cur to reflect the new path.
1928  */
1929 int                                     /* error */
1930 xfs_btree_lshift(
1931         struct xfs_btree_cur    *cur,
1932         int                     level,
1933         int                     *stat)          /* success/failure */
1934 {
1935         union xfs_btree_key     key;            /* btree key */
1936         struct xfs_buf          *lbp;           /* left buffer pointer */
1937         struct xfs_btree_block  *left;          /* left btree block */
1938         int                     lrecs;          /* left record count */
1939         struct xfs_buf          *rbp;           /* right buffer pointer */
1940         struct xfs_btree_block  *right;         /* right btree block */
1941         int                     rrecs;          /* right record count */
1942         union xfs_btree_ptr     lptr;           /* left btree pointer */
1943         union xfs_btree_key     *rkp = NULL;    /* right btree key */
1944         union xfs_btree_ptr     *rpp = NULL;    /* right address pointer */
1945         union xfs_btree_rec     *rrp = NULL;    /* right record pointer */
1946         int                     error;          /* error return value */
1947
1948         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
1949         XFS_BTREE_TRACE_ARGI(cur, level);
1950
1951         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
1952             level == cur->bc_nlevels - 1)
1953                 goto out0;
1954
1955         /* Set up variables for this block as "right". */
1956         right = xfs_btree_get_block(cur, level, &rbp);
1957
1958 #ifdef DEBUG
1959         error = xfs_btree_check_block(cur, right, level, rbp);
1960         if (error)
1961                 goto error0;
1962 #endif
1963
1964         /* If we've got no left sibling then we can't shift an entry left. */
1965         xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
1966         if (xfs_btree_ptr_is_null(cur, &lptr))
1967                 goto out0;
1968
1969         /*
1970          * If the cursor entry is the one that would be moved, don't
1971          * do it... it's too complicated.
1972          */
1973         if (cur->bc_ptrs[level] <= 1)
1974                 goto out0;
1975
1976         /* Set up the left neighbor as "left". */
1977         error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp);
1978         if (error)
1979                 goto error0;
1980
1981         /* If it's full, it can't take another entry. */
1982         lrecs = xfs_btree_get_numrecs(left);
1983         if (lrecs == cur->bc_ops->get_maxrecs(cur, level))
1984                 goto out0;
1985
1986         rrecs = xfs_btree_get_numrecs(right);
1987
1988         /*
1989          * We add one entry to the left side and remove one for the right side.
1990          * Accout for it here, the changes will be updated on disk and logged
1991          * later.
1992          */
1993         lrecs++;
1994         rrecs--;
1995
1996         XFS_BTREE_STATS_INC(cur, lshift);
1997         XFS_BTREE_STATS_ADD(cur, moves, 1);
1998
1999         /*
2000          * If non-leaf, copy a key and a ptr to the left block.
2001          * Log the changes to the left block.
2002          */
2003         if (level > 0) {
2004                 /* It's a non-leaf.  Move keys and pointers. */
2005                 union xfs_btree_key     *lkp;   /* left btree key */
2006                 union xfs_btree_ptr     *lpp;   /* left address pointer */
2007
2008                 lkp = xfs_btree_key_addr(cur, lrecs, left);
2009                 rkp = xfs_btree_key_addr(cur, 1, right);
2010
2011                 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2012                 rpp = xfs_btree_ptr_addr(cur, 1, right);
2013 #ifdef DEBUG
2014                 error = xfs_btree_check_ptr(cur, rpp, 0, level);
2015                 if (error)
2016                         goto error0;
2017 #endif
2018                 xfs_btree_copy_keys(cur, lkp, rkp, 1);
2019                 xfs_btree_copy_ptrs(cur, lpp, rpp, 1);
2020
2021                 xfs_btree_log_keys(cur, lbp, lrecs, lrecs);
2022                 xfs_btree_log_ptrs(cur, lbp, lrecs, lrecs);
2023
2024                 xfs_btree_check_key(cur->bc_btnum,
2025                                     xfs_btree_key_addr(cur, lrecs - 1, left),
2026                                     lkp);
2027         } else {
2028                 /* It's a leaf.  Move records.  */
2029                 union xfs_btree_rec     *lrp;   /* left record pointer */
2030
2031                 lrp = xfs_btree_rec_addr(cur, lrecs, left);
2032                 rrp = xfs_btree_rec_addr(cur, 1, right);
2033
2034                 xfs_btree_copy_recs(cur, lrp, rrp, 1);
2035                 xfs_btree_log_recs(cur, lbp, lrecs, lrecs);
2036
2037                 xfs_btree_check_rec(cur->bc_btnum,
2038                                     xfs_btree_rec_addr(cur, lrecs - 1, left),
2039                                     lrp);
2040         }
2041
2042         xfs_btree_set_numrecs(left, lrecs);
2043         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2044
2045         xfs_btree_set_numrecs(right, rrecs);
2046         xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2047
2048         /*
2049          * Slide the contents of right down one entry.
2050          */
2051         XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
2052         if (level > 0) {
2053                 /* It's a nonleaf. operate on keys and ptrs */
2054 #ifdef DEBUG
2055                 int                     i;              /* loop index */
2056
2057                 for (i = 0; i < rrecs; i++) {
2058                         error = xfs_btree_check_ptr(cur, rpp, i + 1, level);
2059                         if (error)
2060                                 goto error0;
2061                 }
2062 #endif
2063                 xfs_btree_shift_keys(cur,
2064                                 xfs_btree_key_addr(cur, 2, right),
2065                                 -1, rrecs);
2066                 xfs_btree_shift_ptrs(cur,
2067                                 xfs_btree_ptr_addr(cur, 2, right),
2068                                 -1, rrecs);
2069
2070                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
2071                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2072         } else {
2073                 /* It's a leaf. operate on records */
2074                 xfs_btree_shift_recs(cur,
2075                         xfs_btree_rec_addr(cur, 2, right),
2076                         -1, rrecs);
2077                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
2078
2079                 /*
2080                  * If it's the first record in the block, we'll need a key
2081                  * structure to pass up to the next level (updkey).
2082                  */
2083                 cur->bc_ops->init_key_from_rec(&key,
2084                         xfs_btree_rec_addr(cur, 1, right));
2085                 rkp = &key;
2086         }
2087
2088         /* Update the parent key values of right. */
2089         error = xfs_btree_updkey(cur, rkp, level + 1);
2090         if (error)
2091                 goto error0;
2092
2093         /* Slide the cursor value left one. */
2094         cur->bc_ptrs[level]--;
2095
2096         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2097         *stat = 1;
2098         return 0;
2099
2100 out0:
2101         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2102         *stat = 0;
2103         return 0;
2104
2105 error0:
2106         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2107         return error;
2108 }
2109
2110 /*
2111  * Move 1 record right from cur/level if possible.
2112  * Update cur to reflect the new path.
2113  */
2114 int                                     /* error */
2115 xfs_btree_rshift(
2116         struct xfs_btree_cur    *cur,
2117         int                     level,
2118         int                     *stat)          /* success/failure */
2119 {
2120         union xfs_btree_key     key;            /* btree key */
2121         struct xfs_buf          *lbp;           /* left buffer pointer */
2122         struct xfs_btree_block  *left;          /* left btree block */
2123         struct xfs_buf          *rbp;           /* right buffer pointer */
2124         struct xfs_btree_block  *right;         /* right btree block */
2125         struct xfs_btree_cur    *tcur;          /* temporary btree cursor */
2126         union xfs_btree_ptr     rptr;           /* right block pointer */
2127         union xfs_btree_key     *rkp;           /* right btree key */
2128         int                     rrecs;          /* right record count */
2129         int                     lrecs;          /* left record count */
2130         int                     error;          /* error return value */
2131         int                     i;              /* loop counter */
2132
2133         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2134         XFS_BTREE_TRACE_ARGI(cur, level);
2135
2136         if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) &&
2137             (level == cur->bc_nlevels - 1))
2138                 goto out0;
2139
2140         /* Set up variables for this block as "left". */
2141         left = xfs_btree_get_block(cur, level, &lbp);
2142
2143 #ifdef DEBUG
2144         error = xfs_btree_check_block(cur, left, level, lbp);
2145         if (error)
2146                 goto error0;
2147 #endif
2148
2149         /* If we've got no right sibling then we can't shift an entry right. */
2150         xfs_btree_get_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2151         if (xfs_btree_ptr_is_null(cur, &rptr))
2152                 goto out0;
2153
2154         /*
2155          * If the cursor entry is the one that would be moved, don't
2156          * do it... it's too complicated.
2157          */
2158         lrecs = xfs_btree_get_numrecs(left);
2159         if (cur->bc_ptrs[level] >= lrecs)
2160                 goto out0;
2161
2162         /* Set up the right neighbor as "right". */
2163         error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp);
2164         if (error)
2165                 goto error0;
2166
2167         /* If it's full, it can't take another entry. */
2168         rrecs = xfs_btree_get_numrecs(right);
2169         if (rrecs == cur->bc_ops->get_maxrecs(cur, level))
2170                 goto out0;
2171
2172         XFS_BTREE_STATS_INC(cur, rshift);
2173         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2174
2175         /*
2176          * Make a hole at the start of the right neighbor block, then
2177          * copy the last left block entry to the hole.
2178          */
2179         if (level > 0) {
2180                 /* It's a nonleaf. make a hole in the keys and ptrs */
2181                 union xfs_btree_key     *lkp;
2182                 union xfs_btree_ptr     *lpp;
2183                 union xfs_btree_ptr     *rpp;
2184
2185                 lkp = xfs_btree_key_addr(cur, lrecs, left);
2186                 lpp = xfs_btree_ptr_addr(cur, lrecs, left);
2187                 rkp = xfs_btree_key_addr(cur, 1, right);
2188                 rpp = xfs_btree_ptr_addr(cur, 1, right);
2189
2190 #ifdef DEBUG
2191                 for (i = rrecs - 1; i >= 0; i--) {
2192                         error = xfs_btree_check_ptr(cur, rpp, i, level);
2193                         if (error)
2194                                 goto error0;
2195                 }
2196 #endif
2197
2198                 xfs_btree_shift_keys(cur, rkp, 1, rrecs);
2199                 xfs_btree_shift_ptrs(cur, rpp, 1, rrecs);
2200
2201 #ifdef DEBUG
2202                 error = xfs_btree_check_ptr(cur, lpp, 0, level);
2203                 if (error)
2204                         goto error0;
2205 #endif
2206
2207                 /* Now put the new data in, and log it. */
2208                 xfs_btree_copy_keys(cur, rkp, lkp, 1);
2209                 xfs_btree_copy_ptrs(cur, rpp, lpp, 1);
2210
2211                 xfs_btree_log_keys(cur, rbp, 1, rrecs + 1);
2212                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs + 1);
2213
2214                 xfs_btree_check_key(cur->bc_btnum, rkp,
2215                                     xfs_btree_key_addr(cur, 2, right));
2216         } else {
2217                 /* It's a leaf. make a hole in the records */
2218                 union xfs_btree_rec     *lrp;
2219                 union xfs_btree_rec     *rrp;
2220
2221                 lrp = xfs_btree_rec_addr(cur, lrecs, left);
2222                 rrp = xfs_btree_rec_addr(cur, 1, right);
2223
2224                 xfs_btree_shift_recs(cur, rrp, 1, rrecs);
2225
2226                 /* Now put the new data in, and log it. */
2227                 xfs_btree_copy_recs(cur, rrp, lrp, 1);
2228                 xfs_btree_log_recs(cur, rbp, 1, rrecs + 1);
2229
2230                 cur->bc_ops->init_key_from_rec(&key, rrp);
2231                 rkp = &key;
2232
2233                 xfs_btree_check_rec(cur->bc_btnum, rrp,
2234                                     xfs_btree_rec_addr(cur, 2, right));
2235         }
2236
2237         /*
2238          * Decrement and log left's numrecs, bump and log right's numrecs.
2239          */
2240         xfs_btree_set_numrecs(left, --lrecs);
2241         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS);
2242
2243         xfs_btree_set_numrecs(right, ++rrecs);
2244         xfs_btree_log_block(cur, rbp, XFS_BB_NUMRECS);
2245
2246         /*
2247          * Using a temporary cursor, update the parent key values of the
2248          * block on the right.
2249          */
2250         error = xfs_btree_dup_cursor(cur, &tcur);
2251         if (error)
2252                 goto error0;
2253         i = xfs_btree_lastrec(tcur, level);
2254         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
2255
2256         error = xfs_btree_increment(tcur, level, &i);
2257         if (error)
2258                 goto error1;
2259
2260         error = xfs_btree_updkey(tcur, rkp, level + 1);
2261         if (error)
2262                 goto error1;
2263
2264         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
2265
2266         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2267         *stat = 1;
2268         return 0;
2269
2270 out0:
2271         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2272         *stat = 0;
2273         return 0;
2274
2275 error0:
2276         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2277         return error;
2278
2279 error1:
2280         XFS_BTREE_TRACE_CURSOR(tcur, XBT_ERROR);
2281         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
2282         return error;
2283 }
2284
2285 /*
2286  * Split cur/level block in half.
2287  * Return new block number and the key to its first
2288  * record (to be inserted into parent).
2289  */
2290 int                                             /* error */
2291 xfs_btree_split(
2292         struct xfs_btree_cur    *cur,
2293         int                     level,
2294         union xfs_btree_ptr     *ptrp,
2295         union xfs_btree_key     *key,
2296         struct xfs_btree_cur    **curp,
2297         int                     *stat)          /* success/failure */
2298 {
2299         union xfs_btree_ptr     lptr;           /* left sibling block ptr */
2300         struct xfs_buf          *lbp;           /* left buffer pointer */
2301         struct xfs_btree_block  *left;          /* left btree block */
2302         union xfs_btree_ptr     rptr;           /* right sibling block ptr */
2303         struct xfs_buf          *rbp;           /* right buffer pointer */
2304         struct xfs_btree_block  *right;         /* right btree block */
2305         union xfs_btree_ptr     rrptr;          /* right-right sibling ptr */
2306         struct xfs_buf          *rrbp;          /* right-right buffer pointer */
2307         struct xfs_btree_block  *rrblock;       /* right-right btree block */
2308         int                     lrecs;
2309         int                     rrecs;
2310         int                     src_index;
2311         int                     error;          /* error return value */
2312 #ifdef DEBUG
2313         int                     i;
2314 #endif
2315
2316         XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
2317         XFS_BTREE_TRACE_ARGIPK(cur, level, *ptrp, key);
2318
2319         XFS_BTREE_STATS_INC(cur, split);
2320
2321         /* Set up left block (current one). */
2322         left = xfs_btree_get_block(cur, level, &lbp);
2323
2324 #ifdef DEBUG
2325         error = xfs_btree_check_block(cur, left, level, lbp);
2326         if (error)
2327                 goto error0;
2328 #endif
2329
2330         xfs_btree_buf_to_ptr(cur, lbp, &lptr);
2331
2332         /* Allocate the new block. If we can't do it, we're toast. Give up. */
2333         error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat);
2334         if (error)
2335                 goto error0;
2336         if (*stat == 0)
2337                 goto out0;
2338         XFS_BTREE_STATS_INC(cur, alloc);
2339
2340         /* Set up the new block as "right". */
2341         error = xfs_btree_get_buf_block(cur, &rptr, 0, &right, &rbp);
2342         if (error)
2343                 goto error0;
2344
2345         /* Fill in the btree header for the new right block. */
2346         xfs_btree_init_block(cur, xfs_btree_get_level(left), 0, right);
2347
2348         /*
2349          * Split the entries between the old and the new block evenly.
2350          * Make sure that if there's an odd number of entries now, that
2351          * each new block will have the same number of entries.
2352          */
2353         lrecs = xfs_btree_get_numrecs(left);
2354         rrecs = lrecs / 2;
2355         if ((lrecs & 1) && cur->bc_ptrs[level] <= rrecs + 1)
2356                 rrecs++;
2357         src_index = (lrecs - rrecs + 1);
2358
2359         XFS_BTREE_STATS_ADD(cur, moves, rrecs);
2360
2361         /*
2362          * Copy btree block entries from the left block over to the
2363          * new block, the right. Update the right block and log the
2364          * changes.
2365          */
2366         if (level > 0) {
2367                 /* It's a non-leaf.  Move keys and pointers. */
2368                 union xfs_btree_key     *lkp;   /* left btree key */
2369                 union xfs_btree_ptr     *lpp;   /* left address pointer */
2370                 union xfs_btree_key     *rkp;   /* right btree key */
2371                 union xfs_btree_ptr     *rpp;   /* right address pointer */
2372
2373                 lkp = xfs_btree_key_addr(cur, src_index, left);
2374                 lpp = xfs_btree_ptr_addr(cur, src_index, left);
2375                 rkp = xfs_btree_key_addr(cur, 1, right);
2376                 rpp = xfs_btree_ptr_addr(cur, 1, right);
2377
2378 #ifdef DEBUG
2379                 for (i = src_index; i < rrecs; i++) {
2380                         error = xfs_btree_check_ptr(cur, lpp, i, level);
2381                         if (error)
2382                                 goto error0;
2383                 }
2384 #endif
2385
2386                 xfs_btree_copy_keys(cur, rkp, lkp, rrecs);
2387                 xfs_btree_copy_ptrs(cur, rpp, lpp, rrecs);
2388
2389                 xfs_btree_log_keys(cur, rbp, 1, rrecs);
2390                 xfs_btree_log_ptrs(cur, rbp, 1, rrecs);
2391
2392                 /* Grab the keys to the entries moved to the right block */
2393                 xfs_btree_copy_keys(cur, key, rkp, 1);
2394         } else {
2395                 /* It's a leaf.  Move records.  */
2396                 union xfs_btree_rec     *lrp;   /* left record pointer */
2397                 union xfs_btree_rec     *rrp;   /* right record pointer */
2398
2399                 lrp = xfs_btree_rec_addr(cur, src_index, left);
2400                 rrp = xfs_btree_rec_addr(cur, 1, right);
2401
2402                 xfs_btree_copy_recs(cur, rrp, lrp, rrecs);
2403                 xfs_btree_log_recs(cur, rbp, 1, rrecs);
2404
2405                 cur->bc_ops->init_key_from_rec(key,
2406                         xfs_btree_rec_addr(cur, 1, right));
2407         }
2408
2409
2410         /*
2411          * Find the left block number by looking in the buffer.
2412          * Adjust numrecs, sibling pointers.
2413          */
2414         xfs_btree_get_sibling(cur, left, &rrptr, XFS_BB_RIGHTSIB);
2415         xfs_btree_set_sibling(cur, right, &rrptr, XFS_BB_RIGHTSIB);
2416         xfs_btree_set_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
2417         xfs_btree_set_sibling(cur, left, &rptr, XFS_BB_RIGHTSIB);
2418
2419         lrecs -= rrecs;
2420         xfs_btree_set_numrecs(left, lrecs);
2421         xfs_btree_set_numrecs(right, xfs_btree_get_numrecs(right) + rrecs);
2422
2423         xfs_btree_log_block(cur, rbp, XFS_BB_ALL_BITS);
2424         xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
2425
2426         /*
2427          * If there's a block to the new block's right, make that block
2428          * point back to right instead of to left.
2429          */
2430         if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
2431                 error = xfs_btree_read_buf_block(cur, &rrptr, level,
2432                                                         0, &rrblock, &rrbp);
2433                 if (error)
2434                         goto error0;
2435                 xfs_btree_set_sibling(cur, rrblock, &rptr, XFS_BB_LEFTSIB);
2436                 xfs_btree_log_block(cur, rrbp, XFS_BB_LEFTSIB);
2437         }
2438         /*
2439          * If the cursor is really in the right block, move it there.
2440          * If it's just pointing past the last entry in left, then we'll
2441          * insert there, so don't change anything in that case.
2442          */
2443         if (cur->bc_ptrs[level] > lrecs + 1) {
2444                 xfs_btree_setbuf(cur, level, rbp);
2445                 cur->bc_ptrs[level] -= lrecs;
2446         }
2447         /*
2448          * If there are more levels, we'll need another cursor which refers
2449          * the right block, no matter where this cursor was.
2450          */
2451         if (level + 1 < cur->bc_nlevels) {
2452                 error = xfs_btree_dup_cursor(cur, curp);
2453                 if (error)
2454                         goto error0;
2455                 (*curp)->bc_ptrs[level + 1]++;
2456         }
2457         *ptrp = rptr;
2458         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2459         *stat = 1;
2460         return 0;
2461 out0:
2462         XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT);
2463         *stat = 0;
2464         return 0;
2465
2466 error0:
2467         XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
2468         return error;
2469 }