[XFS] Tight loop in xfs_finish_reclaim_all prevented the xfslogd to run
[linux-2.6] / fs / xfs / xfs_btree.c
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir.h"
28 #include "xfs_dir2.h"
29 #include "xfs_dmapi.h"
30 #include "xfs_mount.h"
31 #include "xfs_bmap_btree.h"
32 #include "xfs_alloc_btree.h"
33 #include "xfs_ialloc_btree.h"
34 #include "xfs_dir_sf.h"
35 #include "xfs_dir2_sf.h"
36 #include "xfs_attr_sf.h"
37 #include "xfs_dinode.h"
38 #include "xfs_inode.h"
39 #include "xfs_btree.h"
40 #include "xfs_ialloc.h"
41 #include "xfs_error.h"
42
43 /*
44  * Cursor allocation zone.
45  */
46 kmem_zone_t     *xfs_btree_cur_zone;
47
48 /*
49  * Btree magic numbers.
50  */
51 const __uint32_t xfs_magics[XFS_BTNUM_MAX] =
52 {
53         XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
54 };
55
56 /*
57  * Prototypes for internal routines.
58  */
59
60 /*
61  * Checking routine: return maxrecs for the block.
62  */
63 STATIC int                              /* number of records fitting in block */
64 xfs_btree_maxrecs(
65         xfs_btree_cur_t         *cur,   /* btree cursor */
66         xfs_btree_block_t       *block);/* generic btree block pointer */
67
68 /*
69  * Internal routines.
70  */
71
72 /*
73  * Retrieve the block pointer from the cursor at the given level.
74  * This may be a bmap btree root or from a buffer.
75  */
76 STATIC xfs_btree_block_t *                      /* generic btree block pointer */
77 xfs_btree_get_block(
78         xfs_btree_cur_t         *cur,   /* btree cursor */
79         int                     level,  /* level in btree */
80         struct xfs_buf          **bpp); /* buffer containing the block */
81
82 /*
83  * Checking routine: return maxrecs for the block.
84  */
85 STATIC int                              /* number of records fitting in block */
86 xfs_btree_maxrecs(
87         xfs_btree_cur_t         *cur,   /* btree cursor */
88         xfs_btree_block_t       *block) /* generic btree block pointer */
89 {
90         switch (cur->bc_btnum) {
91         case XFS_BTNUM_BNO:
92         case XFS_BTNUM_CNT:
93                 return (int)XFS_ALLOC_BLOCK_MAXRECS(
94                                 be16_to_cpu(block->bb_h.bb_level), cur);
95         case XFS_BTNUM_BMAP:
96                 return (int)XFS_BMAP_BLOCK_IMAXRECS(
97                                 be16_to_cpu(block->bb_h.bb_level), cur);
98         case XFS_BTNUM_INO:
99                 return (int)XFS_INOBT_BLOCK_MAXRECS(
100                                 be16_to_cpu(block->bb_h.bb_level), cur);
101         default:
102                 ASSERT(0);
103                 return 0;
104         }
105 }
106
107 /*
108  * External routines.
109  */
110
111 #ifdef DEBUG
112 /*
113  * Debug routine: check that block header is ok.
114  */
115 void
116 xfs_btree_check_block(
117         xfs_btree_cur_t         *cur,   /* btree cursor */
118         xfs_btree_block_t       *block, /* generic btree block pointer */
119         int                     level,  /* level of the btree block */
120         xfs_buf_t               *bp)    /* buffer containing block, if any */
121 {
122         if (XFS_BTREE_LONG_PTRS(cur->bc_btnum))
123                 xfs_btree_check_lblock(cur, (xfs_btree_lblock_t *)block, level,
124                         bp);
125         else
126                 xfs_btree_check_sblock(cur, (xfs_btree_sblock_t *)block, level,
127                         bp);
128 }
129
130 /*
131  * Debug routine: check that keys are in the right order.
132  */
133 void
134 xfs_btree_check_key(
135         xfs_btnum_t     btnum,          /* btree identifier */
136         void            *ak1,           /* pointer to left (lower) key */
137         void            *ak2)           /* pointer to right (higher) key */
138 {
139         switch (btnum) {
140         case XFS_BTNUM_BNO: {
141                 xfs_alloc_key_t *k1;
142                 xfs_alloc_key_t *k2;
143
144                 k1 = ak1;
145                 k2 = ak2;
146                 ASSERT(be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock));
147                 break;
148             }
149         case XFS_BTNUM_CNT: {
150                 xfs_alloc_key_t *k1;
151                 xfs_alloc_key_t *k2;
152
153                 k1 = ak1;
154                 k2 = ak2;
155                 ASSERT(be32_to_cpu(k1->ar_blockcount) < be32_to_cpu(k2->ar_blockcount) ||
156                        (k1->ar_blockcount == k2->ar_blockcount &&
157                         be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock)));
158                 break;
159             }
160         case XFS_BTNUM_BMAP: {
161                 xfs_bmbt_key_t  *k1;
162                 xfs_bmbt_key_t  *k2;
163
164                 k1 = ak1;
165                 k2 = ak2;
166                 ASSERT(INT_GET(k1->br_startoff, ARCH_CONVERT) < INT_GET(k2->br_startoff, ARCH_CONVERT));
167                 break;
168             }
169         case XFS_BTNUM_INO: {
170                 xfs_inobt_key_t *k1;
171                 xfs_inobt_key_t *k2;
172
173                 k1 = ak1;
174                 k2 = ak2;
175                 ASSERT(INT_GET(k1->ir_startino, ARCH_CONVERT) < INT_GET(k2->ir_startino, ARCH_CONVERT));
176                 break;
177             }
178         default:
179                 ASSERT(0);
180         }
181 }
182 #endif  /* DEBUG */
183
184 /*
185  * Checking routine: check that long form block header is ok.
186  */
187 /* ARGSUSED */
188 int                                     /* error (0 or EFSCORRUPTED) */
189 xfs_btree_check_lblock(
190         xfs_btree_cur_t         *cur,   /* btree cursor */
191         xfs_btree_lblock_t      *block, /* btree long form block pointer */
192         int                     level,  /* level of the btree block */
193         xfs_buf_t               *bp)    /* buffer for block, if any */
194 {
195         int                     lblock_ok; /* block passes checks */
196         xfs_mount_t             *mp;    /* file system mount point */
197
198         mp = cur->bc_mp;
199         lblock_ok =
200                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
201                 be16_to_cpu(block->bb_level) == level &&
202                 be16_to_cpu(block->bb_numrecs) <=
203                         xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) &&
204                 block->bb_leftsib &&
205                 (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO ||
206                  XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) &&
207                 block->bb_rightsib &&
208                 (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO ||
209                  XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_rightsib)));
210         if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK,
211                         XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
212                 if (bp)
213                         xfs_buftrace("LBTREE ERROR", bp);
214                 XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
215                                  mp);
216                 return XFS_ERROR(EFSCORRUPTED);
217         }
218         return 0;
219 }
220
221 /*
222  * Checking routine: check that (long) pointer is ok.
223  */
224 int                                     /* error (0 or EFSCORRUPTED) */
225 xfs_btree_check_lptr(
226         xfs_btree_cur_t *cur,           /* btree cursor */
227         xfs_dfsbno_t    ptr,            /* btree block disk address */
228         int             level)          /* btree block level */
229 {
230         xfs_mount_t     *mp;            /* file system mount point */
231
232         mp = cur->bc_mp;
233         XFS_WANT_CORRUPTED_RETURN(
234                 level > 0 &&
235                 ptr != NULLDFSBNO &&
236                 XFS_FSB_SANITY_CHECK(mp, ptr));
237         return 0;
238 }
239
240 #ifdef DEBUG
241 /*
242  * Debug routine: check that records are in the right order.
243  */
244 void
245 xfs_btree_check_rec(
246         xfs_btnum_t     btnum,          /* btree identifier */
247         void            *ar1,           /* pointer to left (lower) record */
248         void            *ar2)           /* pointer to right (higher) record */
249 {
250         switch (btnum) {
251         case XFS_BTNUM_BNO: {
252                 xfs_alloc_rec_t *r1;
253                 xfs_alloc_rec_t *r2;
254
255                 r1 = ar1;
256                 r2 = ar2;
257                 ASSERT(be32_to_cpu(r1->ar_startblock) +
258                        be32_to_cpu(r1->ar_blockcount) <=
259                        be32_to_cpu(r2->ar_startblock));
260                 break;
261             }
262         case XFS_BTNUM_CNT: {
263                 xfs_alloc_rec_t *r1;
264                 xfs_alloc_rec_t *r2;
265
266                 r1 = ar1;
267                 r2 = ar2;
268                 ASSERT(be32_to_cpu(r1->ar_blockcount) < be32_to_cpu(r2->ar_blockcount) ||
269                        (r1->ar_blockcount == r2->ar_blockcount &&
270                         be32_to_cpu(r1->ar_startblock) < be32_to_cpu(r2->ar_startblock)));
271                 break;
272             }
273         case XFS_BTNUM_BMAP: {
274                 xfs_bmbt_rec_t  *r1;
275                 xfs_bmbt_rec_t  *r2;
276
277                 r1 = ar1;
278                 r2 = ar2;
279                 ASSERT(xfs_bmbt_disk_get_startoff(r1) +
280                        xfs_bmbt_disk_get_blockcount(r1) <=
281                        xfs_bmbt_disk_get_startoff(r2));
282                 break;
283             }
284         case XFS_BTNUM_INO: {
285                 xfs_inobt_rec_t *r1;
286                 xfs_inobt_rec_t *r2;
287
288                 r1 = ar1;
289                 r2 = ar2;
290                 ASSERT(INT_GET(r1->ir_startino, ARCH_CONVERT) + XFS_INODES_PER_CHUNK <=
291                        INT_GET(r2->ir_startino, ARCH_CONVERT));
292                 break;
293             }
294         default:
295                 ASSERT(0);
296         }
297 }
298 #endif  /* DEBUG */
299
300 /*
301  * Checking routine: check that block header is ok.
302  */
303 /* ARGSUSED */
304 int                                     /* error (0 or EFSCORRUPTED) */
305 xfs_btree_check_sblock(
306         xfs_btree_cur_t         *cur,   /* btree cursor */
307         xfs_btree_sblock_t      *block, /* btree short form block pointer */
308         int                     level,  /* level of the btree block */
309         xfs_buf_t               *bp)    /* buffer containing block */
310 {
311         xfs_buf_t               *agbp;  /* buffer for ag. freespace struct */
312         xfs_agf_t               *agf;   /* ag. freespace structure */
313         xfs_agblock_t           agflen; /* native ag. freespace length */
314         int                     sblock_ok; /* block passes checks */
315
316         agbp = cur->bc_private.a.agbp;
317         agf = XFS_BUF_TO_AGF(agbp);
318         agflen = be32_to_cpu(agf->agf_length);
319         sblock_ok =
320                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
321                 be16_to_cpu(block->bb_level) == level &&
322                 be16_to_cpu(block->bb_numrecs) <=
323                         xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) &&
324                 (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK ||
325                  be32_to_cpu(block->bb_leftsib) < agflen) &&
326                 block->bb_leftsib &&
327                 (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK ||
328                  be32_to_cpu(block->bb_rightsib) < agflen) &&
329                 block->bb_rightsib;
330         if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
331                         XFS_ERRTAG_BTREE_CHECK_SBLOCK,
332                         XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
333                 if (bp)
334                         xfs_buftrace("SBTREE ERROR", bp);
335                 XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW,
336                                  cur->bc_mp);
337                 return XFS_ERROR(EFSCORRUPTED);
338         }
339         return 0;
340 }
341
342 /*
343  * Checking routine: check that (short) pointer is ok.
344  */
345 int                                     /* error (0 or EFSCORRUPTED) */
346 xfs_btree_check_sptr(
347         xfs_btree_cur_t *cur,           /* btree cursor */
348         xfs_agblock_t   ptr,            /* btree block disk address */
349         int             level)          /* btree block level */
350 {
351         xfs_buf_t       *agbp;          /* buffer for ag. freespace struct */
352         xfs_agf_t       *agf;           /* ag. freespace structure */
353
354         agbp = cur->bc_private.a.agbp;
355         agf = XFS_BUF_TO_AGF(agbp);
356         XFS_WANT_CORRUPTED_RETURN(
357                 level > 0 &&
358                 ptr != NULLAGBLOCK && ptr != 0 &&
359                 ptr < be32_to_cpu(agf->agf_length));
360         return 0;
361 }
362
363 /*
364  * Delete the btree cursor.
365  */
366 void
367 xfs_btree_del_cursor(
368         xfs_btree_cur_t *cur,           /* btree cursor */
369         int             error)          /* del because of error */
370 {
371         int             i;              /* btree level */
372
373         /*
374          * Clear the buffer pointers, and release the buffers.
375          * If we're doing this in the face of an error, we
376          * need to make sure to inspect all of the entries
377          * in the bc_bufs array for buffers to be unlocked.
378          * This is because some of the btree code works from
379          * level n down to 0, and if we get an error along
380          * the way we won't have initialized all the entries
381          * down to 0.
382          */
383         for (i = 0; i < cur->bc_nlevels; i++) {
384                 if (cur->bc_bufs[i])
385                         xfs_btree_setbuf(cur, i, NULL);
386                 else if (!error)
387                         break;
388         }
389         /*
390          * Can't free a bmap cursor without having dealt with the
391          * allocated indirect blocks' accounting.
392          */
393         ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
394                cur->bc_private.b.allocated == 0);
395         /*
396          * Free the cursor.
397          */
398         kmem_zone_free(xfs_btree_cur_zone, cur);
399 }
400
401 /*
402  * Duplicate the btree cursor.
403  * Allocate a new one, copy the record, re-get the buffers.
404  */
405 int                                     /* error */
406 xfs_btree_dup_cursor(
407         xfs_btree_cur_t *cur,           /* input cursor */
408         xfs_btree_cur_t **ncur)         /* output cursor */
409 {
410         xfs_buf_t       *bp;            /* btree block's buffer pointer */
411         int             error;          /* error return value */
412         int             i;              /* level number of btree block */
413         xfs_mount_t     *mp;            /* mount structure for filesystem */
414         xfs_btree_cur_t *new;           /* new cursor value */
415         xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
416
417         tp = cur->bc_tp;
418         mp = cur->bc_mp;
419         /*
420          * Allocate a new cursor like the old one.
421          */
422         new = xfs_btree_init_cursor(mp, tp, cur->bc_private.a.agbp,
423                 cur->bc_private.a.agno, cur->bc_btnum, cur->bc_private.b.ip,
424                 cur->bc_private.b.whichfork);
425         /*
426          * Copy the record currently in the cursor.
427          */
428         new->bc_rec = cur->bc_rec;
429         /*
430          * For each level current, re-get the buffer and copy the ptr value.
431          */
432         for (i = 0; i < new->bc_nlevels; i++) {
433                 new->bc_ptrs[i] = cur->bc_ptrs[i];
434                 new->bc_ra[i] = cur->bc_ra[i];
435                 if ((bp = cur->bc_bufs[i])) {
436                         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
437                                 XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) {
438                                 xfs_btree_del_cursor(new, error);
439                                 *ncur = NULL;
440                                 return error;
441                         }
442                         new->bc_bufs[i] = bp;
443                         ASSERT(bp);
444                         ASSERT(!XFS_BUF_GETERROR(bp));
445                 } else
446                         new->bc_bufs[i] = NULL;
447         }
448         /*
449          * For bmap btrees, copy the firstblock, flist, and flags values,
450          * since init cursor doesn't get them.
451          */
452         if (new->bc_btnum == XFS_BTNUM_BMAP) {
453                 new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
454                 new->bc_private.b.flist = cur->bc_private.b.flist;
455                 new->bc_private.b.flags = cur->bc_private.b.flags;
456         }
457         *ncur = new;
458         return 0;
459 }
460
461 /*
462  * Change the cursor to point to the first record at the given level.
463  * Other levels are unaffected.
464  */
465 int                                     /* success=1, failure=0 */
466 xfs_btree_firstrec(
467         xfs_btree_cur_t         *cur,   /* btree cursor */
468         int                     level)  /* level to change */
469 {
470         xfs_btree_block_t       *block; /* generic btree block pointer */
471         xfs_buf_t               *bp;    /* buffer containing block */
472
473         /*
474          * Get the block pointer for this level.
475          */
476         block = xfs_btree_get_block(cur, level, &bp);
477         xfs_btree_check_block(cur, block, level, bp);
478         /*
479          * It's empty, there is no such record.
480          */
481         if (!block->bb_h.bb_numrecs)
482                 return 0;
483         /*
484          * Set the ptr value to 1, that's the first record/key.
485          */
486         cur->bc_ptrs[level] = 1;
487         return 1;
488 }
489
490 /*
491  * Retrieve the block pointer from the cursor at the given level.
492  * This may be a bmap btree root or from a buffer.
493  */
494 STATIC xfs_btree_block_t *              /* generic btree block pointer */
495 xfs_btree_get_block(
496         xfs_btree_cur_t         *cur,   /* btree cursor */
497         int                     level,  /* level in btree */
498         xfs_buf_t               **bpp)  /* buffer containing the block */
499 {
500         xfs_btree_block_t       *block; /* return value */
501         xfs_buf_t               *bp;    /* return buffer */
502         xfs_ifork_t             *ifp;   /* inode fork pointer */
503         int                     whichfork; /* data or attr fork */
504
505         if (cur->bc_btnum == XFS_BTNUM_BMAP && level == cur->bc_nlevels - 1) {
506                 whichfork = cur->bc_private.b.whichfork;
507                 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, whichfork);
508                 block = (xfs_btree_block_t *)ifp->if_broot;
509                 bp = NULL;
510         } else {
511                 bp = cur->bc_bufs[level];
512                 block = XFS_BUF_TO_BLOCK(bp);
513         }
514         ASSERT(block != NULL);
515         *bpp = bp;
516         return block;
517 }
518
519 /*
520  * Get a buffer for the block, return it with no data read.
521  * Long-form addressing.
522  */
523 xfs_buf_t *                             /* buffer for fsbno */
524 xfs_btree_get_bufl(
525         xfs_mount_t     *mp,            /* file system mount point */
526         xfs_trans_t     *tp,            /* transaction pointer */
527         xfs_fsblock_t   fsbno,          /* file system block number */
528         uint            lock)           /* lock flags for get_buf */
529 {
530         xfs_buf_t       *bp;            /* buffer pointer (return value) */
531         xfs_daddr_t             d;              /* real disk block address */
532
533         ASSERT(fsbno != NULLFSBLOCK);
534         d = XFS_FSB_TO_DADDR(mp, fsbno);
535         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
536         ASSERT(bp);
537         ASSERT(!XFS_BUF_GETERROR(bp));
538         return bp;
539 }
540
541 /*
542  * Get a buffer for the block, return it with no data read.
543  * Short-form addressing.
544  */
545 xfs_buf_t *                             /* buffer for agno/agbno */
546 xfs_btree_get_bufs(
547         xfs_mount_t     *mp,            /* file system mount point */
548         xfs_trans_t     *tp,            /* transaction pointer */
549         xfs_agnumber_t  agno,           /* allocation group number */
550         xfs_agblock_t   agbno,          /* allocation group block number */
551         uint            lock)           /* lock flags for get_buf */
552 {
553         xfs_buf_t       *bp;            /* buffer pointer (return value) */
554         xfs_daddr_t             d;              /* real disk block address */
555
556         ASSERT(agno != NULLAGNUMBER);
557         ASSERT(agbno != NULLAGBLOCK);
558         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
559         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
560         ASSERT(bp);
561         ASSERT(!XFS_BUF_GETERROR(bp));
562         return bp;
563 }
564
565 /*
566  * Allocate a new btree cursor.
567  * The cursor is either for allocation (A) or bmap (B) or inodes (I).
568  */
569 xfs_btree_cur_t *                       /* new btree cursor */
570 xfs_btree_init_cursor(
571         xfs_mount_t     *mp,            /* file system mount point */
572         xfs_trans_t     *tp,            /* transaction pointer */
573         xfs_buf_t       *agbp,          /* (A only) buffer for agf structure */
574                                         /* (I only) buffer for agi structure */
575         xfs_agnumber_t  agno,           /* (AI only) allocation group number */
576         xfs_btnum_t     btnum,          /* btree identifier */
577         xfs_inode_t     *ip,            /* (B only) inode owning the btree */
578         int             whichfork)      /* (B only) data or attr fork */
579 {
580         xfs_agf_t       *agf;           /* (A) allocation group freespace */
581         xfs_agi_t       *agi;           /* (I) allocation group inodespace */
582         xfs_btree_cur_t *cur;           /* return value */
583         xfs_ifork_t     *ifp;           /* (I) inode fork pointer */
584         int             nlevels=0;      /* number of levels in the btree */
585
586         ASSERT(xfs_btree_cur_zone != NULL);
587         /*
588          * Allocate a new cursor.
589          */
590         cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
591         /*
592          * Deduce the number of btree levels from the arguments.
593          */
594         switch (btnum) {
595         case XFS_BTNUM_BNO:
596         case XFS_BTNUM_CNT:
597                 agf = XFS_BUF_TO_AGF(agbp);
598                 nlevels = be32_to_cpu(agf->agf_levels[btnum]);
599                 break;
600         case XFS_BTNUM_BMAP:
601                 ifp = XFS_IFORK_PTR(ip, whichfork);
602                 nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
603                 break;
604         case XFS_BTNUM_INO:
605                 agi = XFS_BUF_TO_AGI(agbp);
606                 nlevels = be32_to_cpu(agi->agi_level);
607                 break;
608         default:
609                 ASSERT(0);
610         }
611         /*
612          * Fill in the common fields.
613          */
614         cur->bc_tp = tp;
615         cur->bc_mp = mp;
616         cur->bc_nlevels = nlevels;
617         cur->bc_btnum = btnum;
618         cur->bc_blocklog = mp->m_sb.sb_blocklog;
619         /*
620          * Fill in private fields.
621          */
622         switch (btnum) {
623         case XFS_BTNUM_BNO:
624         case XFS_BTNUM_CNT:
625                 /*
626                  * Allocation btree fields.
627                  */
628                 cur->bc_private.a.agbp = agbp;
629                 cur->bc_private.a.agno = agno;
630                 break;
631         case XFS_BTNUM_BMAP:
632                 /*
633                  * Bmap btree fields.
634                  */
635                 cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
636                 cur->bc_private.b.ip = ip;
637                 cur->bc_private.b.firstblock = NULLFSBLOCK;
638                 cur->bc_private.b.flist = NULL;
639                 cur->bc_private.b.allocated = 0;
640                 cur->bc_private.b.flags = 0;
641                 cur->bc_private.b.whichfork = whichfork;
642                 break;
643         case XFS_BTNUM_INO:
644                 /*
645                  * Inode allocation btree fields.
646                  */
647                 cur->bc_private.i.agbp = agbp;
648                 cur->bc_private.i.agno = agno;
649                 break;
650         default:
651                 ASSERT(0);
652         }
653         return cur;
654 }
655
656 /*
657  * Check for the cursor referring to the last block at the given level.
658  */
659 int                                     /* 1=is last block, 0=not last block */
660 xfs_btree_islastblock(
661         xfs_btree_cur_t         *cur,   /* btree cursor */
662         int                     level)  /* level to check */
663 {
664         xfs_btree_block_t       *block; /* generic btree block pointer */
665         xfs_buf_t               *bp;    /* buffer containing block */
666
667         block = xfs_btree_get_block(cur, level, &bp);
668         xfs_btree_check_block(cur, block, level, bp);
669         if (XFS_BTREE_LONG_PTRS(cur->bc_btnum))
670                 return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
671         else
672                 return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
673 }
674
675 /*
676  * Change the cursor to point to the last record in the current block
677  * at the given level.  Other levels are unaffected.
678  */
679 int                                     /* success=1, failure=0 */
680 xfs_btree_lastrec(
681         xfs_btree_cur_t         *cur,   /* btree cursor */
682         int                     level)  /* level to change */
683 {
684         xfs_btree_block_t       *block; /* generic btree block pointer */
685         xfs_buf_t               *bp;    /* buffer containing block */
686
687         /*
688          * Get the block pointer for this level.
689          */
690         block = xfs_btree_get_block(cur, level, &bp);
691         xfs_btree_check_block(cur, block, level, bp);
692         /*
693          * It's empty, there is no such record.
694          */
695         if (!block->bb_h.bb_numrecs)
696                 return 0;
697         /*
698          * Set the ptr value to numrecs, that's the last record/key.
699          */
700         cur->bc_ptrs[level] = be16_to_cpu(block->bb_h.bb_numrecs);
701         return 1;
702 }
703
704 /*
705  * Compute first and last byte offsets for the fields given.
706  * Interprets the offsets table, which contains struct field offsets.
707  */
708 void
709 xfs_btree_offsets(
710         __int64_t       fields,         /* bitmask of fields */
711         const short     *offsets,       /* table of field offsets */
712         int             nbits,          /* number of bits to inspect */
713         int             *first,         /* output: first byte offset */
714         int             *last)          /* output: last byte offset */
715 {
716         int             i;              /* current bit number */
717         __int64_t       imask;          /* mask for current bit number */
718
719         ASSERT(fields != 0);
720         /*
721          * Find the lowest bit, so the first byte offset.
722          */
723         for (i = 0, imask = 1LL; ; i++, imask <<= 1) {
724                 if (imask & fields) {
725                         *first = offsets[i];
726                         break;
727                 }
728         }
729         /*
730          * Find the highest bit, so the last byte offset.
731          */
732         for (i = nbits - 1, imask = 1LL << i; ; i--, imask >>= 1) {
733                 if (imask & fields) {
734                         *last = offsets[i + 1] - 1;
735                         break;
736                 }
737         }
738 }
739
740 /*
741  * Get a buffer for the block, return it read in.
742  * Long-form addressing.
743  */
744 int                                     /* error */
745 xfs_btree_read_bufl(
746         xfs_mount_t     *mp,            /* file system mount point */
747         xfs_trans_t     *tp,            /* transaction pointer */
748         xfs_fsblock_t   fsbno,          /* file system block number */
749         uint            lock,           /* lock flags for read_buf */
750         xfs_buf_t       **bpp,          /* buffer for fsbno */
751         int             refval)         /* ref count value for buffer */
752 {
753         xfs_buf_t       *bp;            /* return value */
754         xfs_daddr_t             d;              /* real disk block address */
755         int             error;
756
757         ASSERT(fsbno != NULLFSBLOCK);
758         d = XFS_FSB_TO_DADDR(mp, fsbno);
759         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
760                         mp->m_bsize, lock, &bp))) {
761                 return error;
762         }
763         ASSERT(!bp || !XFS_BUF_GETERROR(bp));
764         if (bp != NULL) {
765                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
766         }
767         *bpp = bp;
768         return 0;
769 }
770
771 /*
772  * Get a buffer for the block, return it read in.
773  * Short-form addressing.
774  */
775 int                                     /* error */
776 xfs_btree_read_bufs(
777         xfs_mount_t     *mp,            /* file system mount point */
778         xfs_trans_t     *tp,            /* transaction pointer */
779         xfs_agnumber_t  agno,           /* allocation group number */
780         xfs_agblock_t   agbno,          /* allocation group block number */
781         uint            lock,           /* lock flags for read_buf */
782         xfs_buf_t       **bpp,          /* buffer for agno/agbno */
783         int             refval)         /* ref count value for buffer */
784 {
785         xfs_buf_t       *bp;            /* return value */
786         xfs_daddr_t     d;              /* real disk block address */
787         int             error;
788
789         ASSERT(agno != NULLAGNUMBER);
790         ASSERT(agbno != NULLAGBLOCK);
791         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
792         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
793                                         mp->m_bsize, lock, &bp))) {
794                 return error;
795         }
796         ASSERT(!bp || !XFS_BUF_GETERROR(bp));
797         if (bp != NULL) {
798                 switch (refval) {
799                 case XFS_ALLOC_BTREE_REF:
800                         XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
801                         break;
802                 case XFS_INO_BTREE_REF:
803                         XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval);
804                         break;
805                 }
806         }
807         *bpp = bp;
808         return 0;
809 }
810
811 /*
812  * Read-ahead the block, don't wait for it, don't return a buffer.
813  * Long-form addressing.
814  */
815 /* ARGSUSED */
816 void
817 xfs_btree_reada_bufl(
818         xfs_mount_t     *mp,            /* file system mount point */
819         xfs_fsblock_t   fsbno,          /* file system block number */
820         xfs_extlen_t    count)          /* count of filesystem blocks */
821 {
822         xfs_daddr_t             d;
823
824         ASSERT(fsbno != NULLFSBLOCK);
825         d = XFS_FSB_TO_DADDR(mp, fsbno);
826         xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
827 }
828
829 /*
830  * Read-ahead the block, don't wait for it, don't return a buffer.
831  * Short-form addressing.
832  */
833 /* ARGSUSED */
834 void
835 xfs_btree_reada_bufs(
836         xfs_mount_t     *mp,            /* file system mount point */
837         xfs_agnumber_t  agno,           /* allocation group number */
838         xfs_agblock_t   agbno,          /* allocation group block number */
839         xfs_extlen_t    count)          /* count of filesystem blocks */
840 {
841         xfs_daddr_t             d;
842
843         ASSERT(agno != NULLAGNUMBER);
844         ASSERT(agbno != NULLAGBLOCK);
845         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
846         xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
847 }
848
849 /*
850  * Read-ahead btree blocks, at the given level.
851  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
852  */
853 int
854 xfs_btree_readahead_core(
855         xfs_btree_cur_t         *cur,           /* btree cursor */
856         int                     lev,            /* level in btree */
857         int                     lr)             /* left/right bits */
858 {
859         xfs_alloc_block_t       *a;
860         xfs_bmbt_block_t        *b;
861         xfs_inobt_block_t       *i;
862         int                     rval = 0;
863
864         ASSERT(cur->bc_bufs[lev] != NULL);
865         cur->bc_ra[lev] |= lr;
866         switch (cur->bc_btnum) {
867         case XFS_BTNUM_BNO:
868         case XFS_BTNUM_CNT:
869                 a = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]);
870                 if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(a->bb_leftsib) != NULLAGBLOCK) {
871                         xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
872                                 be32_to_cpu(a->bb_leftsib), 1);
873                         rval++;
874                 }
875                 if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(a->bb_rightsib) != NULLAGBLOCK) {
876                         xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
877                                 be32_to_cpu(a->bb_rightsib), 1);
878                         rval++;
879                 }
880                 break;
881         case XFS_BTNUM_BMAP:
882                 b = XFS_BUF_TO_BMBT_BLOCK(cur->bc_bufs[lev]);
883                 if ((lr & XFS_BTCUR_LEFTRA) && be64_to_cpu(b->bb_leftsib) != NULLDFSBNO) {
884                         xfs_btree_reada_bufl(cur->bc_mp, be64_to_cpu(b->bb_leftsib), 1);
885                         rval++;
886                 }
887                 if ((lr & XFS_BTCUR_RIGHTRA) && be64_to_cpu(b->bb_rightsib) != NULLDFSBNO) {
888                         xfs_btree_reada_bufl(cur->bc_mp, be64_to_cpu(b->bb_rightsib), 1);
889                         rval++;
890                 }
891                 break;
892         case XFS_BTNUM_INO:
893                 i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]);
894                 if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(i->bb_leftsib) != NULLAGBLOCK) {
895                         xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno,
896                                 be32_to_cpu(i->bb_leftsib), 1);
897                         rval++;
898                 }
899                 if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(i->bb_rightsib) != NULLAGBLOCK) {
900                         xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.i.agno,
901                                 be32_to_cpu(i->bb_rightsib), 1);
902                         rval++;
903                 }
904                 break;
905         default:
906                 ASSERT(0);
907         }
908         return rval;
909 }
910
911 /*
912  * Set the buffer for level "lev" in the cursor to bp, releasing
913  * any previous buffer.
914  */
915 void
916 xfs_btree_setbuf(
917         xfs_btree_cur_t         *cur,   /* btree cursor */
918         int                     lev,    /* level in btree */
919         xfs_buf_t               *bp)    /* new buffer to set */
920 {
921         xfs_btree_block_t       *b;     /* btree block */
922         xfs_buf_t               *obp;   /* old buffer pointer */
923
924         obp = cur->bc_bufs[lev];
925         if (obp)
926                 xfs_trans_brelse(cur->bc_tp, obp);
927         cur->bc_bufs[lev] = bp;
928         cur->bc_ra[lev] = 0;
929         if (!bp)
930                 return;
931         b = XFS_BUF_TO_BLOCK(bp);
932         if (XFS_BTREE_LONG_PTRS(cur->bc_btnum)) {
933                 if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
934                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
935                 if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
936                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
937         } else {
938                 if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK)
939                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
940                 if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK)
941                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
942         }
943 }