jbd: fix error handling for checkpoint io
[linux-2.6] / fs / xfs / xfs_btree.c
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir2.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h"
30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir2_sf.h"
34 #include "xfs_attr_sf.h"
35 #include "xfs_dinode.h"
36 #include "xfs_inode.h"
37 #include "xfs_btree.h"
38 #include "xfs_ialloc.h"
39 #include "xfs_error.h"
40
41 /*
42  * Cursor allocation zone.
43  */
44 kmem_zone_t     *xfs_btree_cur_zone;
45
46 /*
47  * Btree magic numbers.
48  */
49 const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
50         XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
51 };
52
53 /*
54  * Checking routine: return maxrecs for the block.
55  */
56 STATIC int                              /* number of records fitting in block */
57 xfs_btree_maxrecs(
58         xfs_btree_cur_t         *cur,   /* btree cursor */
59         xfs_btree_block_t       *block) /* generic btree block pointer */
60 {
61         switch (cur->bc_btnum) {
62         case XFS_BTNUM_BNO:
63         case XFS_BTNUM_CNT:
64                 return (int)XFS_ALLOC_BLOCK_MAXRECS(
65                                 be16_to_cpu(block->bb_h.bb_level), cur);
66         case XFS_BTNUM_BMAP:
67                 return (int)XFS_BMAP_BLOCK_IMAXRECS(
68                                 be16_to_cpu(block->bb_h.bb_level), cur);
69         case XFS_BTNUM_INO:
70                 return (int)XFS_INOBT_BLOCK_MAXRECS(
71                                 be16_to_cpu(block->bb_h.bb_level), cur);
72         default:
73                 ASSERT(0);
74                 return 0;
75         }
76 }
77
78 /*
79  * External routines.
80  */
81
82 #ifdef DEBUG
83 /*
84  * Debug routine: check that block header is ok.
85  */
86 void
87 xfs_btree_check_block(
88         xfs_btree_cur_t         *cur,   /* btree cursor */
89         xfs_btree_block_t       *block, /* generic btree block pointer */
90         int                     level,  /* level of the btree block */
91         xfs_buf_t               *bp)    /* buffer containing block, if any */
92 {
93         if (XFS_BTREE_LONG_PTRS(cur->bc_btnum))
94                 xfs_btree_check_lblock(cur, (xfs_btree_lblock_t *)block, level,
95                         bp);
96         else
97                 xfs_btree_check_sblock(cur, (xfs_btree_sblock_t *)block, level,
98                         bp);
99 }
100
101 /*
102  * Debug routine: check that keys are in the right order.
103  */
104 void
105 xfs_btree_check_key(
106         xfs_btnum_t     btnum,          /* btree identifier */
107         void            *ak1,           /* pointer to left (lower) key */
108         void            *ak2)           /* pointer to right (higher) key */
109 {
110         switch (btnum) {
111         case XFS_BTNUM_BNO: {
112                 xfs_alloc_key_t *k1;
113                 xfs_alloc_key_t *k2;
114
115                 k1 = ak1;
116                 k2 = ak2;
117                 ASSERT(be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock));
118                 break;
119             }
120         case XFS_BTNUM_CNT: {
121                 xfs_alloc_key_t *k1;
122                 xfs_alloc_key_t *k2;
123
124                 k1 = ak1;
125                 k2 = ak2;
126                 ASSERT(be32_to_cpu(k1->ar_blockcount) < be32_to_cpu(k2->ar_blockcount) ||
127                        (k1->ar_blockcount == k2->ar_blockcount &&
128                         be32_to_cpu(k1->ar_startblock) < be32_to_cpu(k2->ar_startblock)));
129                 break;
130             }
131         case XFS_BTNUM_BMAP: {
132                 xfs_bmbt_key_t  *k1;
133                 xfs_bmbt_key_t  *k2;
134
135                 k1 = ak1;
136                 k2 = ak2;
137                 ASSERT(be64_to_cpu(k1->br_startoff) < be64_to_cpu(k2->br_startoff));
138                 break;
139             }
140         case XFS_BTNUM_INO: {
141                 xfs_inobt_key_t *k1;
142                 xfs_inobt_key_t *k2;
143
144                 k1 = ak1;
145                 k2 = ak2;
146                 ASSERT(be32_to_cpu(k1->ir_startino) < be32_to_cpu(k2->ir_startino));
147                 break;
148             }
149         default:
150                 ASSERT(0);
151         }
152 }
153 #endif  /* DEBUG */
154
155 /*
156  * Checking routine: check that long form block header is ok.
157  */
158 /* ARGSUSED */
159 int                                     /* error (0 or EFSCORRUPTED) */
160 xfs_btree_check_lblock(
161         xfs_btree_cur_t         *cur,   /* btree cursor */
162         xfs_btree_lblock_t      *block, /* btree long form block pointer */
163         int                     level,  /* level of the btree block */
164         xfs_buf_t               *bp)    /* buffer for block, if any */
165 {
166         int                     lblock_ok; /* block passes checks */
167         xfs_mount_t             *mp;    /* file system mount point */
168
169         mp = cur->bc_mp;
170         lblock_ok =
171                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
172                 be16_to_cpu(block->bb_level) == level &&
173                 be16_to_cpu(block->bb_numrecs) <=
174                         xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) &&
175                 block->bb_leftsib &&
176                 (be64_to_cpu(block->bb_leftsib) == NULLDFSBNO ||
177                  XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_leftsib))) &&
178                 block->bb_rightsib &&
179                 (be64_to_cpu(block->bb_rightsib) == NULLDFSBNO ||
180                  XFS_FSB_SANITY_CHECK(mp, be64_to_cpu(block->bb_rightsib)));
181         if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK,
182                         XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
183                 if (bp)
184                         xfs_buftrace("LBTREE ERROR", bp);
185                 XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
186                                  mp);
187                 return XFS_ERROR(EFSCORRUPTED);
188         }
189         return 0;
190 }
191
192 /*
193  * Checking routine: check that (long) pointer is ok.
194  */
195 int                                     /* error (0 or EFSCORRUPTED) */
196 xfs_btree_check_lptr(
197         xfs_btree_cur_t *cur,           /* btree cursor */
198         xfs_dfsbno_t    ptr,            /* btree block disk address */
199         int             level)          /* btree block level */
200 {
201         xfs_mount_t     *mp;            /* file system mount point */
202
203         mp = cur->bc_mp;
204         XFS_WANT_CORRUPTED_RETURN(
205                 level > 0 &&
206                 ptr != NULLDFSBNO &&
207                 XFS_FSB_SANITY_CHECK(mp, ptr));
208         return 0;
209 }
210
211 #ifdef DEBUG
212 /*
213  * Debug routine: check that records are in the right order.
214  */
215 void
216 xfs_btree_check_rec(
217         xfs_btnum_t     btnum,          /* btree identifier */
218         void            *ar1,           /* pointer to left (lower) record */
219         void            *ar2)           /* pointer to right (higher) record */
220 {
221         switch (btnum) {
222         case XFS_BTNUM_BNO: {
223                 xfs_alloc_rec_t *r1;
224                 xfs_alloc_rec_t *r2;
225
226                 r1 = ar1;
227                 r2 = ar2;
228                 ASSERT(be32_to_cpu(r1->ar_startblock) +
229                        be32_to_cpu(r1->ar_blockcount) <=
230                        be32_to_cpu(r2->ar_startblock));
231                 break;
232             }
233         case XFS_BTNUM_CNT: {
234                 xfs_alloc_rec_t *r1;
235                 xfs_alloc_rec_t *r2;
236
237                 r1 = ar1;
238                 r2 = ar2;
239                 ASSERT(be32_to_cpu(r1->ar_blockcount) < be32_to_cpu(r2->ar_blockcount) ||
240                        (r1->ar_blockcount == r2->ar_blockcount &&
241                         be32_to_cpu(r1->ar_startblock) < be32_to_cpu(r2->ar_startblock)));
242                 break;
243             }
244         case XFS_BTNUM_BMAP: {
245                 xfs_bmbt_rec_t  *r1;
246                 xfs_bmbt_rec_t  *r2;
247
248                 r1 = ar1;
249                 r2 = ar2;
250                 ASSERT(xfs_bmbt_disk_get_startoff(r1) +
251                        xfs_bmbt_disk_get_blockcount(r1) <=
252                        xfs_bmbt_disk_get_startoff(r2));
253                 break;
254             }
255         case XFS_BTNUM_INO: {
256                 xfs_inobt_rec_t *r1;
257                 xfs_inobt_rec_t *r2;
258
259                 r1 = ar1;
260                 r2 = ar2;
261                 ASSERT(be32_to_cpu(r1->ir_startino) + XFS_INODES_PER_CHUNK <=
262                        be32_to_cpu(r2->ir_startino));
263                 break;
264             }
265         default:
266                 ASSERT(0);
267         }
268 }
269 #endif  /* DEBUG */
270
271 /*
272  * Checking routine: check that block header is ok.
273  */
274 /* ARGSUSED */
275 int                                     /* error (0 or EFSCORRUPTED) */
276 xfs_btree_check_sblock(
277         xfs_btree_cur_t         *cur,   /* btree cursor */
278         xfs_btree_sblock_t      *block, /* btree short form block pointer */
279         int                     level,  /* level of the btree block */
280         xfs_buf_t               *bp)    /* buffer containing block */
281 {
282         xfs_buf_t               *agbp;  /* buffer for ag. freespace struct */
283         xfs_agf_t               *agf;   /* ag. freespace structure */
284         xfs_agblock_t           agflen; /* native ag. freespace length */
285         int                     sblock_ok; /* block passes checks */
286
287         agbp = cur->bc_private.a.agbp;
288         agf = XFS_BUF_TO_AGF(agbp);
289         agflen = be32_to_cpu(agf->agf_length);
290         sblock_ok =
291                 be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
292                 be16_to_cpu(block->bb_level) == level &&
293                 be16_to_cpu(block->bb_numrecs) <=
294                         xfs_btree_maxrecs(cur, (xfs_btree_block_t *)block) &&
295                 (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK ||
296                  be32_to_cpu(block->bb_leftsib) < agflen) &&
297                 block->bb_leftsib &&
298                 (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK ||
299                  be32_to_cpu(block->bb_rightsib) < agflen) &&
300                 block->bb_rightsib;
301         if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
302                         XFS_ERRTAG_BTREE_CHECK_SBLOCK,
303                         XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
304                 if (bp)
305                         xfs_buftrace("SBTREE ERROR", bp);
306                 XFS_ERROR_REPORT("xfs_btree_check_sblock", XFS_ERRLEVEL_LOW,
307                                  cur->bc_mp);
308                 return XFS_ERROR(EFSCORRUPTED);
309         }
310         return 0;
311 }
312
313 /*
314  * Checking routine: check that (short) pointer is ok.
315  */
316 int                                     /* error (0 or EFSCORRUPTED) */
317 xfs_btree_check_sptr(
318         xfs_btree_cur_t *cur,           /* btree cursor */
319         xfs_agblock_t   ptr,            /* btree block disk address */
320         int             level)          /* btree block level */
321 {
322         xfs_buf_t       *agbp;          /* buffer for ag. freespace struct */
323         xfs_agf_t       *agf;           /* ag. freespace structure */
324
325         agbp = cur->bc_private.a.agbp;
326         agf = XFS_BUF_TO_AGF(agbp);
327         XFS_WANT_CORRUPTED_RETURN(
328                 level > 0 &&
329                 ptr != NULLAGBLOCK && ptr != 0 &&
330                 ptr < be32_to_cpu(agf->agf_length));
331         return 0;
332 }
333
334 /*
335  * Delete the btree cursor.
336  */
337 void
338 xfs_btree_del_cursor(
339         xfs_btree_cur_t *cur,           /* btree cursor */
340         int             error)          /* del because of error */
341 {
342         int             i;              /* btree level */
343
344         /*
345          * Clear the buffer pointers, and release the buffers.
346          * If we're doing this in the face of an error, we
347          * need to make sure to inspect all of the entries
348          * in the bc_bufs array for buffers to be unlocked.
349          * This is because some of the btree code works from
350          * level n down to 0, and if we get an error along
351          * the way we won't have initialized all the entries
352          * down to 0.
353          */
354         for (i = 0; i < cur->bc_nlevels; i++) {
355                 if (cur->bc_bufs[i])
356                         xfs_btree_setbuf(cur, i, NULL);
357                 else if (!error)
358                         break;
359         }
360         /*
361          * Can't free a bmap cursor without having dealt with the
362          * allocated indirect blocks' accounting.
363          */
364         ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP ||
365                cur->bc_private.b.allocated == 0);
366         /*
367          * Free the cursor.
368          */
369         kmem_zone_free(xfs_btree_cur_zone, cur);
370 }
371
372 /*
373  * Duplicate the btree cursor.
374  * Allocate a new one, copy the record, re-get the buffers.
375  */
376 int                                     /* error */
377 xfs_btree_dup_cursor(
378         xfs_btree_cur_t *cur,           /* input cursor */
379         xfs_btree_cur_t **ncur)         /* output cursor */
380 {
381         xfs_buf_t       *bp;            /* btree block's buffer pointer */
382         int             error;          /* error return value */
383         int             i;              /* level number of btree block */
384         xfs_mount_t     *mp;            /* mount structure for filesystem */
385         xfs_btree_cur_t *new;           /* new cursor value */
386         xfs_trans_t     *tp;            /* transaction pointer, can be NULL */
387
388         tp = cur->bc_tp;
389         mp = cur->bc_mp;
390         /*
391          * Allocate a new cursor like the old one.
392          */
393         new = xfs_btree_init_cursor(mp, tp, cur->bc_private.a.agbp,
394                 cur->bc_private.a.agno, cur->bc_btnum, cur->bc_private.b.ip,
395                 cur->bc_private.b.whichfork);
396         /*
397          * Copy the record currently in the cursor.
398          */
399         new->bc_rec = cur->bc_rec;
400         /*
401          * For each level current, re-get the buffer and copy the ptr value.
402          */
403         for (i = 0; i < new->bc_nlevels; i++) {
404                 new->bc_ptrs[i] = cur->bc_ptrs[i];
405                 new->bc_ra[i] = cur->bc_ra[i];
406                 if ((bp = cur->bc_bufs[i])) {
407                         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
408                                 XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) {
409                                 xfs_btree_del_cursor(new, error);
410                                 *ncur = NULL;
411                                 return error;
412                         }
413                         new->bc_bufs[i] = bp;
414                         ASSERT(bp);
415                         ASSERT(!XFS_BUF_GETERROR(bp));
416                 } else
417                         new->bc_bufs[i] = NULL;
418         }
419         /*
420          * For bmap btrees, copy the firstblock, flist, and flags values,
421          * since init cursor doesn't get them.
422          */
423         if (new->bc_btnum == XFS_BTNUM_BMAP) {
424                 new->bc_private.b.firstblock = cur->bc_private.b.firstblock;
425                 new->bc_private.b.flist = cur->bc_private.b.flist;
426                 new->bc_private.b.flags = cur->bc_private.b.flags;
427         }
428         *ncur = new;
429         return 0;
430 }
431
432 /*
433  * Retrieve the block pointer from the cursor at the given level.
434  * This may be a bmap btree root or from a buffer.
435  */
436 STATIC xfs_btree_block_t *              /* generic btree block pointer */
437 xfs_btree_get_block(
438         xfs_btree_cur_t         *cur,   /* btree cursor */
439         int                     level,  /* level in btree */
440         xfs_buf_t               **bpp)  /* buffer containing the block */
441 {
442         xfs_btree_block_t       *block; /* return value */
443         xfs_buf_t               *bp;    /* return buffer */
444         xfs_ifork_t             *ifp;   /* inode fork pointer */
445         int                     whichfork; /* data or attr fork */
446
447         if (cur->bc_btnum == XFS_BTNUM_BMAP && level == cur->bc_nlevels - 1) {
448                 whichfork = cur->bc_private.b.whichfork;
449                 ifp = XFS_IFORK_PTR(cur->bc_private.b.ip, whichfork);
450                 block = (xfs_btree_block_t *)ifp->if_broot;
451                 bp = NULL;
452         } else {
453                 bp = cur->bc_bufs[level];
454                 block = XFS_BUF_TO_BLOCK(bp);
455         }
456         ASSERT(block != NULL);
457         *bpp = bp;
458         return block;
459 }
460
461 /*
462  * Get a buffer for the block, return it with no data read.
463  * Long-form addressing.
464  */
465 xfs_buf_t *                             /* buffer for fsbno */
466 xfs_btree_get_bufl(
467         xfs_mount_t     *mp,            /* file system mount point */
468         xfs_trans_t     *tp,            /* transaction pointer */
469         xfs_fsblock_t   fsbno,          /* file system block number */
470         uint            lock)           /* lock flags for get_buf */
471 {
472         xfs_buf_t       *bp;            /* buffer pointer (return value) */
473         xfs_daddr_t             d;              /* real disk block address */
474
475         ASSERT(fsbno != NULLFSBLOCK);
476         d = XFS_FSB_TO_DADDR(mp, fsbno);
477         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
478         ASSERT(bp);
479         ASSERT(!XFS_BUF_GETERROR(bp));
480         return bp;
481 }
482
483 /*
484  * Get a buffer for the block, return it with no data read.
485  * Short-form addressing.
486  */
487 xfs_buf_t *                             /* buffer for agno/agbno */
488 xfs_btree_get_bufs(
489         xfs_mount_t     *mp,            /* file system mount point */
490         xfs_trans_t     *tp,            /* transaction pointer */
491         xfs_agnumber_t  agno,           /* allocation group number */
492         xfs_agblock_t   agbno,          /* allocation group block number */
493         uint            lock)           /* lock flags for get_buf */
494 {
495         xfs_buf_t       *bp;            /* buffer pointer (return value) */
496         xfs_daddr_t             d;              /* real disk block address */
497
498         ASSERT(agno != NULLAGNUMBER);
499         ASSERT(agbno != NULLAGBLOCK);
500         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
501         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
502         ASSERT(bp);
503         ASSERT(!XFS_BUF_GETERROR(bp));
504         return bp;
505 }
506
507 /*
508  * Allocate a new btree cursor.
509  * The cursor is either for allocation (A) or bmap (B) or inodes (I).
510  */
511 xfs_btree_cur_t *                       /* new btree cursor */
512 xfs_btree_init_cursor(
513         xfs_mount_t     *mp,            /* file system mount point */
514         xfs_trans_t     *tp,            /* transaction pointer */
515         xfs_buf_t       *agbp,          /* (A only) buffer for agf structure */
516                                         /* (I only) buffer for agi structure */
517         xfs_agnumber_t  agno,           /* (AI only) allocation group number */
518         xfs_btnum_t     btnum,          /* btree identifier */
519         xfs_inode_t     *ip,            /* (B only) inode owning the btree */
520         int             whichfork)      /* (B only) data or attr fork */
521 {
522         xfs_agf_t       *agf;           /* (A) allocation group freespace */
523         xfs_agi_t       *agi;           /* (I) allocation group inodespace */
524         xfs_btree_cur_t *cur;           /* return value */
525         xfs_ifork_t     *ifp;           /* (I) inode fork pointer */
526         int             nlevels=0;      /* number of levels in the btree */
527
528         ASSERT(xfs_btree_cur_zone != NULL);
529         /*
530          * Allocate a new cursor.
531          */
532         cur = kmem_zone_zalloc(xfs_btree_cur_zone, KM_SLEEP);
533         /*
534          * Deduce the number of btree levels from the arguments.
535          */
536         switch (btnum) {
537         case XFS_BTNUM_BNO:
538         case XFS_BTNUM_CNT:
539                 agf = XFS_BUF_TO_AGF(agbp);
540                 nlevels = be32_to_cpu(agf->agf_levels[btnum]);
541                 break;
542         case XFS_BTNUM_BMAP:
543                 ifp = XFS_IFORK_PTR(ip, whichfork);
544                 nlevels = be16_to_cpu(ifp->if_broot->bb_level) + 1;
545                 break;
546         case XFS_BTNUM_INO:
547                 agi = XFS_BUF_TO_AGI(agbp);
548                 nlevels = be32_to_cpu(agi->agi_level);
549                 break;
550         default:
551                 ASSERT(0);
552         }
553         /*
554          * Fill in the common fields.
555          */
556         cur->bc_tp = tp;
557         cur->bc_mp = mp;
558         cur->bc_nlevels = nlevels;
559         cur->bc_btnum = btnum;
560         cur->bc_blocklog = mp->m_sb.sb_blocklog;
561         /*
562          * Fill in private fields.
563          */
564         switch (btnum) {
565         case XFS_BTNUM_BNO:
566         case XFS_BTNUM_CNT:
567                 /*
568                  * Allocation btree fields.
569                  */
570                 cur->bc_private.a.agbp = agbp;
571                 cur->bc_private.a.agno = agno;
572                 break;
573         case XFS_BTNUM_INO:
574                 /*
575                  * Inode allocation btree fields.
576                  */
577                 cur->bc_private.a.agbp = agbp;
578                 cur->bc_private.a.agno = agno;
579                 break;
580         case XFS_BTNUM_BMAP:
581                 /*
582                  * Bmap btree fields.
583                  */
584                 cur->bc_private.b.forksize = XFS_IFORK_SIZE(ip, whichfork);
585                 cur->bc_private.b.ip = ip;
586                 cur->bc_private.b.firstblock = NULLFSBLOCK;
587                 cur->bc_private.b.flist = NULL;
588                 cur->bc_private.b.allocated = 0;
589                 cur->bc_private.b.flags = 0;
590                 cur->bc_private.b.whichfork = whichfork;
591                 break;
592         default:
593                 ASSERT(0);
594         }
595         return cur;
596 }
597
598 /*
599  * Check for the cursor referring to the last block at the given level.
600  */
601 int                                     /* 1=is last block, 0=not last block */
602 xfs_btree_islastblock(
603         xfs_btree_cur_t         *cur,   /* btree cursor */
604         int                     level)  /* level to check */
605 {
606         xfs_btree_block_t       *block; /* generic btree block pointer */
607         xfs_buf_t               *bp;    /* buffer containing block */
608
609         block = xfs_btree_get_block(cur, level, &bp);
610         xfs_btree_check_block(cur, block, level, bp);
611         if (XFS_BTREE_LONG_PTRS(cur->bc_btnum))
612                 return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
613         else
614                 return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
615 }
616
617 /*
618  * Change the cursor to point to the first record at the given level.
619  * Other levels are unaffected.
620  */
621 int                                     /* success=1, failure=0 */
622 xfs_btree_firstrec(
623         xfs_btree_cur_t         *cur,   /* btree cursor */
624         int                     level)  /* level to change */
625 {
626         xfs_btree_block_t       *block; /* generic btree block pointer */
627         xfs_buf_t               *bp;    /* buffer containing block */
628
629         /*
630          * Get the block pointer for this level.
631          */
632         block = xfs_btree_get_block(cur, level, &bp);
633         xfs_btree_check_block(cur, block, level, bp);
634         /*
635          * It's empty, there is no such record.
636          */
637         if (!block->bb_h.bb_numrecs)
638                 return 0;
639         /*
640          * Set the ptr value to 1, that's the first record/key.
641          */
642         cur->bc_ptrs[level] = 1;
643         return 1;
644 }
645
646 /*
647  * Change the cursor to point to the last record in the current block
648  * at the given level.  Other levels are unaffected.
649  */
650 int                                     /* success=1, failure=0 */
651 xfs_btree_lastrec(
652         xfs_btree_cur_t         *cur,   /* btree cursor */
653         int                     level)  /* level to change */
654 {
655         xfs_btree_block_t       *block; /* generic btree block pointer */
656         xfs_buf_t               *bp;    /* buffer containing block */
657
658         /*
659          * Get the block pointer for this level.
660          */
661         block = xfs_btree_get_block(cur, level, &bp);
662         xfs_btree_check_block(cur, block, level, bp);
663         /*
664          * It's empty, there is no such record.
665          */
666         if (!block->bb_h.bb_numrecs)
667                 return 0;
668         /*
669          * Set the ptr value to numrecs, that's the last record/key.
670          */
671         cur->bc_ptrs[level] = be16_to_cpu(block->bb_h.bb_numrecs);
672         return 1;
673 }
674
/*
 * Compute first and last byte offsets for the fields given.
 * Interprets the offsets table, which contains struct field offsets:
 * entry i is the starting offset of field i, so the table must hold
 * nbits + 1 entries (the final one marks the end of the last field).
 *
 * Only bits 0 .. nbits-1 of the mask are examined.  *first receives
 * the offset of the lowest selected field and *last the final byte of
 * the highest selected field.  If no bit in that range is set the
 * routine asserts and returns with *first and *last untouched; both
 * scans are bounded, so a bad mask can no longer walk off the table
 * or loop without end.
 */
void
xfs_btree_offsets(
	__int64_t	fields,		/* bitmask of fields */
	const short	*offsets,	/* table of field offsets */
	int		nbits,		/* number of bits to inspect */
	int		*first,		/* output: first byte offset */
	int		*last)		/* output: last byte offset */
{
	/* Work on an unsigned copy so shifts and tests are well defined. */
	const unsigned long long mask = (unsigned long long)fields;
	int		limit;		/* bits actually inspected */
	int		lo;		/* lowest set bit number */
	int		hi;		/* highest set bit number */

	ASSERT(fields != 0);

	/* The mask only has 64 bits; never shift further than that. */
	limit = nbits < 64 ? nbits : 64;

	/*
	 * Find the lowest bit, so the first byte offset.
	 */
	for (lo = 0; lo < limit; lo++) {
		if (mask & (1ULL << lo))
			break;
	}
	ASSERT(lo < limit);
	if (lo >= limit)
		return;

	/*
	 * Find the highest bit, so the last byte offset.  Bit lo is
	 * known to be set, so the scan can stop there.
	 */
	for (hi = limit - 1; hi > lo; hi--) {
		if (mask & (1ULL << hi))
			break;
	}

	*first = offsets[lo];
	*last = offsets[hi + 1] - 1;
}
710
711 /*
712  * Get a buffer for the block, return it read in.
713  * Long-form addressing.
714  */
715 int                                     /* error */
716 xfs_btree_read_bufl(
717         xfs_mount_t     *mp,            /* file system mount point */
718         xfs_trans_t     *tp,            /* transaction pointer */
719         xfs_fsblock_t   fsbno,          /* file system block number */
720         uint            lock,           /* lock flags for read_buf */
721         xfs_buf_t       **bpp,          /* buffer for fsbno */
722         int             refval)         /* ref count value for buffer */
723 {
724         xfs_buf_t       *bp;            /* return value */
725         xfs_daddr_t             d;              /* real disk block address */
726         int             error;
727
728         ASSERT(fsbno != NULLFSBLOCK);
729         d = XFS_FSB_TO_DADDR(mp, fsbno);
730         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
731                         mp->m_bsize, lock, &bp))) {
732                 return error;
733         }
734         ASSERT(!bp || !XFS_BUF_GETERROR(bp));
735         if (bp != NULL) {
736                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
737         }
738         *bpp = bp;
739         return 0;
740 }
741
742 /*
743  * Get a buffer for the block, return it read in.
744  * Short-form addressing.
745  */
746 int                                     /* error */
747 xfs_btree_read_bufs(
748         xfs_mount_t     *mp,            /* file system mount point */
749         xfs_trans_t     *tp,            /* transaction pointer */
750         xfs_agnumber_t  agno,           /* allocation group number */
751         xfs_agblock_t   agbno,          /* allocation group block number */
752         uint            lock,           /* lock flags for read_buf */
753         xfs_buf_t       **bpp,          /* buffer for agno/agbno */
754         int             refval)         /* ref count value for buffer */
755 {
756         xfs_buf_t       *bp;            /* return value */
757         xfs_daddr_t     d;              /* real disk block address */
758         int             error;
759
760         ASSERT(agno != NULLAGNUMBER);
761         ASSERT(agbno != NULLAGBLOCK);
762         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
763         if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
764                                         mp->m_bsize, lock, &bp))) {
765                 return error;
766         }
767         ASSERT(!bp || !XFS_BUF_GETERROR(bp));
768         if (bp != NULL) {
769                 switch (refval) {
770                 case XFS_ALLOC_BTREE_REF:
771                         XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
772                         break;
773                 case XFS_INO_BTREE_REF:
774                         XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, refval);
775                         break;
776                 }
777         }
778         *bpp = bp;
779         return 0;
780 }
781
782 /*
783  * Read-ahead the block, don't wait for it, don't return a buffer.
784  * Long-form addressing.
785  */
786 /* ARGSUSED */
787 void
788 xfs_btree_reada_bufl(
789         xfs_mount_t     *mp,            /* file system mount point */
790         xfs_fsblock_t   fsbno,          /* file system block number */
791         xfs_extlen_t    count)          /* count of filesystem blocks */
792 {
793         xfs_daddr_t             d;
794
795         ASSERT(fsbno != NULLFSBLOCK);
796         d = XFS_FSB_TO_DADDR(mp, fsbno);
797         xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
798 }
799
800 /*
801  * Read-ahead the block, don't wait for it, don't return a buffer.
802  * Short-form addressing.
803  */
804 /* ARGSUSED */
805 void
806 xfs_btree_reada_bufs(
807         xfs_mount_t     *mp,            /* file system mount point */
808         xfs_agnumber_t  agno,           /* allocation group number */
809         xfs_agblock_t   agbno,          /* allocation group block number */
810         xfs_extlen_t    count)          /* count of filesystem blocks */
811 {
812         xfs_daddr_t             d;
813
814         ASSERT(agno != NULLAGNUMBER);
815         ASSERT(agbno != NULLAGBLOCK);
816         d = XFS_AGB_TO_DADDR(mp, agno, agbno);
817         xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count);
818 }
819
820 /*
821  * Read-ahead btree blocks, at the given level.
822  * Bits in lr are set from XFS_BTCUR_{LEFT,RIGHT}RA.
823  */
824 int
825 xfs_btree_readahead_core(
826         xfs_btree_cur_t         *cur,           /* btree cursor */
827         int                     lev,            /* level in btree */
828         int                     lr)             /* left/right bits */
829 {
830         xfs_alloc_block_t       *a;
831         xfs_bmbt_block_t        *b;
832         xfs_inobt_block_t       *i;
833         int                     rval = 0;
834
835         ASSERT(cur->bc_bufs[lev] != NULL);
836         cur->bc_ra[lev] |= lr;
837         switch (cur->bc_btnum) {
838         case XFS_BTNUM_BNO:
839         case XFS_BTNUM_CNT:
840                 a = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]);
841                 if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(a->bb_leftsib) != NULLAGBLOCK) {
842                         xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
843                                 be32_to_cpu(a->bb_leftsib), 1);
844                         rval++;
845                 }
846                 if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(a->bb_rightsib) != NULLAGBLOCK) {
847                         xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
848                                 be32_to_cpu(a->bb_rightsib), 1);
849                         rval++;
850                 }
851                 break;
852         case XFS_BTNUM_BMAP:
853                 b = XFS_BUF_TO_BMBT_BLOCK(cur->bc_bufs[lev]);
854                 if ((lr & XFS_BTCUR_LEFTRA) && be64_to_cpu(b->bb_leftsib) != NULLDFSBNO) {
855                         xfs_btree_reada_bufl(cur->bc_mp, be64_to_cpu(b->bb_leftsib), 1);
856                         rval++;
857                 }
858                 if ((lr & XFS_BTCUR_RIGHTRA) && be64_to_cpu(b->bb_rightsib) != NULLDFSBNO) {
859                         xfs_btree_reada_bufl(cur->bc_mp, be64_to_cpu(b->bb_rightsib), 1);
860                         rval++;
861                 }
862                 break;
863         case XFS_BTNUM_INO:
864                 i = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]);
865                 if ((lr & XFS_BTCUR_LEFTRA) && be32_to_cpu(i->bb_leftsib) != NULLAGBLOCK) {
866                         xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
867                                 be32_to_cpu(i->bb_leftsib), 1);
868                         rval++;
869                 }
870                 if ((lr & XFS_BTCUR_RIGHTRA) && be32_to_cpu(i->bb_rightsib) != NULLAGBLOCK) {
871                         xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
872                                 be32_to_cpu(i->bb_rightsib), 1);
873                         rval++;
874                 }
875                 break;
876         default:
877                 ASSERT(0);
878         }
879         return rval;
880 }
881
882 /*
883  * Set the buffer for level "lev" in the cursor to bp, releasing
884  * any previous buffer.
885  */
886 void
887 xfs_btree_setbuf(
888         xfs_btree_cur_t         *cur,   /* btree cursor */
889         int                     lev,    /* level in btree */
890         xfs_buf_t               *bp)    /* new buffer to set */
891 {
892         xfs_btree_block_t       *b;     /* btree block */
893         xfs_buf_t               *obp;   /* old buffer pointer */
894
895         obp = cur->bc_bufs[lev];
896         if (obp)
897                 xfs_trans_brelse(cur->bc_tp, obp);
898         cur->bc_bufs[lev] = bp;
899         cur->bc_ra[lev] = 0;
900         if (!bp)
901                 return;
902         b = XFS_BUF_TO_BLOCK(bp);
903         if (XFS_BTREE_LONG_PTRS(cur->bc_btnum)) {
904                 if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
905                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
906                 if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
907                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
908         } else {
909                 if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK)
910                         cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
911                 if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK)
912                         cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
913         }
914 }