Btrfs: update space balancing code
[linux-2.6] / fs / xfs / xfs_ialloc_btree.c
1 /*
2  * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir2.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_mount.h"
30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir2_sf.h"
34 #include "xfs_attr_sf.h"
35 #include "xfs_dinode.h"
36 #include "xfs_inode.h"
37 #include "xfs_btree.h"
38 #include "xfs_ialloc.h"
39 #include "xfs_alloc.h"
40 #include "xfs_error.h"
41
42 STATIC void xfs_inobt_log_block(xfs_trans_t *, xfs_buf_t *, int);
43 STATIC void xfs_inobt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
44 STATIC void xfs_inobt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
45 STATIC void xfs_inobt_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
46 STATIC int xfs_inobt_lshift(xfs_btree_cur_t *, int, int *);
47 STATIC int xfs_inobt_newroot(xfs_btree_cur_t *, int *);
48 STATIC int xfs_inobt_rshift(xfs_btree_cur_t *, int, int *);
49 STATIC int xfs_inobt_split(xfs_btree_cur_t *, int, xfs_agblock_t *,
50                 xfs_inobt_key_t *, xfs_btree_cur_t **, int *);
51 STATIC int xfs_inobt_updkey(xfs_btree_cur_t *, xfs_inobt_key_t *, int);
52
53 /*
54  * Single level of the xfs_inobt_delete record deletion routine.
55  * Delete record pointed to by cur/level.
56  * Remove the record from its block then rebalance the tree.
57  * Return 0 for error, 1 for done, 2 to go on to the next level.
58  */
59 STATIC int                              /* error */
60 xfs_inobt_delrec(
61         xfs_btree_cur_t         *cur,   /* btree cursor */
62         int                     level,  /* level removing record from */
63         int                     *stat)  /* fail/done/go-on */
64 {
65         xfs_buf_t               *agbp;  /* buffer for a.g. inode header */
66         xfs_mount_t             *mp;    /* mount structure */
67         xfs_agi_t               *agi;   /* allocation group inode header */
68         xfs_inobt_block_t       *block; /* btree block record/key lives in */
69         xfs_agblock_t           bno;    /* btree block number */
70         xfs_buf_t               *bp;    /* buffer for block */
71         int                     error;  /* error return value */
72         int                     i;      /* loop index */
73         xfs_inobt_key_t         key;    /* kp points here if block is level 0 */
74         xfs_inobt_key_t         *kp = NULL;     /* pointer to btree keys */
75         xfs_agblock_t           lbno;   /* left block's block number */
76         xfs_buf_t               *lbp;   /* left block's buffer pointer */
77         xfs_inobt_block_t       *left;  /* left btree block */
78         xfs_inobt_key_t         *lkp;   /* left block key pointer */
79         xfs_inobt_ptr_t         *lpp;   /* left block address pointer */
80         int                     lrecs = 0;      /* number of records in left block */
81         xfs_inobt_rec_t         *lrp;   /* left block record pointer */
82         xfs_inobt_ptr_t         *pp = NULL;     /* pointer to btree addresses */
83         int                     ptr;    /* index in btree block for this rec */
84         xfs_agblock_t           rbno;   /* right block's block number */
85         xfs_buf_t               *rbp;   /* right block's buffer pointer */
86         xfs_inobt_block_t       *right; /* right btree block */
87         xfs_inobt_key_t         *rkp;   /* right block key pointer */
88         xfs_inobt_rec_t         *rp;    /* pointer to btree records */
89         xfs_inobt_ptr_t         *rpp;   /* right block address pointer */
90         int                     rrecs = 0;      /* number of records in right block */
91         int                     numrecs;
92         xfs_inobt_rec_t         *rrp;   /* right block record pointer */
93         xfs_btree_cur_t         *tcur;  /* temporary btree cursor */
94
95         mp = cur->bc_mp;
96
97         /*
98          * Get the index of the entry being deleted, check for nothing there.
99          */
100         ptr = cur->bc_ptrs[level];
101         if (ptr == 0) {
102                 *stat = 0;
103                 return 0;
104         }
105
106         /*
107          * Get the buffer & block containing the record or key/ptr.
108          */
109         bp = cur->bc_bufs[level];
110         block = XFS_BUF_TO_INOBT_BLOCK(bp);
111 #ifdef DEBUG
112         if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
113                 return error;
114 #endif
115         /*
116          * Fail if we're off the end of the block.
117          */
118
119         numrecs = be16_to_cpu(block->bb_numrecs);
120         if (ptr > numrecs) {
121                 *stat = 0;
122                 return 0;
123         }
124         /*
125          * It's a nonleaf.  Excise the key and ptr being deleted, by
126          * sliding the entries past them down one.
127          * Log the changed areas of the block.
128          */
129         if (level > 0) {
130                 kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
131                 pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
132 #ifdef DEBUG
133                 for (i = ptr; i < numrecs; i++) {
134                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i]), level)))
135                                 return error;
136                 }
137 #endif
138                 if (ptr < numrecs) {
139                         memmove(&kp[ptr - 1], &kp[ptr],
140                                 (numrecs - ptr) * sizeof(*kp));
141                         memmove(&pp[ptr - 1], &pp[ptr],
142                                 (numrecs - ptr) * sizeof(*kp));
143                         xfs_inobt_log_keys(cur, bp, ptr, numrecs - 1);
144                         xfs_inobt_log_ptrs(cur, bp, ptr, numrecs - 1);
145                 }
146         }
147         /*
148          * It's a leaf.  Excise the record being deleted, by sliding the
149          * entries past it down one.  Log the changed areas of the block.
150          */
151         else {
152                 rp = XFS_INOBT_REC_ADDR(block, 1, cur);
153                 if (ptr < numrecs) {
154                         memmove(&rp[ptr - 1], &rp[ptr],
155                                 (numrecs - ptr) * sizeof(*rp));
156                         xfs_inobt_log_recs(cur, bp, ptr, numrecs - 1);
157                 }
158                 /*
159                  * If it's the first record in the block, we'll need a key
160                  * structure to pass up to the next level (updkey).
161                  */
162                 if (ptr == 1) {
163                         key.ir_startino = rp->ir_startino;
164                         kp = &key;
165                 }
166         }
167         /*
168          * Decrement and log the number of entries in the block.
169          */
170         numrecs--;
171         block->bb_numrecs = cpu_to_be16(numrecs);
172         xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
173         /*
174          * Is this the root level?  If so, we're almost done.
175          */
176         if (level == cur->bc_nlevels - 1) {
177                 /*
178                  * If this is the root level,
179                  * and there's only one entry left,
180                  * and it's NOT the leaf level,
181                  * then we can get rid of this level.
182                  */
183                 if (numrecs == 1 && level > 0) {
184                         agbp = cur->bc_private.a.agbp;
185                         agi = XFS_BUF_TO_AGI(agbp);
186                         /*
187                          * pp is still set to the first pointer in the block.
188                          * Make it the new root of the btree.
189                          */
190                         bno = be32_to_cpu(agi->agi_root);
191                         agi->agi_root = *pp;
192                         be32_add_cpu(&agi->agi_level, -1);
193                         /*
194                          * Free the block.
195                          */
196                         if ((error = xfs_free_extent(cur->bc_tp,
197                                 XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno, bno), 1)))
198                                 return error;
199                         xfs_trans_binval(cur->bc_tp, bp);
200                         xfs_ialloc_log_agi(cur->bc_tp, agbp,
201                                 XFS_AGI_ROOT | XFS_AGI_LEVEL);
202                         /*
203                          * Update the cursor so there's one fewer level.
204                          */
205                         cur->bc_bufs[level] = NULL;
206                         cur->bc_nlevels--;
207                 } else if (level > 0 &&
208                            (error = xfs_inobt_decrement(cur, level, &i)))
209                         return error;
210                 *stat = 1;
211                 return 0;
212         }
213         /*
214          * If we deleted the leftmost entry in the block, update the
215          * key values above us in the tree.
216          */
217         if (ptr == 1 && (error = xfs_inobt_updkey(cur, kp, level + 1)))
218                 return error;
219         /*
220          * If the number of records remaining in the block is at least
221          * the minimum, we're done.
222          */
223         if (numrecs >= XFS_INOBT_BLOCK_MINRECS(level, cur)) {
224                 if (level > 0 &&
225                     (error = xfs_inobt_decrement(cur, level, &i)))
226                         return error;
227                 *stat = 1;
228                 return 0;
229         }
230         /*
231          * Otherwise, we have to move some records around to keep the
232          * tree balanced.  Look at the left and right sibling blocks to
233          * see if we can re-balance by moving only one record.
234          */
235         rbno = be32_to_cpu(block->bb_rightsib);
236         lbno = be32_to_cpu(block->bb_leftsib);
237         bno = NULLAGBLOCK;
238         ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
239         /*
240          * Duplicate the cursor so our btree manipulations here won't
241          * disrupt the next level up.
242          */
243         if ((error = xfs_btree_dup_cursor(cur, &tcur)))
244                 return error;
245         /*
246          * If there's a right sibling, see if it's ok to shift an entry
247          * out of it.
248          */
249         if (rbno != NULLAGBLOCK) {
250                 /*
251                  * Move the temp cursor to the last entry in the next block.
252                  * Actually any entry but the first would suffice.
253                  */
254                 i = xfs_btree_lastrec(tcur, level);
255                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
256                 if ((error = xfs_inobt_increment(tcur, level, &i)))
257                         goto error0;
258                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
259                 i = xfs_btree_lastrec(tcur, level);
260                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
261                 /*
262                  * Grab a pointer to the block.
263                  */
264                 rbp = tcur->bc_bufs[level];
265                 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
266 #ifdef DEBUG
267                 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
268                         goto error0;
269 #endif
270                 /*
271                  * Grab the current block number, for future use.
272                  */
273                 bno = be32_to_cpu(right->bb_leftsib);
274                 /*
275                  * If right block is full enough so that removing one entry
276                  * won't make it too empty, and left-shifting an entry out
277                  * of right to us works, we're done.
278                  */
279                 if (be16_to_cpu(right->bb_numrecs) - 1 >=
280                      XFS_INOBT_BLOCK_MINRECS(level, cur)) {
281                         if ((error = xfs_inobt_lshift(tcur, level, &i)))
282                                 goto error0;
283                         if (i) {
284                                 ASSERT(be16_to_cpu(block->bb_numrecs) >=
285                                        XFS_INOBT_BLOCK_MINRECS(level, cur));
286                                 xfs_btree_del_cursor(tcur,
287                                                      XFS_BTREE_NOERROR);
288                                 if (level > 0 &&
289                                     (error = xfs_inobt_decrement(cur, level,
290                                                 &i)))
291                                         return error;
292                                 *stat = 1;
293                                 return 0;
294                         }
295                 }
296                 /*
297                  * Otherwise, grab the number of records in right for
298                  * future reference, and fix up the temp cursor to point
299                  * to our block again (last record).
300                  */
301                 rrecs = be16_to_cpu(right->bb_numrecs);
302                 if (lbno != NULLAGBLOCK) {
303                         xfs_btree_firstrec(tcur, level);
304                         if ((error = xfs_inobt_decrement(tcur, level, &i)))
305                                 goto error0;
306                 }
307         }
308         /*
309          * If there's a left sibling, see if it's ok to shift an entry
310          * out of it.
311          */
312         if (lbno != NULLAGBLOCK) {
313                 /*
314                  * Move the temp cursor to the first entry in the
315                  * previous block.
316                  */
317                 xfs_btree_firstrec(tcur, level);
318                 if ((error = xfs_inobt_decrement(tcur, level, &i)))
319                         goto error0;
320                 xfs_btree_firstrec(tcur, level);
321                 /*
322                  * Grab a pointer to the block.
323                  */
324                 lbp = tcur->bc_bufs[level];
325                 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
326 #ifdef DEBUG
327                 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
328                         goto error0;
329 #endif
330                 /*
331                  * Grab the current block number, for future use.
332                  */
333                 bno = be32_to_cpu(left->bb_rightsib);
334                 /*
335                  * If left block is full enough so that removing one entry
336                  * won't make it too empty, and right-shifting an entry out
337                  * of left to us works, we're done.
338                  */
339                 if (be16_to_cpu(left->bb_numrecs) - 1 >=
340                      XFS_INOBT_BLOCK_MINRECS(level, cur)) {
341                         if ((error = xfs_inobt_rshift(tcur, level, &i)))
342                                 goto error0;
343                         if (i) {
344                                 ASSERT(be16_to_cpu(block->bb_numrecs) >=
345                                        XFS_INOBT_BLOCK_MINRECS(level, cur));
346                                 xfs_btree_del_cursor(tcur,
347                                                      XFS_BTREE_NOERROR);
348                                 if (level == 0)
349                                         cur->bc_ptrs[0]++;
350                                 *stat = 1;
351                                 return 0;
352                         }
353                 }
354                 /*
355                  * Otherwise, grab the number of records in right for
356                  * future reference.
357                  */
358                 lrecs = be16_to_cpu(left->bb_numrecs);
359         }
360         /*
361          * Delete the temp cursor, we're done with it.
362          */
363         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
364         /*
365          * If here, we need to do a join to keep the tree balanced.
366          */
367         ASSERT(bno != NULLAGBLOCK);
368         /*
369          * See if we can join with the left neighbor block.
370          */
371         if (lbno != NULLAGBLOCK &&
372             lrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
373                 /*
374                  * Set "right" to be the starting block,
375                  * "left" to be the left neighbor.
376                  */
377                 rbno = bno;
378                 right = block;
379                 rrecs = be16_to_cpu(right->bb_numrecs);
380                 rbp = bp;
381                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
382                                 cur->bc_private.a.agno, lbno, 0, &lbp,
383                                 XFS_INO_BTREE_REF)))
384                         return error;
385                 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
386                 lrecs = be16_to_cpu(left->bb_numrecs);
387                 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
388                         return error;
389         }
390         /*
391          * If that won't work, see if we can join with the right neighbor block.
392          */
393         else if (rbno != NULLAGBLOCK &&
394                  rrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
395                 /*
396                  * Set "left" to be the starting block,
397                  * "right" to be the right neighbor.
398                  */
399                 lbno = bno;
400                 left = block;
401                 lrecs = be16_to_cpu(left->bb_numrecs);
402                 lbp = bp;
403                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
404                                 cur->bc_private.a.agno, rbno, 0, &rbp,
405                                 XFS_INO_BTREE_REF)))
406                         return error;
407                 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
408                 rrecs = be16_to_cpu(right->bb_numrecs);
409                 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
410                         return error;
411         }
412         /*
413          * Otherwise, we can't fix the imbalance.
414          * Just return.  This is probably a logic error, but it's not fatal.
415          */
416         else {
417                 if (level > 0 && (error = xfs_inobt_decrement(cur, level, &i)))
418                         return error;
419                 *stat = 1;
420                 return 0;
421         }
422         /*
423          * We're now going to join "left" and "right" by moving all the stuff
424          * in "right" to "left" and deleting "right".
425          */
426         if (level > 0) {
427                 /*
428                  * It's a non-leaf.  Move keys and pointers.
429                  */
430                 lkp = XFS_INOBT_KEY_ADDR(left, lrecs + 1, cur);
431                 lpp = XFS_INOBT_PTR_ADDR(left, lrecs + 1, cur);
432                 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
433                 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
434 #ifdef DEBUG
435                 for (i = 0; i < rrecs; i++) {
436                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
437                                 return error;
438                 }
439 #endif
440                 memcpy(lkp, rkp, rrecs * sizeof(*lkp));
441                 memcpy(lpp, rpp, rrecs * sizeof(*lpp));
442                 xfs_inobt_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
443                 xfs_inobt_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
444         } else {
445                 /*
446                  * It's a leaf.  Move records.
447                  */
448                 lrp = XFS_INOBT_REC_ADDR(left, lrecs + 1, cur);
449                 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
450                 memcpy(lrp, rrp, rrecs * sizeof(*lrp));
451                 xfs_inobt_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
452         }
453         /*
454          * If we joined with the left neighbor, set the buffer in the
455          * cursor to the left block, and fix up the index.
456          */
457         if (bp != lbp) {
458                 xfs_btree_setbuf(cur, level, lbp);
459                 cur->bc_ptrs[level] += lrecs;
460         }
461         /*
462          * If we joined with the right neighbor and there's a level above
463          * us, increment the cursor at that level.
464          */
465         else if (level + 1 < cur->bc_nlevels &&
466                  (error = xfs_alloc_increment(cur, level + 1, &i)))
467                 return error;
468         /*
469          * Fix up the number of records in the surviving block.
470          */
471         lrecs += rrecs;
472         left->bb_numrecs = cpu_to_be16(lrecs);
473         /*
474          * Fix up the right block pointer in the surviving block, and log it.
475          */
476         left->bb_rightsib = right->bb_rightsib;
477         xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
478         /*
479          * If there is a right sibling now, make it point to the
480          * remaining block.
481          */
482         if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
483                 xfs_inobt_block_t       *rrblock;
484                 xfs_buf_t               *rrbp;
485
486                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
487                                 cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0,
488                                 &rrbp, XFS_INO_BTREE_REF)))
489                         return error;
490                 rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
491                 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
492                         return error;
493                 rrblock->bb_leftsib = cpu_to_be32(lbno);
494                 xfs_inobt_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
495         }
496         /*
497          * Free the deleting block.
498          */
499         if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp,
500                                      cur->bc_private.a.agno, rbno), 1)))
501                 return error;
502         xfs_trans_binval(cur->bc_tp, rbp);
503         /*
504          * Readjust the ptr at this level if it's not a leaf, since it's
505          * still pointing at the deletion point, which makes the cursor
506          * inconsistent.  If this makes the ptr 0, the caller fixes it up.
507          * We can't use decrement because it would change the next level up.
508          */
509         if (level > 0)
510                 cur->bc_ptrs[level]--;
511         /*
512          * Return value means the next level up has something to do.
513          */
514         *stat = 2;
515         return 0;
516
517 error0:
518         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
519         return error;
520 }
521
522 /*
523  * Insert one record/level.  Return information to the caller
524  * allowing the next level up to proceed if necessary.
525  */
526 STATIC int                              /* error */
527 xfs_inobt_insrec(
528         xfs_btree_cur_t         *cur,   /* btree cursor */
529         int                     level,  /* level to insert record at */
530         xfs_agblock_t           *bnop,  /* i/o: block number inserted */
531         xfs_inobt_rec_t         *recp,  /* i/o: record data inserted */
532         xfs_btree_cur_t         **curp, /* output: new cursor replacing cur */
533         int                     *stat)  /* success/failure */
534 {
535         xfs_inobt_block_t       *block; /* btree block record/key lives in */
536         xfs_buf_t               *bp;    /* buffer for block */
537         int                     error;  /* error return value */
538         int                     i;      /* loop index */
539         xfs_inobt_key_t         key;    /* key value being inserted */
540         xfs_inobt_key_t         *kp=NULL;       /* pointer to btree keys */
541         xfs_agblock_t           nbno;   /* block number of allocated block */
542         xfs_btree_cur_t         *ncur;  /* new cursor to be used at next lvl */
543         xfs_inobt_key_t         nkey;   /* new key value, from split */
544         xfs_inobt_rec_t         nrec;   /* new record value, for caller */
545         int                     numrecs;
546         int                     optr;   /* old ptr value */
547         xfs_inobt_ptr_t         *pp;    /* pointer to btree addresses */
548         int                     ptr;    /* index in btree block for this rec */
549         xfs_inobt_rec_t         *rp=NULL;       /* pointer to btree records */
550
551         /*
552          * GCC doesn't understand the (arguably complex) control flow in
553          * this function and complains about uninitialized structure fields
554          * without this.
555          */
556         memset(&nrec, 0, sizeof(nrec));
557
558         /*
559          * If we made it to the root level, allocate a new root block
560          * and we're done.
561          */
562         if (level >= cur->bc_nlevels) {
563                 error = xfs_inobt_newroot(cur, &i);
564                 *bnop = NULLAGBLOCK;
565                 *stat = i;
566                 return error;
567         }
568         /*
569          * Make a key out of the record data to be inserted, and save it.
570          */
571         key.ir_startino = recp->ir_startino;
572         optr = ptr = cur->bc_ptrs[level];
573         /*
574          * If we're off the left edge, return failure.
575          */
576         if (ptr == 0) {
577                 *stat = 0;
578                 return 0;
579         }
580         /*
581          * Get pointers to the btree buffer and block.
582          */
583         bp = cur->bc_bufs[level];
584         block = XFS_BUF_TO_INOBT_BLOCK(bp);
585         numrecs = be16_to_cpu(block->bb_numrecs);
586 #ifdef DEBUG
587         if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
588                 return error;
589         /*
590          * Check that the new entry is being inserted in the right place.
591          */
592         if (ptr <= numrecs) {
593                 if (level == 0) {
594                         rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
595                         xfs_btree_check_rec(cur->bc_btnum, recp, rp);
596                 } else {
597                         kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
598                         xfs_btree_check_key(cur->bc_btnum, &key, kp);
599                 }
600         }
601 #endif
602         nbno = NULLAGBLOCK;
603         ncur = NULL;
604         /*
605          * If the block is full, we can't insert the new entry until we
606          * make the block un-full.
607          */
608         if (numrecs == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
609                 /*
610                  * First, try shifting an entry to the right neighbor.
611                  */
612                 if ((error = xfs_inobt_rshift(cur, level, &i)))
613                         return error;
614                 if (i) {
615                         /* nothing */
616                 }
617                 /*
618                  * Next, try shifting an entry to the left neighbor.
619                  */
620                 else {
621                         if ((error = xfs_inobt_lshift(cur, level, &i)))
622                                 return error;
623                         if (i) {
624                                 optr = ptr = cur->bc_ptrs[level];
625                         } else {
626                                 /*
627                                  * Next, try splitting the current block
628                                  * in half. If this works we have to
629                                  * re-set our variables because
630                                  * we could be in a different block now.
631                                  */
632                                 if ((error = xfs_inobt_split(cur, level, &nbno,
633                                                 &nkey, &ncur, &i)))
634                                         return error;
635                                 if (i) {
636                                         bp = cur->bc_bufs[level];
637                                         block = XFS_BUF_TO_INOBT_BLOCK(bp);
638 #ifdef DEBUG
639                                         if ((error = xfs_btree_check_sblock(cur,
640                                                         block, level, bp)))
641                                                 return error;
642 #endif
643                                         ptr = cur->bc_ptrs[level];
644                                         nrec.ir_startino = nkey.ir_startino;
645                                 } else {
646                                         /*
647                                          * Otherwise the insert fails.
648                                          */
649                                         *stat = 0;
650                                         return 0;
651                                 }
652                         }
653                 }
654         }
655         /*
656          * At this point we know there's room for our new entry in the block
657          * we're pointing at.
658          */
659         numrecs = be16_to_cpu(block->bb_numrecs);
660         if (level > 0) {
661                 /*
662                  * It's a non-leaf entry.  Make a hole for the new data
663                  * in the key and ptr regions of the block.
664                  */
665                 kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
666                 pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
667 #ifdef DEBUG
668                 for (i = numrecs; i >= ptr; i--) {
669                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level)))
670                                 return error;
671                 }
672 #endif
673                 memmove(&kp[ptr], &kp[ptr - 1],
674                         (numrecs - ptr + 1) * sizeof(*kp));
675                 memmove(&pp[ptr], &pp[ptr - 1],
676                         (numrecs - ptr + 1) * sizeof(*pp));
677                 /*
678                  * Now stuff the new data in, bump numrecs and log the new data.
679                  */
680 #ifdef DEBUG
681                 if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
682                         return error;
683 #endif
684                 kp[ptr - 1] = key;
685                 pp[ptr - 1] = cpu_to_be32(*bnop);
686                 numrecs++;
687                 block->bb_numrecs = cpu_to_be16(numrecs);
688                 xfs_inobt_log_keys(cur, bp, ptr, numrecs);
689                 xfs_inobt_log_ptrs(cur, bp, ptr, numrecs);
690         } else {
691                 /*
692                  * It's a leaf entry.  Make a hole for the new record.
693                  */
694                 rp = XFS_INOBT_REC_ADDR(block, 1, cur);
695                 memmove(&rp[ptr], &rp[ptr - 1],
696                         (numrecs - ptr + 1) * sizeof(*rp));
697                 /*
698                  * Now stuff the new record in, bump numrecs
699                  * and log the new data.
700                  */
701                 rp[ptr - 1] = *recp;
702                 numrecs++;
703                 block->bb_numrecs = cpu_to_be16(numrecs);
704                 xfs_inobt_log_recs(cur, bp, ptr, numrecs);
705         }
706         /*
707          * Log the new number of records in the btree header.
708          */
709         xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
710 #ifdef DEBUG
711         /*
712          * Check that the key/record is in the right place, now.
713          */
714         if (ptr < numrecs) {
715                 if (level == 0)
716                         xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
717                                 rp + ptr);
718                 else
719                         xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
720                                 kp + ptr);
721         }
722 #endif
723         /*
724          * If we inserted at the start of a block, update the parents' keys.
725          */
726         if (optr == 1 && (error = xfs_inobt_updkey(cur, &key, level + 1)))
727                 return error;
728         /*
729          * Return the new block number, if any.
730          * If there is one, give back a record value and a cursor too.
731          */
732         *bnop = nbno;
733         if (nbno != NULLAGBLOCK) {
734                 *recp = nrec;
735                 *curp = ncur;
736         }
737         *stat = 1;
738         return 0;
739 }
740
741 /*
742  * Log header fields from a btree block.
743  */
744 STATIC void
745 xfs_inobt_log_block(
746         xfs_trans_t             *tp,    /* transaction pointer */
747         xfs_buf_t               *bp,    /* buffer containing btree block */
748         int                     fields) /* mask of fields: XFS_BB_... */
749 {
750         int                     first;  /* first byte offset logged */
751         int                     last;   /* last byte offset logged */
752         static const short      offsets[] = {   /* table of offsets */
753                 offsetof(xfs_inobt_block_t, bb_magic),
754                 offsetof(xfs_inobt_block_t, bb_level),
755                 offsetof(xfs_inobt_block_t, bb_numrecs),
756                 offsetof(xfs_inobt_block_t, bb_leftsib),
757                 offsetof(xfs_inobt_block_t, bb_rightsib),
758                 sizeof(xfs_inobt_block_t)
759         };
760
761         xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last);
762         xfs_trans_log_buf(tp, bp, first, last);
763 }
764
765 /*
766  * Log keys from a btree block (nonleaf).
767  */
768 STATIC void
769 xfs_inobt_log_keys(
770         xfs_btree_cur_t         *cur,   /* btree cursor */
771         xfs_buf_t               *bp,    /* buffer containing btree block */
772         int                     kfirst, /* index of first key to log */
773         int                     klast)  /* index of last key to log */
774 {
775         xfs_inobt_block_t       *block; /* btree block to log from */
776         int                     first;  /* first byte offset logged */
777         xfs_inobt_key_t         *kp;    /* key pointer in btree block */
778         int                     last;   /* last byte offset logged */
779
780         block = XFS_BUF_TO_INOBT_BLOCK(bp);
781         kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
782         first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
783         last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
784         xfs_trans_log_buf(cur->bc_tp, bp, first, last);
785 }
786
787 /*
788  * Log block pointer fields from a btree block (nonleaf).
789  */
790 STATIC void
791 xfs_inobt_log_ptrs(
792         xfs_btree_cur_t         *cur,   /* btree cursor */
793         xfs_buf_t               *bp,    /* buffer containing btree block */
794         int                     pfirst, /* index of first pointer to log */
795         int                     plast)  /* index of last pointer to log */
796 {
797         xfs_inobt_block_t       *block; /* btree block to log from */
798         int                     first;  /* first byte offset logged */
799         int                     last;   /* last byte offset logged */
800         xfs_inobt_ptr_t         *pp;    /* block-pointer pointer in btree blk */
801
802         block = XFS_BUF_TO_INOBT_BLOCK(bp);
803         pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
804         first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
805         last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
806         xfs_trans_log_buf(cur->bc_tp, bp, first, last);
807 }
808
809 /*
810  * Log records from a btree block (leaf).
811  */
812 STATIC void
813 xfs_inobt_log_recs(
814         xfs_btree_cur_t         *cur,   /* btree cursor */
815         xfs_buf_t               *bp,    /* buffer containing btree block */
816         int                     rfirst, /* index of first record to log */
817         int                     rlast)  /* index of last record to log */
818 {
819         xfs_inobt_block_t       *block; /* btree block to log from */
820         int                     first;  /* first byte offset logged */
821         int                     last;   /* last byte offset logged */
822         xfs_inobt_rec_t         *rp;    /* record pointer for btree block */
823
824         block = XFS_BUF_TO_INOBT_BLOCK(bp);
825         rp = XFS_INOBT_REC_ADDR(block, 1, cur);
826         first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
827         last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
828         xfs_trans_log_buf(cur->bc_tp, bp, first, last);
829 }
830
831 /*
832  * Lookup the record.  The cursor is made to point to it, based on dir.
833  * Return 0 if can't find any such record, 1 for success.
834  */
835 STATIC int                              /* error */
836 xfs_inobt_lookup(
837         xfs_btree_cur_t         *cur,   /* btree cursor */
838         xfs_lookup_t            dir,    /* <=, ==, or >= */
839         int                     *stat)  /* success/failure */
840 {
841         xfs_agblock_t           agbno;  /* a.g. relative btree block number */
842         xfs_agnumber_t          agno;   /* allocation group number */
843         xfs_inobt_block_t       *block=NULL;    /* current btree block */
844         __int64_t               diff;   /* difference for the current key */
845         int                     error;  /* error return value */
846         int                     keyno=0;        /* current key number */
847         int                     level;  /* level in the btree */
848         xfs_mount_t             *mp;    /* file system mount point */
849
850         /*
851          * Get the allocation group header, and the root block number.
852          */
853         mp = cur->bc_mp;
854         {
855                 xfs_agi_t       *agi;   /* a.g. inode header */
856
857                 agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
858                 agno = be32_to_cpu(agi->agi_seqno);
859                 agbno = be32_to_cpu(agi->agi_root);
860         }
861         /*
862          * Iterate over each level in the btree, starting at the root.
863          * For each level above the leaves, find the key we need, based
864          * on the lookup record, then follow the corresponding block
865          * pointer down to the next level.
866          */
867         for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
868                 xfs_buf_t       *bp;    /* buffer pointer for btree block */
869                 xfs_daddr_t     d;      /* disk address of btree block */
870
871                 /*
872                  * Get the disk address we're looking for.
873                  */
874                 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
875                 /*
876                  * If the old buffer at this level is for a different block,
877                  * throw it away, otherwise just use it.
878                  */
879                 bp = cur->bc_bufs[level];
880                 if (bp && XFS_BUF_ADDR(bp) != d)
881                         bp = NULL;
882                 if (!bp) {
883                         /*
884                          * Need to get a new buffer.  Read it, then
885                          * set it in the cursor, releasing the old one.
886                          */
887                         if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
888                                         agno, agbno, 0, &bp, XFS_INO_BTREE_REF)))
889                                 return error;
890                         xfs_btree_setbuf(cur, level, bp);
891                         /*
892                          * Point to the btree block, now that we have the buffer
893                          */
894                         block = XFS_BUF_TO_INOBT_BLOCK(bp);
895                         if ((error = xfs_btree_check_sblock(cur, block, level,
896                                         bp)))
897                                 return error;
898                 } else
899                         block = XFS_BUF_TO_INOBT_BLOCK(bp);
900                 /*
901                  * If we already had a key match at a higher level, we know
902                  * we need to use the first entry in this block.
903                  */
904                 if (diff == 0)
905                         keyno = 1;
906                 /*
907                  * Otherwise we need to search this block.  Do a binary search.
908                  */
909                 else {
910                         int             high;   /* high entry number */
911                         xfs_inobt_key_t *kkbase=NULL;/* base of keys in block */
912                         xfs_inobt_rec_t *krbase=NULL;/* base of records in block */
913                         int             low;    /* low entry number */
914
915                         /*
916                          * Get a pointer to keys or records.
917                          */
918                         if (level > 0)
919                                 kkbase = XFS_INOBT_KEY_ADDR(block, 1, cur);
920                         else
921                                 krbase = XFS_INOBT_REC_ADDR(block, 1, cur);
922                         /*
923                          * Set low and high entry numbers, 1-based.
924                          */
925                         low = 1;
926                         if (!(high = be16_to_cpu(block->bb_numrecs))) {
927                                 /*
928                                  * If the block is empty, the tree must
929                                  * be an empty leaf.
930                                  */
931                                 ASSERT(level == 0 && cur->bc_nlevels == 1);
932                                 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
933                                 *stat = 0;
934                                 return 0;
935                         }
936                         /*
937                          * Binary search the block.
938                          */
939                         while (low <= high) {
940                                 xfs_agino_t     startino;       /* key value */
941
942                                 /*
943                                  * keyno is average of low and high.
944                                  */
945                                 keyno = (low + high) >> 1;
946                                 /*
947                                  * Get startino.
948                                  */
949                                 if (level > 0) {
950                                         xfs_inobt_key_t *kkp;
951
952                                         kkp = kkbase + keyno - 1;
953                                         startino = be32_to_cpu(kkp->ir_startino);
954                                 } else {
955                                         xfs_inobt_rec_t *krp;
956
957                                         krp = krbase + keyno - 1;
958                                         startino = be32_to_cpu(krp->ir_startino);
959                                 }
960                                 /*
961                                  * Compute difference to get next direction.
962                                  */
963                                 diff = (__int64_t)
964                                         startino - cur->bc_rec.i.ir_startino;
965                                 /*
966                                  * Less than, move right.
967                                  */
968                                 if (diff < 0)
969                                         low = keyno + 1;
970                                 /*
971                                  * Greater than, move left.
972                                  */
973                                 else if (diff > 0)
974                                         high = keyno - 1;
975                                 /*
976                                  * Equal, we're done.
977                                  */
978                                 else
979                                         break;
980                         }
981                 }
982                 /*
983                  * If there are more levels, set up for the next level
984                  * by getting the block number and filling in the cursor.
985                  */
986                 if (level > 0) {
987                         /*
988                          * If we moved left, need the previous key number,
989                          * unless there isn't one.
990                          */
991                         if (diff > 0 && --keyno < 1)
992                                 keyno = 1;
993                         agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, keyno, cur));
994 #ifdef DEBUG
995                         if ((error = xfs_btree_check_sptr(cur, agbno, level)))
996                                 return error;
997 #endif
998                         cur->bc_ptrs[level] = keyno;
999                 }
1000         }
1001         /*
1002          * Done with the search.
1003          * See if we need to adjust the results.
1004          */
1005         if (dir != XFS_LOOKUP_LE && diff < 0) {
1006                 keyno++;
1007                 /*
1008                  * If ge search and we went off the end of the block, but it's
1009                  * not the last block, we're in the wrong block.
1010                  */
1011                 if (dir == XFS_LOOKUP_GE &&
1012                     keyno > be16_to_cpu(block->bb_numrecs) &&
1013                     be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
1014                         int     i;
1015
1016                         cur->bc_ptrs[0] = keyno;
1017                         if ((error = xfs_inobt_increment(cur, 0, &i)))
1018                                 return error;
1019                         ASSERT(i == 1);
1020                         *stat = 1;
1021                         return 0;
1022                 }
1023         }
1024         else if (dir == XFS_LOOKUP_LE && diff > 0)
1025                 keyno--;
1026         cur->bc_ptrs[0] = keyno;
1027         /*
1028          * Return if we succeeded or not.
1029          */
1030         if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs))
1031                 *stat = 0;
1032         else
1033                 *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
1034         return 0;
1035 }
1036
1037 /*
1038  * Move 1 record left from cur/level if possible.
1039  * Update cur to reflect the new path.
1040  */
1041 STATIC int                              /* error */
1042 xfs_inobt_lshift(
1043         xfs_btree_cur_t         *cur,   /* btree cursor */
1044         int                     level,  /* level to shift record on */
1045         int                     *stat)  /* success/failure */
1046 {
1047         int                     error;  /* error return value */
1048 #ifdef DEBUG
1049         int                     i;      /* loop index */
1050 #endif
1051         xfs_inobt_key_t         key;    /* key value for leaf level upward */
1052         xfs_buf_t               *lbp;   /* buffer for left neighbor block */
1053         xfs_inobt_block_t       *left;  /* left neighbor btree block */
1054         xfs_inobt_key_t         *lkp=NULL;      /* key pointer for left block */
1055         xfs_inobt_ptr_t         *lpp;   /* address pointer for left block */
1056         xfs_inobt_rec_t         *lrp=NULL;      /* record pointer for left block */
1057         int                     nrec;   /* new number of left block entries */
1058         xfs_buf_t               *rbp;   /* buffer for right (current) block */
1059         xfs_inobt_block_t       *right; /* right (current) btree block */
1060         xfs_inobt_key_t         *rkp=NULL;      /* key pointer for right block */
1061         xfs_inobt_ptr_t         *rpp=NULL;      /* address pointer for right block */
1062         xfs_inobt_rec_t         *rrp=NULL;      /* record pointer for right block */
1063
1064         /*
1065          * Set up variables for this block as "right".
1066          */
1067         rbp = cur->bc_bufs[level];
1068         right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1069 #ifdef DEBUG
1070         if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1071                 return error;
1072 #endif
1073         /*
1074          * If we've got no left sibling then we can't shift an entry left.
1075          */
1076         if (be32_to_cpu(right->bb_leftsib) == NULLAGBLOCK) {
1077                 *stat = 0;
1078                 return 0;
1079         }
1080         /*
1081          * If the cursor entry is the one that would be moved, don't
1082          * do it... it's too complicated.
1083          */
1084         if (cur->bc_ptrs[level] <= 1) {
1085                 *stat = 0;
1086                 return 0;
1087         }
1088         /*
1089          * Set up the left neighbor as "left".
1090          */
1091         if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1092                         cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib),
1093                         0, &lbp, XFS_INO_BTREE_REF)))
1094                 return error;
1095         left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1096         if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1097                 return error;
1098         /*
1099          * If it's full, it can't take another entry.
1100          */
1101         if (be16_to_cpu(left->bb_numrecs) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
1102                 *stat = 0;
1103                 return 0;
1104         }
1105         nrec = be16_to_cpu(left->bb_numrecs) + 1;
1106         /*
1107          * If non-leaf, copy a key and a ptr to the left block.
1108          */
1109         if (level > 0) {
1110                 lkp = XFS_INOBT_KEY_ADDR(left, nrec, cur);
1111                 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1112                 *lkp = *rkp;
1113                 xfs_inobt_log_keys(cur, lbp, nrec, nrec);
1114                 lpp = XFS_INOBT_PTR_ADDR(left, nrec, cur);
1115                 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1116 #ifdef DEBUG
1117                 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level)))
1118                         return error;
1119 #endif
1120                 *lpp = *rpp;
1121                 xfs_inobt_log_ptrs(cur, lbp, nrec, nrec);
1122         }
1123         /*
1124          * If leaf, copy a record to the left block.
1125          */
1126         else {
1127                 lrp = XFS_INOBT_REC_ADDR(left, nrec, cur);
1128                 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1129                 *lrp = *rrp;
1130                 xfs_inobt_log_recs(cur, lbp, nrec, nrec);
1131         }
1132         /*
1133          * Bump and log left's numrecs, decrement and log right's numrecs.
1134          */
1135         be16_add_cpu(&left->bb_numrecs, 1);
1136         xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1137 #ifdef DEBUG
1138         if (level > 0)
1139                 xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
1140         else
1141                 xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
1142 #endif
1143         be16_add_cpu(&right->bb_numrecs, -1);
1144         xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1145         /*
1146          * Slide the contents of right down one entry.
1147          */
1148         if (level > 0) {
1149 #ifdef DEBUG
1150                 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1151                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i + 1]),
1152                                         level)))
1153                                 return error;
1154                 }
1155 #endif
1156                 memmove(rkp, rkp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1157                 memmove(rpp, rpp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1158                 xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1159                 xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1160         } else {
1161                 memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1162                 xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1163                 key.ir_startino = rrp->ir_startino;
1164                 rkp = &key;
1165         }
1166         /*
1167          * Update the parent key values of right.
1168          */
1169         if ((error = xfs_inobt_updkey(cur, rkp, level + 1)))
1170                 return error;
1171         /*
1172          * Slide the cursor value left one.
1173          */
1174         cur->bc_ptrs[level]--;
1175         *stat = 1;
1176         return 0;
1177 }
1178
1179 /*
1180  * Allocate a new root block, fill it in.
1181  */
1182 STATIC int                              /* error */
1183 xfs_inobt_newroot(
1184         xfs_btree_cur_t         *cur,   /* btree cursor */
1185         int                     *stat)  /* success/failure */
1186 {
1187         xfs_agi_t               *agi;   /* a.g. inode header */
1188         xfs_alloc_arg_t         args;   /* allocation argument structure */
1189         xfs_inobt_block_t       *block; /* one half of the old root block */
1190         xfs_buf_t               *bp;    /* buffer containing block */
1191         int                     error;  /* error return value */
1192         xfs_inobt_key_t         *kp;    /* btree key pointer */
1193         xfs_agblock_t           lbno;   /* left block number */
1194         xfs_buf_t               *lbp;   /* left buffer pointer */
1195         xfs_inobt_block_t       *left;  /* left btree block */
1196         xfs_buf_t               *nbp;   /* new (root) buffer */
1197         xfs_inobt_block_t       *new;   /* new (root) btree block */
1198         int                     nptr;   /* new value for key index, 1 or 2 */
1199         xfs_inobt_ptr_t         *pp;    /* btree address pointer */
1200         xfs_agblock_t           rbno;   /* right block number */
1201         xfs_buf_t               *rbp;   /* right buffer pointer */
1202         xfs_inobt_block_t       *right; /* right btree block */
1203         xfs_inobt_rec_t         *rp;    /* btree record pointer */
1204
1205         ASSERT(cur->bc_nlevels < XFS_IN_MAXLEVELS(cur->bc_mp));
1206
1207         /*
1208          * Get a block & a buffer.
1209          */
1210         agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp);
1211         args.tp = cur->bc_tp;
1212         args.mp = cur->bc_mp;
1213         args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno,
1214                 be32_to_cpu(agi->agi_root));
1215         args.mod = args.minleft = args.alignment = args.total = args.wasdel =
1216                 args.isfl = args.userdata = args.minalignslop = 0;
1217         args.minlen = args.maxlen = args.prod = 1;
1218         args.type = XFS_ALLOCTYPE_NEAR_BNO;
1219         if ((error = xfs_alloc_vextent(&args)))
1220                 return error;
1221         /*
1222          * None available, we fail.
1223          */
1224         if (args.fsbno == NULLFSBLOCK) {
1225                 *stat = 0;
1226                 return 0;
1227         }
1228         ASSERT(args.len == 1);
1229         nbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
1230         new = XFS_BUF_TO_INOBT_BLOCK(nbp);
1231         /*
1232          * Set the root data in the a.g. inode structure.
1233          */
1234         agi->agi_root = cpu_to_be32(args.agbno);
1235         be32_add_cpu(&agi->agi_level, 1);
1236         xfs_ialloc_log_agi(args.tp, cur->bc_private.a.agbp,
1237                 XFS_AGI_ROOT | XFS_AGI_LEVEL);
1238         /*
1239          * At the previous root level there are now two blocks: the old
1240          * root, and the new block generated when it was split.
1241          * We don't know which one the cursor is pointing at, so we
1242          * set up variables "left" and "right" for each case.
1243          */
1244         bp = cur->bc_bufs[cur->bc_nlevels - 1];
1245         block = XFS_BUF_TO_INOBT_BLOCK(bp);
1246 #ifdef DEBUG
1247         if ((error = xfs_btree_check_sblock(cur, block, cur->bc_nlevels - 1, bp)))
1248                 return error;
1249 #endif
1250         if (be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
1251                 /*
1252                  * Our block is left, pick up the right block.
1253                  */
1254                 lbp = bp;
1255                 lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
1256                 left = block;
1257                 rbno = be32_to_cpu(left->bb_rightsib);
1258                 if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1259                                 rbno, 0, &rbp, XFS_INO_BTREE_REF)))
1260                         return error;
1261                 bp = rbp;
1262                 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1263                 if ((error = xfs_btree_check_sblock(cur, right,
1264                                 cur->bc_nlevels - 1, rbp)))
1265                         return error;
1266                 nptr = 1;
1267         } else {
1268                 /*
1269                  * Our block is right, pick up the left block.
1270                  */
1271                 rbp = bp;
1272                 rbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(rbp));
1273                 right = block;
1274                 lbno = be32_to_cpu(right->bb_leftsib);
1275                 if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1276                                 lbno, 0, &lbp, XFS_INO_BTREE_REF)))
1277                         return error;
1278                 bp = lbp;
1279                 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1280                 if ((error = xfs_btree_check_sblock(cur, left,
1281                                 cur->bc_nlevels - 1, lbp)))
1282                         return error;
1283                 nptr = 2;
1284         }
1285         /*
1286          * Fill in the new block's btree header and log it.
1287          */
1288         new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1289         new->bb_level = cpu_to_be16(cur->bc_nlevels);
1290         new->bb_numrecs = cpu_to_be16(2);
1291         new->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1292         new->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
1293         xfs_inobt_log_block(args.tp, nbp, XFS_BB_ALL_BITS);
1294         ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
1295         /*
1296          * Fill in the key data in the new root.
1297          */
1298         kp = XFS_INOBT_KEY_ADDR(new, 1, cur);
1299         if (be16_to_cpu(left->bb_level) > 0) {
1300                 kp[0] = *XFS_INOBT_KEY_ADDR(left, 1, cur);
1301                 kp[1] = *XFS_INOBT_KEY_ADDR(right, 1, cur);
1302         } else {
1303                 rp = XFS_INOBT_REC_ADDR(left, 1, cur);
1304                 kp[0].ir_startino = rp->ir_startino;
1305                 rp = XFS_INOBT_REC_ADDR(right, 1, cur);
1306                 kp[1].ir_startino = rp->ir_startino;
1307         }
1308         xfs_inobt_log_keys(cur, nbp, 1, 2);
1309         /*
1310          * Fill in the pointer data in the new root.
1311          */
1312         pp = XFS_INOBT_PTR_ADDR(new, 1, cur);
1313         pp[0] = cpu_to_be32(lbno);
1314         pp[1] = cpu_to_be32(rbno);
1315         xfs_inobt_log_ptrs(cur, nbp, 1, 2);
1316         /*
1317          * Fix up the cursor.
1318          */
1319         xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
1320         cur->bc_ptrs[cur->bc_nlevels] = nptr;
1321         cur->bc_nlevels++;
1322         *stat = 1;
1323         return 0;
1324 }
1325
1326 /*
1327  * Move 1 record right from cur/level if possible.
1328  * Update cur to reflect the new path.
1329  */
1330 STATIC int                              /* error */
1331 xfs_inobt_rshift(
1332         xfs_btree_cur_t         *cur,   /* btree cursor */
1333         int                     level,  /* level to shift record on */
1334         int                     *stat)  /* success/failure */
1335 {
1336         int                     error;  /* error return value */
1337         int                     i;      /* loop index */
1338         xfs_inobt_key_t         key;    /* key value for leaf level upward */
1339         xfs_buf_t               *lbp;   /* buffer for left (current) block */
1340         xfs_inobt_block_t       *left;  /* left (current) btree block */
1341         xfs_inobt_key_t         *lkp;   /* key pointer for left block */
1342         xfs_inobt_ptr_t         *lpp;   /* address pointer for left block */
1343         xfs_inobt_rec_t         *lrp;   /* record pointer for left block */
1344         xfs_buf_t               *rbp;   /* buffer for right neighbor block */
1345         xfs_inobt_block_t       *right; /* right neighbor btree block */
1346         xfs_inobt_key_t         *rkp;   /* key pointer for right block */
1347         xfs_inobt_ptr_t         *rpp;   /* address pointer for right block */
1348         xfs_inobt_rec_t         *rrp=NULL;      /* record pointer for right block */
1349         xfs_btree_cur_t         *tcur;  /* temporary cursor */
1350
1351         /*
1352          * Set up variables for this block as "left".
1353          */
1354         lbp = cur->bc_bufs[level];
1355         left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1356 #ifdef DEBUG
1357         if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1358                 return error;
1359 #endif
1360         /*
1361          * If we've got no right sibling then we can't shift an entry right.
1362          */
1363         if (be32_to_cpu(left->bb_rightsib) == NULLAGBLOCK) {
1364                 *stat = 0;
1365                 return 0;
1366         }
1367         /*
1368          * If the cursor entry is the one that would be moved, don't
1369          * do it... it's too complicated.
1370          */
1371         if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
1372                 *stat = 0;
1373                 return 0;
1374         }
1375         /*
1376          * Set up the right neighbor as "right".
1377          */
1378         if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1379                         cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib),
1380                         0, &rbp, XFS_INO_BTREE_REF)))
1381                 return error;
1382         right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1383         if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1384                 return error;
1385         /*
1386          * If it's full, it can't take another entry.
1387          */
1388         if (be16_to_cpu(right->bb_numrecs) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
1389                 *stat = 0;
1390                 return 0;
1391         }
1392         /*
1393          * Make a hole at the start of the right neighbor block, then
1394          * copy the last left block entry to the hole.
1395          */
1396         if (level > 0) {
1397                 lkp = XFS_INOBT_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1398                 lpp = XFS_INOBT_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1399                 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1400                 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1401 #ifdef DEBUG
1402                 for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
1403                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
1404                                 return error;
1405                 }
1406 #endif
1407                 memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1408                 memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1409 #ifdef DEBUG
1410                 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level)))
1411                         return error;
1412 #endif
1413                 *rkp = *lkp;
1414                 *rpp = *lpp;
1415                 xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1416                 xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1417         } else {
1418                 lrp = XFS_INOBT_REC_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1419                 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1420                 memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1421                 *rrp = *lrp;
1422                 xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1423                 key.ir_startino = rrp->ir_startino;
1424                 rkp = &key;
1425         }
1426         /*
1427          * Decrement and log left's numrecs, bump and log right's numrecs.
1428          */
1429         be16_add_cpu(&left->bb_numrecs, -1);
1430         xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1431         be16_add_cpu(&right->bb_numrecs, 1);
1432 #ifdef DEBUG
1433         if (level > 0)
1434                 xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
1435         else
1436                 xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
1437 #endif
1438         xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1439         /*
1440          * Using a temporary cursor, update the parent key values of the
1441          * block on the right.
1442          */
1443         if ((error = xfs_btree_dup_cursor(cur, &tcur)))
1444                 return error;
1445         xfs_btree_lastrec(tcur, level);
1446         if ((error = xfs_inobt_increment(tcur, level, &i)) ||
1447             (error = xfs_inobt_updkey(tcur, rkp, level + 1))) {
1448                 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
1449                 return error;
1450         }
1451         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
1452         *stat = 1;
1453         return 0;
1454 }
1455
1456 /*
1457  * Split cur/level block in half.
1458  * Return new block number and its first record (to be inserted into parent).
1459  */
1460 STATIC int                              /* error */
1461 xfs_inobt_split(
1462         xfs_btree_cur_t         *cur,   /* btree cursor */
1463         int                     level,  /* level to split */
1464         xfs_agblock_t           *bnop,  /* output: block number allocated */
1465         xfs_inobt_key_t         *keyp,  /* output: first key of new block */
1466         xfs_btree_cur_t         **curp, /* output: new cursor */
1467         int                     *stat)  /* success/failure */
1468 {
1469         xfs_alloc_arg_t         args;   /* allocation argument structure */
1470         int                     error;  /* error return value */
1471         int                     i;      /* loop index/record number */
1472         xfs_agblock_t           lbno;   /* left (current) block number */
1473         xfs_buf_t               *lbp;   /* buffer for left block */
1474         xfs_inobt_block_t       *left;  /* left (current) btree block */
1475         xfs_inobt_key_t         *lkp;   /* left btree key pointer */
1476         xfs_inobt_ptr_t         *lpp;   /* left btree address pointer */
1477         xfs_inobt_rec_t         *lrp;   /* left btree record pointer */
1478         xfs_buf_t               *rbp;   /* buffer for right block */
1479         xfs_inobt_block_t       *right; /* right (new) btree block */
1480         xfs_inobt_key_t         *rkp;   /* right btree key pointer */
1481         xfs_inobt_ptr_t         *rpp;   /* right btree address pointer */
1482         xfs_inobt_rec_t         *rrp;   /* right btree record pointer */
1483
1484         /*
1485          * Set up left block (current one).
1486          */
1487         lbp = cur->bc_bufs[level];
1488         args.tp = cur->bc_tp;
1489         args.mp = cur->bc_mp;
1490         lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
1491         /*
1492          * Allocate the new block.
1493          * If we can't do it, we're toast.  Give up.
1494          */
1495         args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, lbno);
1496         args.mod = args.minleft = args.alignment = args.total = args.wasdel =
1497                 args.isfl = args.userdata = args.minalignslop = 0;
1498         args.minlen = args.maxlen = args.prod = 1;
1499         args.type = XFS_ALLOCTYPE_NEAR_BNO;
1500         if ((error = xfs_alloc_vextent(&args)))
1501                 return error;
1502         if (args.fsbno == NULLFSBLOCK) {
1503                 *stat = 0;
1504                 return 0;
1505         }
1506         ASSERT(args.len == 1);
1507         rbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
1508         /*
1509          * Set up the new block as "right".
1510          */
1511         right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1512         /*
1513          * "Left" is the current (according to the cursor) block.
1514          */
1515         left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1516 #ifdef DEBUG
1517         if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1518                 return error;
1519 #endif
1520         /*
1521          * Fill in the btree header for the new block.
1522          */
1523         right->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1524         right->bb_level = left->bb_level;
1525         right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
1526         /*
1527          * Make sure that if there's an odd number of entries now, that
1528          * each new block will have the same number of entries.
1529          */
1530         if ((be16_to_cpu(left->bb_numrecs) & 1) &&
1531             cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
1532                 be16_add_cpu(&right->bb_numrecs, 1);
1533         i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
1534         /*
1535          * For non-leaf blocks, copy keys and addresses over to the new block.
1536          */
1537         if (level > 0) {
1538                 lkp = XFS_INOBT_KEY_ADDR(left, i, cur);
1539                 lpp = XFS_INOBT_PTR_ADDR(left, i, cur);
1540                 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1541                 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1542 #ifdef DEBUG
1543                 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1544                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
1545                                 return error;
1546                 }
1547 #endif
1548                 memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1549                 memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1550                 xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1551                 xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1552                 *keyp = *rkp;
1553         }
1554         /*
1555          * For leaf blocks, copy records over to the new block.
1556          */
1557         else {
1558                 lrp = XFS_INOBT_REC_ADDR(left, i, cur);
1559                 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1560                 memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1561                 xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1562                 keyp->ir_startino = rrp->ir_startino;
1563         }
1564         /*
1565          * Find the left block number by looking in the buffer.
1566          * Adjust numrecs, sibling pointers.
1567          */
1568         be16_add_cpu(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
1569         right->bb_rightsib = left->bb_rightsib;
1570         left->bb_rightsib = cpu_to_be32(args.agbno);
1571         right->bb_leftsib = cpu_to_be32(lbno);
1572         xfs_inobt_log_block(args.tp, rbp, XFS_BB_ALL_BITS);
1573         xfs_inobt_log_block(args.tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
1574         /*
1575          * If there's a block to the new block's right, make that block
1576          * point back to right instead of to left.
1577          */
1578         if (be32_to_cpu(right->bb_rightsib) != NULLAGBLOCK) {
1579                 xfs_inobt_block_t       *rrblock;       /* rr btree block */
1580                 xfs_buf_t               *rrbp;          /* buffer for rrblock */
1581
1582                 if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1583                                 be32_to_cpu(right->bb_rightsib), 0, &rrbp,
1584                                 XFS_INO_BTREE_REF)))
1585                         return error;
1586                 rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
1587                 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
1588                         return error;
1589                 rrblock->bb_leftsib = cpu_to_be32(args.agbno);
1590                 xfs_inobt_log_block(args.tp, rrbp, XFS_BB_LEFTSIB);
1591         }
1592         /*
1593          * If the cursor is really in the right block, move it there.
1594          * If it's just pointing past the last entry in left, then we'll
1595          * insert there, so don't change anything in that case.
1596          */
1597         if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) {
1598                 xfs_btree_setbuf(cur, level, rbp);
1599                 cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs);
1600         }
1601         /*
1602          * If there are more levels, we'll need another cursor which refers
1603          * the right block, no matter where this cursor was.
1604          */
1605         if (level + 1 < cur->bc_nlevels) {
1606                 if ((error = xfs_btree_dup_cursor(cur, curp)))
1607                         return error;
1608                 (*curp)->bc_ptrs[level + 1]++;
1609         }
1610         *bnop = args.agbno;
1611         *stat = 1;
1612         return 0;
1613 }
1614
1615 /*
1616  * Update keys at all levels from here to the root along the cursor's path.
1617  */
1618 STATIC int                              /* error */
1619 xfs_inobt_updkey(
1620         xfs_btree_cur_t         *cur,   /* btree cursor */
1621         xfs_inobt_key_t         *keyp,  /* new key value to update to */
1622         int                     level)  /* starting level for update */
1623 {
1624         int                     ptr;    /* index of key in block */
1625
1626         /*
1627          * Go up the tree from this level toward the root.
1628          * At each level, update the key value to the value input.
1629          * Stop when we reach a level where the cursor isn't pointing
1630          * at the first entry in the block.
1631          */
1632         for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1633                 xfs_buf_t               *bp;    /* buffer for block */
1634                 xfs_inobt_block_t       *block; /* btree block */
1635 #ifdef DEBUG
1636                 int                     error;  /* error return value */
1637 #endif
1638                 xfs_inobt_key_t         *kp;    /* ptr to btree block keys */
1639
1640                 bp = cur->bc_bufs[level];
1641                 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1642 #ifdef DEBUG
1643                 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1644                         return error;
1645 #endif
1646                 ptr = cur->bc_ptrs[level];
1647                 kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
1648                 *kp = *keyp;
1649                 xfs_inobt_log_keys(cur, bp, ptr, ptr);
1650         }
1651         return 0;
1652 }
1653
1654 /*
1655  * Externally visible routines.
1656  */
1657
1658 /*
1659  * Decrement cursor by one record at the level.
1660  * For nonzero levels the leaf-ward information is untouched.
1661  */
1662 int                                     /* error */
1663 xfs_inobt_decrement(
1664         xfs_btree_cur_t         *cur,   /* btree cursor */
1665         int                     level,  /* level in btree, 0 is leaf */
1666         int                     *stat)  /* success/failure */
1667 {
1668         xfs_inobt_block_t       *block; /* btree block */
1669         int                     error;
1670         int                     lev;    /* btree level */
1671
1672         ASSERT(level < cur->bc_nlevels);
1673         /*
1674          * Read-ahead to the left at this level.
1675          */
1676         xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1677         /*
1678          * Decrement the ptr at this level.  If we're still in the block
1679          * then we're done.
1680          */
1681         if (--cur->bc_ptrs[level] > 0) {
1682                 *stat = 1;
1683                 return 0;
1684         }
1685         /*
1686          * Get a pointer to the btree block.
1687          */
1688         block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[level]);
1689 #ifdef DEBUG
1690         if ((error = xfs_btree_check_sblock(cur, block, level,
1691                         cur->bc_bufs[level])))
1692                 return error;
1693 #endif
1694         /*
1695          * If we just went off the left edge of the tree, return failure.
1696          */
1697         if (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK) {
1698                 *stat = 0;
1699                 return 0;
1700         }
1701         /*
1702          * March up the tree decrementing pointers.
1703          * Stop when we don't go off the left edge of a block.
1704          */
1705         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1706                 if (--cur->bc_ptrs[lev] > 0)
1707                         break;
1708                 /*
1709                  * Read-ahead the left block, we're going to read it
1710                  * in the next loop.
1711                  */
1712                 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1713         }
1714         /*
1715          * If we went off the root then we are seriously confused.
1716          */
1717         ASSERT(lev < cur->bc_nlevels);
1718         /*
1719          * Now walk back down the tree, fixing up the cursor's buffer
1720          * pointers and key numbers.
1721          */
1722         for (block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
1723                 xfs_agblock_t   agbno;  /* block number of btree block */
1724                 xfs_buf_t       *bp;    /* buffer containing btree block */
1725
1726                 agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
1727                 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1728                                 cur->bc_private.a.agno, agbno, 0, &bp,
1729                                 XFS_INO_BTREE_REF)))
1730                         return error;
1731                 lev--;
1732                 xfs_btree_setbuf(cur, lev, bp);
1733                 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1734                 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1735                         return error;
1736                 cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
1737         }
1738         *stat = 1;
1739         return 0;
1740 }
1741
1742 /*
1743  * Delete the record pointed to by cur.
1744  * The cursor refers to the place where the record was (could be inserted)
1745  * when the operation returns.
1746  */
1747 int                                     /* error */
1748 xfs_inobt_delete(
1749         xfs_btree_cur_t *cur,           /* btree cursor */
1750         int             *stat)          /* success/failure */
1751 {
1752         int             error;
1753         int             i;              /* result code */
1754         int             level;          /* btree level */
1755
1756         /*
1757          * Go up the tree, starting at leaf level.
1758          * If 2 is returned then a join was done; go to the next level.
1759          * Otherwise we are done.
1760          */
1761         for (level = 0, i = 2; i == 2; level++) {
1762                 if ((error = xfs_inobt_delrec(cur, level, &i)))
1763                         return error;
1764         }
1765         if (i == 0) {
1766                 for (level = 1; level < cur->bc_nlevels; level++) {
1767                         if (cur->bc_ptrs[level] == 0) {
1768                                 if ((error = xfs_inobt_decrement(cur, level, &i)))
1769                                         return error;
1770                                 break;
1771                         }
1772                 }
1773         }
1774         *stat = i;
1775         return 0;
1776 }
1777
1778
1779 /*
1780  * Get the data from the pointed-to record.
1781  */
1782 int                                     /* error */
1783 xfs_inobt_get_rec(
1784         xfs_btree_cur_t         *cur,   /* btree cursor */
1785         xfs_agino_t             *ino,   /* output: starting inode of chunk */
1786         __int32_t               *fcnt,  /* output: number of free inodes */
1787         xfs_inofree_t           *free,  /* output: free inode mask */
1788         int                     *stat)  /* output: success/failure */
1789 {
1790         xfs_inobt_block_t       *block; /* btree block */
1791         xfs_buf_t               *bp;    /* buffer containing btree block */
1792 #ifdef DEBUG
1793         int                     error;  /* error return value */
1794 #endif
1795         int                     ptr;    /* record number */
1796         xfs_inobt_rec_t         *rec;   /* record data */
1797
1798         bp = cur->bc_bufs[0];
1799         ptr = cur->bc_ptrs[0];
1800         block = XFS_BUF_TO_INOBT_BLOCK(bp);
1801 #ifdef DEBUG
1802         if ((error = xfs_btree_check_sblock(cur, block, 0, bp)))
1803                 return error;
1804 #endif
1805         /*
1806          * Off the right end or left end, return failure.
1807          */
1808         if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) {
1809                 *stat = 0;
1810                 return 0;
1811         }
1812         /*
1813          * Point to the record and extract its data.
1814          */
1815         rec = XFS_INOBT_REC_ADDR(block, ptr, cur);
1816         *ino = be32_to_cpu(rec->ir_startino);
1817         *fcnt = be32_to_cpu(rec->ir_freecount);
1818         *free = be64_to_cpu(rec->ir_free);
1819         *stat = 1;
1820         return 0;
1821 }
1822
1823 /*
1824  * Increment cursor by one record at the level.
1825  * For nonzero levels the leaf-ward information is untouched.
1826  */
1827 int                                     /* error */
1828 xfs_inobt_increment(
1829         xfs_btree_cur_t         *cur,   /* btree cursor */
1830         int                     level,  /* level in btree, 0 is leaf */
1831         int                     *stat)  /* success/failure */
1832 {
1833         xfs_inobt_block_t       *block; /* btree block */
1834         xfs_buf_t               *bp;    /* buffer containing btree block */
1835         int                     error;  /* error return value */
1836         int                     lev;    /* btree level */
1837
1838         ASSERT(level < cur->bc_nlevels);
1839         /*
1840          * Read-ahead to the right at this level.
1841          */
1842         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1843         /*
1844          * Get a pointer to the btree block.
1845          */
1846         bp = cur->bc_bufs[level];
1847         block = XFS_BUF_TO_INOBT_BLOCK(bp);
1848 #ifdef DEBUG
1849         if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1850                 return error;
1851 #endif
1852         /*
1853          * Increment the ptr at this level.  If we're still in the block
1854          * then we're done.
1855          */
1856         if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
1857                 *stat = 1;
1858                 return 0;
1859         }
1860         /*
1861          * If we just went off the right edge of the tree, return failure.
1862          */
1863         if (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK) {
1864                 *stat = 0;
1865                 return 0;
1866         }
1867         /*
1868          * March up the tree incrementing pointers.
1869          * Stop when we don't go off the right edge of a block.
1870          */
1871         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1872                 bp = cur->bc_bufs[lev];
1873                 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1874 #ifdef DEBUG
1875                 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1876                         return error;
1877 #endif
1878                 if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
1879                         break;
1880                 /*
1881                  * Read-ahead the right block, we're going to read it
1882                  * in the next loop.
1883                  */
1884                 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1885         }
1886         /*
1887          * If we went off the root then we are seriously confused.
1888          */
1889         ASSERT(lev < cur->bc_nlevels);
1890         /*
1891          * Now walk back down the tree, fixing up the cursor's buffer
1892          * pointers and key numbers.
1893          */
1894         for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_INOBT_BLOCK(bp);
1895              lev > level; ) {
1896                 xfs_agblock_t   agbno;  /* block number of btree block */
1897
1898                 agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
1899                 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1900                                 cur->bc_private.a.agno, agbno, 0, &bp,
1901                                 XFS_INO_BTREE_REF)))
1902                         return error;
1903                 lev--;
1904                 xfs_btree_setbuf(cur, lev, bp);
1905                 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1906                 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1907                         return error;
1908                 cur->bc_ptrs[lev] = 1;
1909         }
1910         *stat = 1;
1911         return 0;
1912 }
1913
1914 /*
1915  * Insert the current record at the point referenced by cur.
1916  * The cursor may be inconsistent on return if splits have been done.
1917  */
1918 int                                     /* error */
1919 xfs_inobt_insert(
1920         xfs_btree_cur_t *cur,           /* btree cursor */
1921         int             *stat)          /* success/failure */
1922 {
1923         int             error;          /* error return value */
1924         int             i;              /* result value, 0 for failure */
1925         int             level;          /* current level number in btree */
1926         xfs_agblock_t   nbno;           /* new block number (split result) */
1927         xfs_btree_cur_t *ncur;          /* new cursor (split result) */
1928         xfs_inobt_rec_t nrec;           /* record being inserted this level */
1929         xfs_btree_cur_t *pcur;          /* previous level's cursor */
1930
1931         level = 0;
1932         nbno = NULLAGBLOCK;
1933         nrec.ir_startino = cpu_to_be32(cur->bc_rec.i.ir_startino);
1934         nrec.ir_freecount = cpu_to_be32(cur->bc_rec.i.ir_freecount);
1935         nrec.ir_free = cpu_to_be64(cur->bc_rec.i.ir_free);
1936         ncur = NULL;
1937         pcur = cur;
1938         /*
1939          * Loop going up the tree, starting at the leaf level.
1940          * Stop when we don't get a split block, that must mean that
1941          * the insert is finished with this level.
1942          */
1943         do {
1944                 /*
1945                  * Insert nrec/nbno into this level of the tree.
1946                  * Note if we fail, nbno will be null.
1947                  */
1948                 if ((error = xfs_inobt_insrec(pcur, level++, &nbno, &nrec, &ncur,
1949                                 &i))) {
1950                         if (pcur != cur)
1951                                 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
1952                         return error;
1953                 }
1954                 /*
1955                  * See if the cursor we just used is trash.
1956                  * Can't trash the caller's cursor, but otherwise we should
1957                  * if ncur is a new cursor or we're about to be done.
1958                  */
1959                 if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
1960                         cur->bc_nlevels = pcur->bc_nlevels;
1961                         xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
1962                 }
1963                 /*
1964                  * If we got a new cursor, switch to it.
1965                  */
1966                 if (ncur) {
1967                         pcur = ncur;
1968                         ncur = NULL;
1969                 }
1970         } while (nbno != NULLAGBLOCK);
1971         *stat = i;
1972         return 0;
1973 }
1974
1975 /*
1976  * Lookup the record equal to ino in the btree given by cur.
1977  */
1978 int                                     /* error */
1979 xfs_inobt_lookup_eq(
1980         xfs_btree_cur_t *cur,           /* btree cursor */
1981         xfs_agino_t     ino,            /* starting inode of chunk */
1982         __int32_t       fcnt,           /* free inode count */
1983         xfs_inofree_t   free,           /* free inode mask */
1984         int             *stat)          /* success/failure */
1985 {
1986         cur->bc_rec.i.ir_startino = ino;
1987         cur->bc_rec.i.ir_freecount = fcnt;
1988         cur->bc_rec.i.ir_free = free;
1989         return xfs_inobt_lookup(cur, XFS_LOOKUP_EQ, stat);
1990 }
1991
1992 /*
1993  * Lookup the first record greater than or equal to ino
1994  * in the btree given by cur.
1995  */
1996 int                                     /* error */
1997 xfs_inobt_lookup_ge(
1998         xfs_btree_cur_t *cur,           /* btree cursor */
1999         xfs_agino_t     ino,            /* starting inode of chunk */
2000         __int32_t       fcnt,           /* free inode count */
2001         xfs_inofree_t   free,           /* free inode mask */
2002         int             *stat)          /* success/failure */
2003 {
2004         cur->bc_rec.i.ir_startino = ino;
2005         cur->bc_rec.i.ir_freecount = fcnt;
2006         cur->bc_rec.i.ir_free = free;
2007         return xfs_inobt_lookup(cur, XFS_LOOKUP_GE, stat);
2008 }
2009
2010 /*
2011  * Lookup the first record less than or equal to ino
2012  * in the btree given by cur.
2013  */
2014 int                                     /* error */
2015 xfs_inobt_lookup_le(
2016         xfs_btree_cur_t *cur,           /* btree cursor */
2017         xfs_agino_t     ino,            /* starting inode of chunk */
2018         __int32_t       fcnt,           /* free inode count */
2019         xfs_inofree_t   free,           /* free inode mask */
2020         int             *stat)          /* success/failure */
2021 {
2022         cur->bc_rec.i.ir_startino = ino;
2023         cur->bc_rec.i.ir_freecount = fcnt;
2024         cur->bc_rec.i.ir_free = free;
2025         return xfs_inobt_lookup(cur, XFS_LOOKUP_LE, stat);
2026 }
2027
2028 /*
2029  * Update the record referred to by cur, to the value given
2030  * by [ino, fcnt, free].
2031  * This either works (return 0) or gets an EFSCORRUPTED error.
2032  */
2033 int                                     /* error */
2034 xfs_inobt_update(
2035         xfs_btree_cur_t         *cur,   /* btree cursor */
2036         xfs_agino_t             ino,    /* starting inode of chunk */
2037         __int32_t               fcnt,   /* free inode count */
2038         xfs_inofree_t           free)   /* free inode mask */
2039 {
2040         xfs_inobt_block_t       *block; /* btree block to update */
2041         xfs_buf_t               *bp;    /* buffer containing btree block */
2042         int                     error;  /* error return value */
2043         int                     ptr;    /* current record number (updating) */
2044         xfs_inobt_rec_t         *rp;    /* pointer to updated record */
2045
2046         /*
2047          * Pick up the current block.
2048          */
2049         bp = cur->bc_bufs[0];
2050         block = XFS_BUF_TO_INOBT_BLOCK(bp);
2051 #ifdef DEBUG
2052         if ((error = xfs_btree_check_sblock(cur, block, 0, bp)))
2053                 return error;
2054 #endif
2055         /*
2056          * Get the address of the rec to be updated.
2057          */
2058         ptr = cur->bc_ptrs[0];
2059         rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
2060         /*
2061          * Fill in the new contents and log them.
2062          */
2063         rp->ir_startino = cpu_to_be32(ino);
2064         rp->ir_freecount = cpu_to_be32(fcnt);
2065         rp->ir_free = cpu_to_be64(free);
2066         xfs_inobt_log_recs(cur, bp, ptr, ptr);
2067         /*
2068          * Updating first record in leaf. Pass new key value up to our parent.
2069          */
2070         if (ptr == 1) {
2071                 xfs_inobt_key_t key;    /* key containing [ino] */
2072
2073                 key.ir_startino = cpu_to_be32(ino);
2074                 if ((error = xfs_inobt_updkey(cur, &key, 1)))
2075                         return error;
2076         }
2077         return 0;
2078 }