Merge ../torvalds-2.6/
[linux-2.6] / fs / xfs / xfs_ialloc_btree.c
1 /*
2  * Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms of version 2 of the GNU General Public License as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it would be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11  *
12  * Further, this software is distributed without any warranty that it is
13  * free of the rightful claim of any third person regarding infringement
14  * or the like.  Any license provided herein, whether implied or
15  * otherwise, applies only to this software file.  Patent licenses, if
16  * any, provided herein do not apply to combinations of this program with
17  * other software, or any other product whatsoever.
18  *
19  * You should have received a copy of the GNU General Public License along
20  * with this program; if not, write the Free Software Foundation, Inc., 59
21  * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22  *
23  * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24  * Mountain View, CA  94043, or:
25  *
26  * http://www.sgi.com
27  *
28  * For further information regarding this notice, see:
29  *
30  * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31  */
32
33 #include "xfs.h"
34
35 #include "xfs_macros.h"
36 #include "xfs_types.h"
37 #include "xfs_inum.h"
38 #include "xfs_log.h"
39 #include "xfs_trans.h"
40 #include "xfs_sb.h"
41 #include "xfs_ag.h"
42 #include "xfs_dir.h"
43 #include "xfs_dmapi.h"
44 #include "xfs_mount.h"
45 #include "xfs_alloc_btree.h"
46 #include "xfs_bmap_btree.h"
47 #include "xfs_ialloc_btree.h"
48 #include "xfs_btree.h"
49 #include "xfs_ialloc.h"
50 #include "xfs_alloc.h"
51 #include "xfs_error.h"
52
53 /*
54  * Inode allocation management for XFS.
55  */
56
57 /*
58  * Prototypes for internal functions.
59  */
60
61 STATIC void xfs_inobt_log_block(xfs_trans_t *, xfs_buf_t *, int);
62 STATIC void xfs_inobt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
63 STATIC void xfs_inobt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
64 STATIC void xfs_inobt_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
65 STATIC int xfs_inobt_lshift(xfs_btree_cur_t *, int, int *);
66 STATIC int xfs_inobt_newroot(xfs_btree_cur_t *, int *);
67 STATIC int xfs_inobt_rshift(xfs_btree_cur_t *, int, int *);
68 STATIC int xfs_inobt_split(xfs_btree_cur_t *, int, xfs_agblock_t *,
69                 xfs_inobt_key_t *, xfs_btree_cur_t **, int *);
70 STATIC int xfs_inobt_updkey(xfs_btree_cur_t *, xfs_inobt_key_t *, int);
71
72 /*
73  * Internal functions.
74  */
75
76 /*
77  * Single level of the xfs_inobt_delete record deletion routine.
78  * Delete record pointed to by cur/level.
79  * Remove the record from its block then rebalance the tree.
80  * Return 0 for error, 1 for done, 2 to go on to the next level.
81  */
82 STATIC int                              /* error */
83 xfs_inobt_delrec(
84         xfs_btree_cur_t         *cur,   /* btree cursor */
85         int                     level,  /* level removing record from */
86         int                     *stat)  /* fail/done/go-on */
87 {
88         xfs_buf_t               *agbp;  /* buffer for a.g. inode header */
89         xfs_mount_t             *mp;    /* mount structure */
90         xfs_agi_t               *agi;   /* allocation group inode header */
91         xfs_inobt_block_t       *block; /* btree block record/key lives in */
92         xfs_agblock_t           bno;    /* btree block number */
93         xfs_buf_t               *bp;    /* buffer for block */
94         int                     error;  /* error return value */
95         int                     i;      /* loop index */
96         xfs_inobt_key_t         key;    /* kp points here if block is level 0 */
97         xfs_inobt_key_t         *kp = NULL;     /* pointer to btree keys */
98         xfs_agblock_t           lbno;   /* left block's block number */
99         xfs_buf_t               *lbp;   /* left block's buffer pointer */
100         xfs_inobt_block_t       *left;  /* left btree block */
101         xfs_inobt_key_t         *lkp;   /* left block key pointer */
102         xfs_inobt_ptr_t         *lpp;   /* left block address pointer */
103         int                     lrecs = 0;      /* number of records in left block */
104         xfs_inobt_rec_t         *lrp;   /* left block record pointer */
105         xfs_inobt_ptr_t         *pp = NULL;     /* pointer to btree addresses */
106         int                     ptr;    /* index in btree block for this rec */
107         xfs_agblock_t           rbno;   /* right block's block number */
108         xfs_buf_t               *rbp;   /* right block's buffer pointer */
109         xfs_inobt_block_t       *right; /* right btree block */
110         xfs_inobt_key_t         *rkp;   /* right block key pointer */
111         xfs_inobt_rec_t         *rp;    /* pointer to btree records */
112         xfs_inobt_ptr_t         *rpp;   /* right block address pointer */
113         int                     rrecs = 0;      /* number of records in right block */
114         int                     numrecs;
115         xfs_inobt_rec_t         *rrp;   /* right block record pointer */
116         xfs_btree_cur_t         *tcur;  /* temporary btree cursor */
117
118         mp = cur->bc_mp;
119
120         /*
121          * Get the index of the entry being deleted, check for nothing there.
122          */
123         ptr = cur->bc_ptrs[level];
124         if (ptr == 0) {
125                 *stat = 0;
126                 return 0;
127         }
128
129         /*
130          * Get the buffer & block containing the record or key/ptr.
131          */
132         bp = cur->bc_bufs[level];
133         block = XFS_BUF_TO_INOBT_BLOCK(bp);
134 #ifdef DEBUG
135         if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
136                 return error;
137 #endif
138         /*
139          * Fail if we're off the end of the block.
140          */
141
142         numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
143         if (ptr > numrecs) {
144                 *stat = 0;
145                 return 0;
146         }
147         /*
148          * It's a nonleaf.  Excise the key and ptr being deleted, by
149          * sliding the entries past them down one.
150          * Log the changed areas of the block.
151          */
152         if (level > 0) {
153                 kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
154                 pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
155 #ifdef DEBUG
156                 for (i = ptr; i < numrecs; i++) {
157                         if ((error = xfs_btree_check_sptr(cur, INT_GET(pp[i], ARCH_CONVERT), level)))
158                                 return error;
159                 }
160 #endif
161                 if (ptr < numrecs) {
162                         memmove(&kp[ptr - 1], &kp[ptr],
163                                 (numrecs - ptr) * sizeof(*kp));
164                         memmove(&pp[ptr - 1], &pp[ptr],
165                                 (numrecs - ptr) * sizeof(*kp));
166                         xfs_inobt_log_keys(cur, bp, ptr, numrecs - 1);
167                         xfs_inobt_log_ptrs(cur, bp, ptr, numrecs - 1);
168                 }
169         }
170         /*
171          * It's a leaf.  Excise the record being deleted, by sliding the
172          * entries past it down one.  Log the changed areas of the block.
173          */
174         else {
175                 rp = XFS_INOBT_REC_ADDR(block, 1, cur);
176                 if (ptr < numrecs) {
177                         memmove(&rp[ptr - 1], &rp[ptr],
178                                 (numrecs - ptr) * sizeof(*rp));
179                         xfs_inobt_log_recs(cur, bp, ptr, numrecs - 1);
180                 }
181                 /*
182                  * If it's the first record in the block, we'll need a key
183                  * structure to pass up to the next level (updkey).
184                  */
185                 if (ptr == 1) {
186                         key.ir_startino = rp->ir_startino;
187                         kp = &key;
188                 }
189         }
190         /*
191          * Decrement and log the number of entries in the block.
192          */
193         numrecs--;
194         INT_SET(block->bb_numrecs, ARCH_CONVERT, numrecs);
195         xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
196         /*
197          * Is this the root level?  If so, we're almost done.
198          */
199         if (level == cur->bc_nlevels - 1) {
200                 /*
201                  * If this is the root level,
202                  * and there's only one entry left,
203                  * and it's NOT the leaf level,
204                  * then we can get rid of this level.
205                  */
206                 if (numrecs == 1 && level > 0) {
207                         agbp = cur->bc_private.i.agbp;
208                         agi = XFS_BUF_TO_AGI(agbp);
209                         /*
210                          * pp is still set to the first pointer in the block.
211                          * Make it the new root of the btree.
212                          */
213                         bno = INT_GET(agi->agi_root, ARCH_CONVERT);
214                         agi->agi_root = *pp;
215                         INT_MOD(agi->agi_level, ARCH_CONVERT, -1);
216                         /*
217                          * Free the block.
218                          */
219                         if ((error = xfs_free_extent(cur->bc_tp,
220                                 XFS_AGB_TO_FSB(mp, cur->bc_private.i.agno, bno), 1)))
221                                 return error;
222                         xfs_trans_binval(cur->bc_tp, bp);
223                         xfs_ialloc_log_agi(cur->bc_tp, agbp,
224                                 XFS_AGI_ROOT | XFS_AGI_LEVEL);
225                         /*
226                          * Update the cursor so there's one fewer level.
227                          */
228                         cur->bc_bufs[level] = NULL;
229                         cur->bc_nlevels--;
230                 } else if (level > 0 &&
231                            (error = xfs_inobt_decrement(cur, level, &i)))
232                         return error;
233                 *stat = 1;
234                 return 0;
235         }
236         /*
237          * If we deleted the leftmost entry in the block, update the
238          * key values above us in the tree.
239          */
240         if (ptr == 1 && (error = xfs_inobt_updkey(cur, kp, level + 1)))
241                 return error;
242         /*
243          * If the number of records remaining in the block is at least
244          * the minimum, we're done.
245          */
246         if (numrecs >= XFS_INOBT_BLOCK_MINRECS(level, cur)) {
247                 if (level > 0 &&
248                     (error = xfs_inobt_decrement(cur, level, &i)))
249                         return error;
250                 *stat = 1;
251                 return 0;
252         }
253         /*
254          * Otherwise, we have to move some records around to keep the
255          * tree balanced.  Look at the left and right sibling blocks to
256          * see if we can re-balance by moving only one record.
257          */
258         rbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
259         lbno = INT_GET(block->bb_leftsib, ARCH_CONVERT);
260         bno = NULLAGBLOCK;
261         ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
262         /*
263          * Duplicate the cursor so our btree manipulations here won't
264          * disrupt the next level up.
265          */
266         if ((error = xfs_btree_dup_cursor(cur, &tcur)))
267                 return error;
268         /*
269          * If there's a right sibling, see if it's ok to shift an entry
270          * out of it.
271          */
272         if (rbno != NULLAGBLOCK) {
273                 /*
274                  * Move the temp cursor to the last entry in the next block.
275                  * Actually any entry but the first would suffice.
276                  */
277                 i = xfs_btree_lastrec(tcur, level);
278                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
279                 if ((error = xfs_inobt_increment(tcur, level, &i)))
280                         goto error0;
281                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
282                 i = xfs_btree_lastrec(tcur, level);
283                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
284                 /*
285                  * Grab a pointer to the block.
286                  */
287                 rbp = tcur->bc_bufs[level];
288                 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
289 #ifdef DEBUG
290                 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
291                         goto error0;
292 #endif
293                 /*
294                  * Grab the current block number, for future use.
295                  */
296                 bno = INT_GET(right->bb_leftsib, ARCH_CONVERT);
297                 /*
298                  * If right block is full enough so that removing one entry
299                  * won't make it too empty, and left-shifting an entry out
300                  * of right to us works, we're done.
301                  */
302                 if (INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1 >=
303                      XFS_INOBT_BLOCK_MINRECS(level, cur)) {
304                         if ((error = xfs_inobt_lshift(tcur, level, &i)))
305                                 goto error0;
306                         if (i) {
307                                 ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >=
308                                        XFS_INOBT_BLOCK_MINRECS(level, cur));
309                                 xfs_btree_del_cursor(tcur,
310                                                      XFS_BTREE_NOERROR);
311                                 if (level > 0 &&
312                                     (error = xfs_inobt_decrement(cur, level,
313                                                 &i)))
314                                         return error;
315                                 *stat = 1;
316                                 return 0;
317                         }
318                 }
319                 /*
320                  * Otherwise, grab the number of records in right for
321                  * future reference, and fix up the temp cursor to point
322                  * to our block again (last record).
323                  */
324                 rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT);
325                 if (lbno != NULLAGBLOCK) {
326                         xfs_btree_firstrec(tcur, level);
327                         if ((error = xfs_inobt_decrement(tcur, level, &i)))
328                                 goto error0;
329                 }
330         }
331         /*
332          * If there's a left sibling, see if it's ok to shift an entry
333          * out of it.
334          */
335         if (lbno != NULLAGBLOCK) {
336                 /*
337                  * Move the temp cursor to the first entry in the
338                  * previous block.
339                  */
340                 xfs_btree_firstrec(tcur, level);
341                 if ((error = xfs_inobt_decrement(tcur, level, &i)))
342                         goto error0;
343                 xfs_btree_firstrec(tcur, level);
344                 /*
345                  * Grab a pointer to the block.
346                  */
347                 lbp = tcur->bc_bufs[level];
348                 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
349 #ifdef DEBUG
350                 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
351                         goto error0;
352 #endif
353                 /*
354                  * Grab the current block number, for future use.
355                  */
356                 bno = INT_GET(left->bb_rightsib, ARCH_CONVERT);
357                 /*
358                  * If left block is full enough so that removing one entry
359                  * won't make it too empty, and right-shifting an entry out
360                  * of left to us works, we're done.
361                  */
362                 if (INT_GET(left->bb_numrecs, ARCH_CONVERT) - 1 >=
363                      XFS_INOBT_BLOCK_MINRECS(level, cur)) {
364                         if ((error = xfs_inobt_rshift(tcur, level, &i)))
365                                 goto error0;
366                         if (i) {
367                                 ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >=
368                                        XFS_INOBT_BLOCK_MINRECS(level, cur));
369                                 xfs_btree_del_cursor(tcur,
370                                                      XFS_BTREE_NOERROR);
371                                 if (level == 0)
372                                         cur->bc_ptrs[0]++;
373                                 *stat = 1;
374                                 return 0;
375                         }
376                 }
377                 /*
378                  * Otherwise, grab the number of records in right for
379                  * future reference.
380                  */
381                 lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT);
382         }
383         /*
384          * Delete the temp cursor, we're done with it.
385          */
386         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
387         /*
388          * If here, we need to do a join to keep the tree balanced.
389          */
390         ASSERT(bno != NULLAGBLOCK);
391         /*
392          * See if we can join with the left neighbor block.
393          */
394         if (lbno != NULLAGBLOCK &&
395             lrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
396                 /*
397                  * Set "right" to be the starting block,
398                  * "left" to be the left neighbor.
399                  */
400                 rbno = bno;
401                 right = block;
402                 rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT);
403                 rbp = bp;
404                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
405                                 cur->bc_private.i.agno, lbno, 0, &lbp,
406                                 XFS_INO_BTREE_REF)))
407                         return error;
408                 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
409                 lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT);
410                 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
411                         return error;
412         }
413         /*
414          * If that won't work, see if we can join with the right neighbor block.
415          */
416         else if (rbno != NULLAGBLOCK &&
417                  rrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
418                 /*
419                  * Set "left" to be the starting block,
420                  * "right" to be the right neighbor.
421                  */
422                 lbno = bno;
423                 left = block;
424                 lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT);
425                 lbp = bp;
426                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
427                                 cur->bc_private.i.agno, rbno, 0, &rbp,
428                                 XFS_INO_BTREE_REF)))
429                         return error;
430                 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
431                 rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT);
432                 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
433                         return error;
434         }
435         /*
436          * Otherwise, we can't fix the imbalance.
437          * Just return.  This is probably a logic error, but it's not fatal.
438          */
439         else {
440                 if (level > 0 && (error = xfs_inobt_decrement(cur, level, &i)))
441                         return error;
442                 *stat = 1;
443                 return 0;
444         }
445         /*
446          * We're now going to join "left" and "right" by moving all the stuff
447          * in "right" to "left" and deleting "right".
448          */
449         if (level > 0) {
450                 /*
451                  * It's a non-leaf.  Move keys and pointers.
452                  */
453                 lkp = XFS_INOBT_KEY_ADDR(left, lrecs + 1, cur);
454                 lpp = XFS_INOBT_PTR_ADDR(left, lrecs + 1, cur);
455                 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
456                 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
457 #ifdef DEBUG
458                 for (i = 0; i < rrecs; i++) {
459                         if ((error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level)))
460                                 return error;
461                 }
462 #endif
463                 memcpy(lkp, rkp, rrecs * sizeof(*lkp));
464                 memcpy(lpp, rpp, rrecs * sizeof(*lpp));
465                 xfs_inobt_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
466                 xfs_inobt_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
467         } else {
468                 /*
469                  * It's a leaf.  Move records.
470                  */
471                 lrp = XFS_INOBT_REC_ADDR(left, lrecs + 1, cur);
472                 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
473                 memcpy(lrp, rrp, rrecs * sizeof(*lrp));
474                 xfs_inobt_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
475         }
476         /*
477          * If we joined with the left neighbor, set the buffer in the
478          * cursor to the left block, and fix up the index.
479          */
480         if (bp != lbp) {
481                 xfs_btree_setbuf(cur, level, lbp);
482                 cur->bc_ptrs[level] += lrecs;
483         }
484         /*
485          * If we joined with the right neighbor and there's a level above
486          * us, increment the cursor at that level.
487          */
488         else if (level + 1 < cur->bc_nlevels &&
489                  (error = xfs_alloc_increment(cur, level + 1, &i)))
490                 return error;
491         /*
492          * Fix up the number of records in the surviving block.
493          */
494         lrecs += rrecs;
495         INT_SET(left->bb_numrecs, ARCH_CONVERT, lrecs);
496         /*
497          * Fix up the right block pointer in the surviving block, and log it.
498          */
499         left->bb_rightsib = right->bb_rightsib;
500         xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
501         /*
502          * If there is a right sibling now, make it point to the
503          * remaining block.
504          */
505         if (INT_GET(left->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
506                 xfs_inobt_block_t       *rrblock;
507                 xfs_buf_t               *rrbp;
508
509                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
510                                 cur->bc_private.i.agno, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0,
511                                 &rrbp, XFS_INO_BTREE_REF)))
512                         return error;
513                 rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
514                 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
515                         return error;
516                 INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, lbno);
517                 xfs_inobt_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
518         }
519         /*
520          * Free the deleting block.
521          */
522         if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp,
523                                      cur->bc_private.i.agno, rbno), 1)))
524                 return error;
525         xfs_trans_binval(cur->bc_tp, rbp);
526         /*
527          * Readjust the ptr at this level if it's not a leaf, since it's
528          * still pointing at the deletion point, which makes the cursor
529          * inconsistent.  If this makes the ptr 0, the caller fixes it up.
530          * We can't use decrement because it would change the next level up.
531          */
532         if (level > 0)
533                 cur->bc_ptrs[level]--;
534         /*
535          * Return value means the next level up has something to do.
536          */
537         *stat = 2;
538         return 0;
539
540 error0:
541         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
542         return error;
543 }
544
545 /*
546  * Insert one record/level.  Return information to the caller
547  * allowing the next level up to proceed if necessary.
548  */
549 STATIC int                              /* error */
550 xfs_inobt_insrec(
551         xfs_btree_cur_t         *cur,   /* btree cursor */
552         int                     level,  /* level to insert record at */
553         xfs_agblock_t           *bnop,  /* i/o: block number inserted */
554         xfs_inobt_rec_t         *recp,  /* i/o: record data inserted */
555         xfs_btree_cur_t         **curp, /* output: new cursor replacing cur */
556         int                     *stat)  /* success/failure */
557 {
558         xfs_inobt_block_t       *block; /* btree block record/key lives in */
559         xfs_buf_t               *bp;    /* buffer for block */
560         int                     error;  /* error return value */
561         int                     i;      /* loop index */
562         xfs_inobt_key_t         key;    /* key value being inserted */
563         xfs_inobt_key_t         *kp=NULL;       /* pointer to btree keys */
564         xfs_agblock_t           nbno;   /* block number of allocated block */
565         xfs_btree_cur_t         *ncur;  /* new cursor to be used at next lvl */
566         xfs_inobt_key_t         nkey;   /* new key value, from split */
567         xfs_inobt_rec_t         nrec;   /* new record value, for caller */
568         int                     numrecs;
569         int                     optr;   /* old ptr value */
570         xfs_inobt_ptr_t         *pp;    /* pointer to btree addresses */
571         int                     ptr;    /* index in btree block for this rec */
572         xfs_inobt_rec_t         *rp=NULL;       /* pointer to btree records */
573
574         /*
575          * If we made it to the root level, allocate a new root block
576          * and we're done.
577          */
578         if (level >= cur->bc_nlevels) {
579                 error = xfs_inobt_newroot(cur, &i);
580                 *bnop = NULLAGBLOCK;
581                 *stat = i;
582                 return error;
583         }
584         /*
585          * Make a key out of the record data to be inserted, and save it.
586          */
587         key.ir_startino = recp->ir_startino; /* INT_: direct copy */
588         optr = ptr = cur->bc_ptrs[level];
589         /*
590          * If we're off the left edge, return failure.
591          */
592         if (ptr == 0) {
593                 *stat = 0;
594                 return 0;
595         }
596         /*
597          * Get pointers to the btree buffer and block.
598          */
599         bp = cur->bc_bufs[level];
600         block = XFS_BUF_TO_INOBT_BLOCK(bp);
601         numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
602 #ifdef DEBUG
603         if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
604                 return error;
605         /*
606          * Check that the new entry is being inserted in the right place.
607          */
608         if (ptr <= numrecs) {
609                 if (level == 0) {
610                         rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
611                         xfs_btree_check_rec(cur->bc_btnum, recp, rp);
612                 } else {
613                         kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
614                         xfs_btree_check_key(cur->bc_btnum, &key, kp);
615                 }
616         }
617 #endif
618         nbno = NULLAGBLOCK;
619         ncur = (xfs_btree_cur_t *)0;
620         /*
621          * If the block is full, we can't insert the new entry until we
622          * make the block un-full.
623          */
624         if (numrecs == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
625                 /*
626                  * First, try shifting an entry to the right neighbor.
627                  */
628                 if ((error = xfs_inobt_rshift(cur, level, &i)))
629                         return error;
630                 if (i) {
631                         /* nothing */
632                 }
633                 /*
634                  * Next, try shifting an entry to the left neighbor.
635                  */
636                 else {
637                         if ((error = xfs_inobt_lshift(cur, level, &i)))
638                                 return error;
639                         if (i) {
640                                 optr = ptr = cur->bc_ptrs[level];
641                         } else {
642                                 /*
643                                  * Next, try splitting the current block
644                                  * in half. If this works we have to
645                                  * re-set our variables because
646                                  * we could be in a different block now.
647                                  */
648                                 if ((error = xfs_inobt_split(cur, level, &nbno,
649                                                 &nkey, &ncur, &i)))
650                                         return error;
651                                 if (i) {
652                                         bp = cur->bc_bufs[level];
653                                         block = XFS_BUF_TO_INOBT_BLOCK(bp);
654 #ifdef DEBUG
655                                         if ((error = xfs_btree_check_sblock(cur,
656                                                         block, level, bp)))
657                                                 return error;
658 #endif
659                                         ptr = cur->bc_ptrs[level];
660                                         nrec.ir_startino = nkey.ir_startino; /* INT_: direct copy */
661                                 } else {
662                                         /*
663                                          * Otherwise the insert fails.
664                                          */
665                                         *stat = 0;
666                                         return 0;
667                                 }
668                         }
669                 }
670         }
671         /*
672          * At this point we know there's room for our new entry in the block
673          * we're pointing at.
674          */
675         numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
676         if (level > 0) {
677                 /*
678                  * It's a non-leaf entry.  Make a hole for the new data
679                  * in the key and ptr regions of the block.
680                  */
681                 kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
682                 pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
683 #ifdef DEBUG
684                 for (i = numrecs; i >= ptr; i--) {
685                         if ((error = xfs_btree_check_sptr(cur, INT_GET(pp[i - 1], ARCH_CONVERT), level)))
686                                 return error;
687                 }
688 #endif
689                 memmove(&kp[ptr], &kp[ptr - 1],
690                         (numrecs - ptr + 1) * sizeof(*kp));
691                 memmove(&pp[ptr], &pp[ptr - 1],
692                         (numrecs - ptr + 1) * sizeof(*pp));
693                 /*
694                  * Now stuff the new data in, bump numrecs and log the new data.
695                  */
696 #ifdef DEBUG
697                 if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
698                         return error;
699 #endif
700                 kp[ptr - 1] = key; /* INT_: struct copy */
701                 INT_SET(pp[ptr - 1], ARCH_CONVERT, *bnop);
702                 numrecs++;
703                 INT_SET(block->bb_numrecs, ARCH_CONVERT, numrecs);
704                 xfs_inobt_log_keys(cur, bp, ptr, numrecs);
705                 xfs_inobt_log_ptrs(cur, bp, ptr, numrecs);
706         } else {
707                 /*
708                  * It's a leaf entry.  Make a hole for the new record.
709                  */
710                 rp = XFS_INOBT_REC_ADDR(block, 1, cur);
711                 memmove(&rp[ptr], &rp[ptr - 1],
712                         (numrecs - ptr + 1) * sizeof(*rp));
713                 /*
714                  * Now stuff the new record in, bump numrecs
715                  * and log the new data.
716                  */
717                 rp[ptr - 1] = *recp; /* INT_: struct copy */
718                 numrecs++;
719                 INT_SET(block->bb_numrecs, ARCH_CONVERT, numrecs);
720                 xfs_inobt_log_recs(cur, bp, ptr, numrecs);
721         }
722         /*
723          * Log the new number of records in the btree header.
724          */
725         xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
726 #ifdef DEBUG
727         /*
728          * Check that the key/record is in the right place, now.
729          */
730         if (ptr < numrecs) {
731                 if (level == 0)
732                         xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
733                                 rp + ptr);
734                 else
735                         xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
736                                 kp + ptr);
737         }
738 #endif
739         /*
740          * If we inserted at the start of a block, update the parents' keys.
741          */
742         if (optr == 1 && (error = xfs_inobt_updkey(cur, &key, level + 1)))
743                 return error;
744         /*
745          * Return the new block number, if any.
746          * If there is one, give back a record value and a cursor too.
747          */
748         *bnop = nbno;
749         if (nbno != NULLAGBLOCK) {
750                 *recp = nrec; /* INT_: struct copy */
751                 *curp = ncur;
752         }
753         *stat = 1;
754         return 0;
755 }
756
757 /*
758  * Log header fields from a btree block.
759  */
760 STATIC void
761 xfs_inobt_log_block(
762         xfs_trans_t             *tp,    /* transaction pointer */
763         xfs_buf_t               *bp,    /* buffer containing btree block */
764         int                     fields) /* mask of fields: XFS_BB_... */
765 {
766         int                     first;  /* first byte offset logged */
767         int                     last;   /* last byte offset logged */
768         static const short      offsets[] = {   /* table of offsets */
769                 offsetof(xfs_inobt_block_t, bb_magic),
770                 offsetof(xfs_inobt_block_t, bb_level),
771                 offsetof(xfs_inobt_block_t, bb_numrecs),
772                 offsetof(xfs_inobt_block_t, bb_leftsib),
773                 offsetof(xfs_inobt_block_t, bb_rightsib),
774                 sizeof(xfs_inobt_block_t)
775         };
776
777         xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last);
778         xfs_trans_log_buf(tp, bp, first, last);
779 }
780
781 /*
782  * Log keys from a btree block (nonleaf).
783  */
784 STATIC void
785 xfs_inobt_log_keys(
786         xfs_btree_cur_t         *cur,   /* btree cursor */
787         xfs_buf_t               *bp,    /* buffer containing btree block */
788         int                     kfirst, /* index of first key to log */
789         int                     klast)  /* index of last key to log */
790 {
791         xfs_inobt_block_t       *block; /* btree block to log from */
792         int                     first;  /* first byte offset logged */
793         xfs_inobt_key_t         *kp;    /* key pointer in btree block */
794         int                     last;   /* last byte offset logged */
795
796         block = XFS_BUF_TO_INOBT_BLOCK(bp);
797         kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
798         first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
799         last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
800         xfs_trans_log_buf(cur->bc_tp, bp, first, last);
801 }
802
803 /*
804  * Log block pointer fields from a btree block (nonleaf).
805  */
806 STATIC void
807 xfs_inobt_log_ptrs(
808         xfs_btree_cur_t         *cur,   /* btree cursor */
809         xfs_buf_t               *bp,    /* buffer containing btree block */
810         int                     pfirst, /* index of first pointer to log */
811         int                     plast)  /* index of last pointer to log */
812 {
813         xfs_inobt_block_t       *block; /* btree block to log from */
814         int                     first;  /* first byte offset logged */
815         int                     last;   /* last byte offset logged */
816         xfs_inobt_ptr_t         *pp;    /* block-pointer pointer in btree blk */
817
818         block = XFS_BUF_TO_INOBT_BLOCK(bp);
819         pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
820         first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
821         last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
822         xfs_trans_log_buf(cur->bc_tp, bp, first, last);
823 }
824
825 /*
826  * Log records from a btree block (leaf).
827  */
828 STATIC void
829 xfs_inobt_log_recs(
830         xfs_btree_cur_t         *cur,   /* btree cursor */
831         xfs_buf_t               *bp,    /* buffer containing btree block */
832         int                     rfirst, /* index of first record to log */
833         int                     rlast)  /* index of last record to log */
834 {
835         xfs_inobt_block_t       *block; /* btree block to log from */
836         int                     first;  /* first byte offset logged */
837         int                     last;   /* last byte offset logged */
838         xfs_inobt_rec_t         *rp;    /* record pointer for btree block */
839
840         block = XFS_BUF_TO_INOBT_BLOCK(bp);
841         rp = XFS_INOBT_REC_ADDR(block, 1, cur);
842         first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
843         last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
844         xfs_trans_log_buf(cur->bc_tp, bp, first, last);
845 }
846
847 /*
848  * Lookup the record.  The cursor is made to point to it, based on dir.
849  * Return 0 if can't find any such record, 1 for success.
850  */
851 STATIC int                              /* error */
852 xfs_inobt_lookup(
853         xfs_btree_cur_t         *cur,   /* btree cursor */
854         xfs_lookup_t            dir,    /* <=, ==, or >= */
855         int                     *stat)  /* success/failure */
856 {
857         xfs_agblock_t           agbno;  /* a.g. relative btree block number */
858         xfs_agnumber_t          agno;   /* allocation group number */
859         xfs_inobt_block_t       *block=NULL;    /* current btree block */
860         __int64_t               diff;   /* difference for the current key */
861         int                     error;  /* error return value */
862         int                     keyno=0;        /* current key number */
863         int                     level;  /* level in the btree */
864         xfs_mount_t             *mp;    /* file system mount point */
865
866         /*
867          * Get the allocation group header, and the root block number.
868          */
869         mp = cur->bc_mp;
870         {
871                 xfs_agi_t       *agi;   /* a.g. inode header */
872
873                 agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp);
874                 agno = INT_GET(agi->agi_seqno, ARCH_CONVERT);
875                 agbno = INT_GET(agi->agi_root, ARCH_CONVERT);
876         }
877         /*
878          * Iterate over each level in the btree, starting at the root.
879          * For each level above the leaves, find the key we need, based
880          * on the lookup record, then follow the corresponding block
881          * pointer down to the next level.
882          */
883         for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
884                 xfs_buf_t       *bp;    /* buffer pointer for btree block */
885                 xfs_daddr_t     d;      /* disk address of btree block */
886
887                 /*
888                  * Get the disk address we're looking for.
889                  */
890                 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
891                 /*
892                  * If the old buffer at this level is for a different block,
893                  * throw it away, otherwise just use it.
894                  */
895                 bp = cur->bc_bufs[level];
896                 if (bp && XFS_BUF_ADDR(bp) != d)
897                         bp = (xfs_buf_t *)0;
898                 if (!bp) {
899                         /*
900                          * Need to get a new buffer.  Read it, then
901                          * set it in the cursor, releasing the old one.
902                          */
903                         if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
904                                         agno, agbno, 0, &bp, XFS_INO_BTREE_REF)))
905                                 return error;
906                         xfs_btree_setbuf(cur, level, bp);
907                         /*
908                          * Point to the btree block, now that we have the buffer
909                          */
910                         block = XFS_BUF_TO_INOBT_BLOCK(bp);
911                         if ((error = xfs_btree_check_sblock(cur, block, level,
912                                         bp)))
913                                 return error;
914                 } else
915                         block = XFS_BUF_TO_INOBT_BLOCK(bp);
916                 /*
917                  * If we already had a key match at a higher level, we know
918                  * we need to use the first entry in this block.
919                  */
920                 if (diff == 0)
921                         keyno = 1;
922                 /*
923                  * Otherwise we need to search this block.  Do a binary search.
924                  */
925                 else {
926                         int             high;   /* high entry number */
927                         xfs_inobt_key_t *kkbase=NULL;/* base of keys in block */
928                         xfs_inobt_rec_t *krbase=NULL;/* base of records in block */
929                         int             low;    /* low entry number */
930
931                         /*
932                          * Get a pointer to keys or records.
933                          */
934                         if (level > 0)
935                                 kkbase = XFS_INOBT_KEY_ADDR(block, 1, cur);
936                         else
937                                 krbase = XFS_INOBT_REC_ADDR(block, 1, cur);
938                         /*
939                          * Set low and high entry numbers, 1-based.
940                          */
941                         low = 1;
942                         if (!(high = INT_GET(block->bb_numrecs, ARCH_CONVERT))) {
943                                 /*
944                                  * If the block is empty, the tree must
945                                  * be an empty leaf.
946                                  */
947                                 ASSERT(level == 0 && cur->bc_nlevels == 1);
948                                 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
949                                 *stat = 0;
950                                 return 0;
951                         }
952                         /*
953                          * Binary search the block.
954                          */
955                         while (low <= high) {
956                                 xfs_agino_t     startino;       /* key value */
957
958                                 /*
959                                  * keyno is average of low and high.
960                                  */
961                                 keyno = (low + high) >> 1;
962                                 /*
963                                  * Get startino.
964                                  */
965                                 if (level > 0) {
966                                         xfs_inobt_key_t *kkp;
967
968                                         kkp = kkbase + keyno - 1;
969                                         startino = INT_GET(kkp->ir_startino, ARCH_CONVERT);
970                                 } else {
971                                         xfs_inobt_rec_t *krp;
972
973                                         krp = krbase + keyno - 1;
974                                         startino = INT_GET(krp->ir_startino, ARCH_CONVERT);
975                                 }
976                                 /*
977                                  * Compute difference to get next direction.
978                                  */
979                                 diff = (__int64_t)
980                                         startino - cur->bc_rec.i.ir_startino;
981                                 /*
982                                  * Less than, move right.
983                                  */
984                                 if (diff < 0)
985                                         low = keyno + 1;
986                                 /*
987                                  * Greater than, move left.
988                                  */
989                                 else if (diff > 0)
990                                         high = keyno - 1;
991                                 /*
992                                  * Equal, we're done.
993                                  */
994                                 else
995                                         break;
996                         }
997                 }
998                 /*
999                  * If there are more levels, set up for the next level
1000                  * by getting the block number and filling in the cursor.
1001                  */
1002                 if (level > 0) {
1003                         /*
1004                          * If we moved left, need the previous key number,
1005                          * unless there isn't one.
1006                          */
1007                         if (diff > 0 && --keyno < 1)
1008                                 keyno = 1;
1009                         agbno = INT_GET(*XFS_INOBT_PTR_ADDR(block, keyno, cur), ARCH_CONVERT);
1010 #ifdef DEBUG
1011                         if ((error = xfs_btree_check_sptr(cur, agbno, level)))
1012                                 return error;
1013 #endif
1014                         cur->bc_ptrs[level] = keyno;
1015                 }
1016         }
1017         /*
1018          * Done with the search.
1019          * See if we need to adjust the results.
1020          */
1021         if (dir != XFS_LOOKUP_LE && diff < 0) {
1022                 keyno++;
1023                 /*
1024                  * If ge search and we went off the end of the block, but it's
1025                  * not the last block, we're in the wrong block.
1026                  */
1027                 if (dir == XFS_LOOKUP_GE &&
1028                     keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT) &&
1029                     INT_GET(block->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
1030                         int     i;
1031
1032                         cur->bc_ptrs[0] = keyno;
1033                         if ((error = xfs_inobt_increment(cur, 0, &i)))
1034                                 return error;
1035                         ASSERT(i == 1);
1036                         *stat = 1;
1037                         return 0;
1038                 }
1039         }
1040         else if (dir == XFS_LOOKUP_LE && diff > 0)
1041                 keyno--;
1042         cur->bc_ptrs[0] = keyno;
1043         /*
1044          * Return if we succeeded or not.
1045          */
1046         if (keyno == 0 || keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT))
1047                 *stat = 0;
1048         else
1049                 *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
1050         return 0;
1051 }
1052
1053 /*
1054  * Move 1 record left from cur/level if possible.
1055  * Update cur to reflect the new path.
1056  */
1057 STATIC int                              /* error */
1058 xfs_inobt_lshift(
1059         xfs_btree_cur_t         *cur,   /* btree cursor */
1060         int                     level,  /* level to shift record on */
1061         int                     *stat)  /* success/failure */
1062 {
1063         int                     error;  /* error return value */
1064 #ifdef DEBUG
1065         int                     i;      /* loop index */
1066 #endif
1067         xfs_inobt_key_t         key;    /* key value for leaf level upward */
1068         xfs_buf_t               *lbp;   /* buffer for left neighbor block */
1069         xfs_inobt_block_t       *left;  /* left neighbor btree block */
1070         xfs_inobt_key_t         *lkp=NULL;      /* key pointer for left block */
1071         xfs_inobt_ptr_t         *lpp;   /* address pointer for left block */
1072         xfs_inobt_rec_t         *lrp=NULL;      /* record pointer for left block */
1073         int                     nrec;   /* new number of left block entries */
1074         xfs_buf_t               *rbp;   /* buffer for right (current) block */
1075         xfs_inobt_block_t       *right; /* right (current) btree block */
1076         xfs_inobt_key_t         *rkp=NULL;      /* key pointer for right block */
1077         xfs_inobt_ptr_t         *rpp=NULL;      /* address pointer for right block */
1078         xfs_inobt_rec_t         *rrp=NULL;      /* record pointer for right block */
1079
1080         /*
1081          * Set up variables for this block as "right".
1082          */
1083         rbp = cur->bc_bufs[level];
1084         right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1085 #ifdef DEBUG
1086         if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1087                 return error;
1088 #endif
1089         /*
1090          * If we've got no left sibling then we can't shift an entry left.
1091          */
1092         if (INT_GET(right->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) {
1093                 *stat = 0;
1094                 return 0;
1095         }
1096         /*
1097          * If the cursor entry is the one that would be moved, don't
1098          * do it... it's too complicated.
1099          */
1100         if (cur->bc_ptrs[level] <= 1) {
1101                 *stat = 0;
1102                 return 0;
1103         }
1104         /*
1105          * Set up the left neighbor as "left".
1106          */
1107         if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1108                         cur->bc_private.i.agno, INT_GET(right->bb_leftsib, ARCH_CONVERT), 0, &lbp,
1109                         XFS_INO_BTREE_REF)))
1110                 return error;
1111         left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1112         if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1113                 return error;
1114         /*
1115          * If it's full, it can't take another entry.
1116          */
1117         if (INT_GET(left->bb_numrecs, ARCH_CONVERT) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
1118                 *stat = 0;
1119                 return 0;
1120         }
1121         nrec = INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1;
1122         /*
1123          * If non-leaf, copy a key and a ptr to the left block.
1124          */
1125         if (level > 0) {
1126                 lkp = XFS_INOBT_KEY_ADDR(left, nrec, cur);
1127                 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1128                 *lkp = *rkp;
1129                 xfs_inobt_log_keys(cur, lbp, nrec, nrec);
1130                 lpp = XFS_INOBT_PTR_ADDR(left, nrec, cur);
1131                 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1132 #ifdef DEBUG
1133                 if ((error = xfs_btree_check_sptr(cur, INT_GET(*rpp, ARCH_CONVERT), level)))
1134                         return error;
1135 #endif
1136                 *lpp = *rpp; /* INT_: no-change copy */
1137                 xfs_inobt_log_ptrs(cur, lbp, nrec, nrec);
1138         }
1139         /*
1140          * If leaf, copy a record to the left block.
1141          */
1142         else {
1143                 lrp = XFS_INOBT_REC_ADDR(left, nrec, cur);
1144                 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1145                 *lrp = *rrp;
1146                 xfs_inobt_log_recs(cur, lbp, nrec, nrec);
1147         }
1148         /*
1149          * Bump and log left's numrecs, decrement and log right's numrecs.
1150          */
1151         INT_MOD(left->bb_numrecs, ARCH_CONVERT, +1);
1152         xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1153 #ifdef DEBUG
1154         if (level > 0)
1155                 xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
1156         else
1157                 xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
1158 #endif
1159         INT_MOD(right->bb_numrecs, ARCH_CONVERT, -1);
1160         xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1161         /*
1162          * Slide the contents of right down one entry.
1163          */
1164         if (level > 0) {
1165 #ifdef DEBUG
1166                 for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
1167                         if ((error = xfs_btree_check_sptr(cur, INT_GET(rpp[i + 1], ARCH_CONVERT),
1168                                         level)))
1169                                 return error;
1170                 }
1171 #endif
1172                 memmove(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
1173                 memmove(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
1174                 xfs_inobt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
1175                 xfs_inobt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
1176         } else {
1177                 memmove(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
1178                 xfs_inobt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
1179                 key.ir_startino = rrp->ir_startino; /* INT_: direct copy */
1180                 rkp = &key;
1181         }
1182         /*
1183          * Update the parent key values of right.
1184          */
1185         if ((error = xfs_inobt_updkey(cur, rkp, level + 1)))
1186                 return error;
1187         /*
1188          * Slide the cursor value left one.
1189          */
1190         cur->bc_ptrs[level]--;
1191         *stat = 1;
1192         return 0;
1193 }
1194
1195 /*
1196  * Allocate a new root block, fill it in.
1197  */
1198 STATIC int                              /* error */
1199 xfs_inobt_newroot(
1200         xfs_btree_cur_t         *cur,   /* btree cursor */
1201         int                     *stat)  /* success/failure */
1202 {
1203         xfs_agi_t               *agi;   /* a.g. inode header */
1204         xfs_alloc_arg_t         args;   /* allocation argument structure */
1205         xfs_inobt_block_t       *block; /* one half of the old root block */
1206         xfs_buf_t               *bp;    /* buffer containing block */
1207         int                     error;  /* error return value */
1208         xfs_inobt_key_t         *kp;    /* btree key pointer */
1209         xfs_agblock_t           lbno;   /* left block number */
1210         xfs_buf_t               *lbp;   /* left buffer pointer */
1211         xfs_inobt_block_t       *left;  /* left btree block */
1212         xfs_buf_t               *nbp;   /* new (root) buffer */
1213         xfs_inobt_block_t       *new;   /* new (root) btree block */
1214         int                     nptr;   /* new value for key index, 1 or 2 */
1215         xfs_inobt_ptr_t         *pp;    /* btree address pointer */
1216         xfs_agblock_t           rbno;   /* right block number */
1217         xfs_buf_t               *rbp;   /* right buffer pointer */
1218         xfs_inobt_block_t       *right; /* right btree block */
1219         xfs_inobt_rec_t         *rp;    /* btree record pointer */
1220
1221         ASSERT(cur->bc_nlevels < XFS_IN_MAXLEVELS(cur->bc_mp));
1222
1223         /*
1224          * Get a block & a buffer.
1225          */
1226         agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp);
1227         args.tp = cur->bc_tp;
1228         args.mp = cur->bc_mp;
1229         args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno,
1230                 INT_GET(agi->agi_root, ARCH_CONVERT));
1231         args.mod = args.minleft = args.alignment = args.total = args.wasdel =
1232                 args.isfl = args.userdata = args.minalignslop = 0;
1233         args.minlen = args.maxlen = args.prod = 1;
1234         args.type = XFS_ALLOCTYPE_NEAR_BNO;
1235         if ((error = xfs_alloc_vextent(&args)))
1236                 return error;
1237         /*
1238          * None available, we fail.
1239          */
1240         if (args.fsbno == NULLFSBLOCK) {
1241                 *stat = 0;
1242                 return 0;
1243         }
1244         ASSERT(args.len == 1);
1245         nbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
1246         new = XFS_BUF_TO_INOBT_BLOCK(nbp);
1247         /*
1248          * Set the root data in the a.g. inode structure.
1249          */
1250         INT_SET(agi->agi_root, ARCH_CONVERT, args.agbno);
1251         INT_MOD(agi->agi_level, ARCH_CONVERT, 1);
1252         xfs_ialloc_log_agi(args.tp, cur->bc_private.i.agbp,
1253                 XFS_AGI_ROOT | XFS_AGI_LEVEL);
1254         /*
1255          * At the previous root level there are now two blocks: the old
1256          * root, and the new block generated when it was split.
1257          * We don't know which one the cursor is pointing at, so we
1258          * set up variables "left" and "right" for each case.
1259          */
1260         bp = cur->bc_bufs[cur->bc_nlevels - 1];
1261         block = XFS_BUF_TO_INOBT_BLOCK(bp);
1262 #ifdef DEBUG
1263         if ((error = xfs_btree_check_sblock(cur, block, cur->bc_nlevels - 1, bp)))
1264                 return error;
1265 #endif
1266         if (INT_GET(block->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
1267                 /*
1268                  * Our block is left, pick up the right block.
1269                  */
1270                 lbp = bp;
1271                 lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
1272                 left = block;
1273                 rbno = INT_GET(left->bb_rightsib, ARCH_CONVERT);
1274                 if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1275                                 rbno, 0, &rbp, XFS_INO_BTREE_REF)))
1276                         return error;
1277                 bp = rbp;
1278                 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1279                 if ((error = xfs_btree_check_sblock(cur, right,
1280                                 cur->bc_nlevels - 1, rbp)))
1281                         return error;
1282                 nptr = 1;
1283         } else {
1284                 /*
1285                  * Our block is right, pick up the left block.
1286                  */
1287                 rbp = bp;
1288                 rbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(rbp));
1289                 right = block;
1290                 lbno = INT_GET(right->bb_leftsib, ARCH_CONVERT);
1291                 if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1292                                 lbno, 0, &lbp, XFS_INO_BTREE_REF)))
1293                         return error;
1294                 bp = lbp;
1295                 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1296                 if ((error = xfs_btree_check_sblock(cur, left,
1297                                 cur->bc_nlevels - 1, lbp)))
1298                         return error;
1299                 nptr = 2;
1300         }
1301         /*
1302          * Fill in the new block's btree header and log it.
1303          */
1304         INT_SET(new->bb_magic, ARCH_CONVERT, xfs_magics[cur->bc_btnum]);
1305         INT_SET(new->bb_level, ARCH_CONVERT, (__uint16_t)cur->bc_nlevels);
1306         INT_SET(new->bb_numrecs, ARCH_CONVERT, 2);
1307         INT_SET(new->bb_leftsib, ARCH_CONVERT, NULLAGBLOCK);
1308         INT_SET(new->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
1309         xfs_inobt_log_block(args.tp, nbp, XFS_BB_ALL_BITS);
1310         ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
1311         /*
1312          * Fill in the key data in the new root.
1313          */
1314         kp = XFS_INOBT_KEY_ADDR(new, 1, cur);
1315         if (INT_GET(left->bb_level, ARCH_CONVERT) > 0) {
1316                 kp[0] = *XFS_INOBT_KEY_ADDR(left, 1, cur); /* INT_: struct copy */
1317                 kp[1] = *XFS_INOBT_KEY_ADDR(right, 1, cur); /* INT_: struct copy */
1318         } else {
1319                 rp = XFS_INOBT_REC_ADDR(left, 1, cur);
1320                 INT_COPY(kp[0].ir_startino, rp->ir_startino, ARCH_CONVERT);
1321                 rp = XFS_INOBT_REC_ADDR(right, 1, cur);
1322                 INT_COPY(kp[1].ir_startino, rp->ir_startino, ARCH_CONVERT);
1323         }
1324         xfs_inobt_log_keys(cur, nbp, 1, 2);
1325         /*
1326          * Fill in the pointer data in the new root.
1327          */
1328         pp = XFS_INOBT_PTR_ADDR(new, 1, cur);
1329         INT_SET(pp[0], ARCH_CONVERT, lbno);
1330         INT_SET(pp[1], ARCH_CONVERT, rbno);
1331         xfs_inobt_log_ptrs(cur, nbp, 1, 2);
1332         /*
1333          * Fix up the cursor.
1334          */
1335         xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
1336         cur->bc_ptrs[cur->bc_nlevels] = nptr;
1337         cur->bc_nlevels++;
1338         *stat = 1;
1339         return 0;
1340 }
1341
1342 /*
1343  * Move 1 record right from cur/level if possible.
1344  * Update cur to reflect the new path.
1345  */
1346 STATIC int                              /* error */
1347 xfs_inobt_rshift(
1348         xfs_btree_cur_t         *cur,   /* btree cursor */
1349         int                     level,  /* level to shift record on */
1350         int                     *stat)  /* success/failure */
1351 {
1352         int                     error;  /* error return value */
1353         int                     i;      /* loop index */
1354         xfs_inobt_key_t         key;    /* key value for leaf level upward */
1355         xfs_buf_t               *lbp;   /* buffer for left (current) block */
1356         xfs_inobt_block_t       *left;  /* left (current) btree block */
1357         xfs_inobt_key_t         *lkp;   /* key pointer for left block */
1358         xfs_inobt_ptr_t         *lpp;   /* address pointer for left block */
1359         xfs_inobt_rec_t         *lrp;   /* record pointer for left block */
1360         xfs_buf_t               *rbp;   /* buffer for right neighbor block */
1361         xfs_inobt_block_t       *right; /* right neighbor btree block */
1362         xfs_inobt_key_t         *rkp;   /* key pointer for right block */
1363         xfs_inobt_ptr_t         *rpp;   /* address pointer for right block */
1364         xfs_inobt_rec_t         *rrp=NULL;      /* record pointer for right block */
1365         xfs_btree_cur_t         *tcur;  /* temporary cursor */
1366
1367         /*
1368          * Set up variables for this block as "left".
1369          */
1370         lbp = cur->bc_bufs[level];
1371         left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1372 #ifdef DEBUG
1373         if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1374                 return error;
1375 #endif
1376         /*
1377          * If we've got no right sibling then we can't shift an entry right.
1378          */
1379         if (INT_GET(left->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) {
1380                 *stat = 0;
1381                 return 0;
1382         }
1383         /*
1384          * If the cursor entry is the one that would be moved, don't
1385          * do it... it's too complicated.
1386          */
1387         if (cur->bc_ptrs[level] >= INT_GET(left->bb_numrecs, ARCH_CONVERT)) {
1388                 *stat = 0;
1389                 return 0;
1390         }
1391         /*
1392          * Set up the right neighbor as "right".
1393          */
1394         if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1395                         cur->bc_private.i.agno, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0, &rbp,
1396                         XFS_INO_BTREE_REF)))
1397                 return error;
1398         right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1399         if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1400                 return error;
1401         /*
1402          * If it's full, it can't take another entry.
1403          */
1404         if (INT_GET(right->bb_numrecs, ARCH_CONVERT) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
1405                 *stat = 0;
1406                 return 0;
1407         }
1408         /*
1409          * Make a hole at the start of the right neighbor block, then
1410          * copy the last left block entry to the hole.
1411          */
1412         if (level > 0) {
1413                 lkp = XFS_INOBT_KEY_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
1414                 lpp = XFS_INOBT_PTR_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
1415                 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1416                 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1417 #ifdef DEBUG
1418                 for (i = INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1; i >= 0; i--) {
1419                         if ((error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level)))
1420                                 return error;
1421                 }
1422 #endif
1423                 memmove(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
1424                 memmove(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
1425 #ifdef DEBUG
1426                 if ((error = xfs_btree_check_sptr(cur, INT_GET(*lpp, ARCH_CONVERT), level)))
1427                         return error;
1428 #endif
1429                 *rkp = *lkp; /* INT_: no change copy */
1430                 *rpp = *lpp; /* INT_: no change copy */
1431                 xfs_inobt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
1432                 xfs_inobt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
1433         } else {
1434                 lrp = XFS_INOBT_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
1435                 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1436                 memmove(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
1437                 *rrp = *lrp;
1438                 xfs_inobt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
1439                 key.ir_startino = rrp->ir_startino; /* INT_: direct copy */
1440                 rkp = &key;
1441         }
1442         /*
1443          * Decrement and log left's numrecs, bump and log right's numrecs.
1444          */
1445         INT_MOD(left->bb_numrecs, ARCH_CONVERT, -1);
1446         xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1447         INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
1448 #ifdef DEBUG
1449         if (level > 0)
1450                 xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
1451         else
1452                 xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
1453 #endif
1454         xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1455         /*
1456          * Using a temporary cursor, update the parent key values of the
1457          * block on the right.
1458          */
1459         if ((error = xfs_btree_dup_cursor(cur, &tcur)))
1460                 return error;
1461         xfs_btree_lastrec(tcur, level);
1462         if ((error = xfs_inobt_increment(tcur, level, &i)) ||
1463             (error = xfs_inobt_updkey(tcur, rkp, level + 1))) {
1464                 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
1465                 return error;
1466         }
1467         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
1468         *stat = 1;
1469         return 0;
1470 }
1471
1472 /*
1473  * Split cur/level block in half.
1474  * Return new block number and its first record (to be inserted into parent).
1475  */
1476 STATIC int                              /* error */
1477 xfs_inobt_split(
1478         xfs_btree_cur_t         *cur,   /* btree cursor */
1479         int                     level,  /* level to split */
1480         xfs_agblock_t           *bnop,  /* output: block number allocated */
1481         xfs_inobt_key_t         *keyp,  /* output: first key of new block */
1482         xfs_btree_cur_t         **curp, /* output: new cursor */
1483         int                     *stat)  /* success/failure */
1484 {
1485         xfs_alloc_arg_t         args;   /* allocation argument structure */
1486         int                     error;  /* error return value */
1487         int                     i;      /* loop index/record number */
1488         xfs_agblock_t           lbno;   /* left (current) block number */
1489         xfs_buf_t               *lbp;   /* buffer for left block */
1490         xfs_inobt_block_t       *left;  /* left (current) btree block */
1491         xfs_inobt_key_t         *lkp;   /* left btree key pointer */
1492         xfs_inobt_ptr_t         *lpp;   /* left btree address pointer */
1493         xfs_inobt_rec_t         *lrp;   /* left btree record pointer */
1494         xfs_buf_t               *rbp;   /* buffer for right block */
1495         xfs_inobt_block_t       *right; /* right (new) btree block */
1496         xfs_inobt_key_t         *rkp;   /* right btree key pointer */
1497         xfs_inobt_ptr_t         *rpp;   /* right btree address pointer */
1498         xfs_inobt_rec_t         *rrp;   /* right btree record pointer */
1499
1500         /*
1501          * Set up left block (current one).
1502          */
1503         lbp = cur->bc_bufs[level];
1504         args.tp = cur->bc_tp;
1505         args.mp = cur->bc_mp;
1506         lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
1507         /*
1508          * Allocate the new block.
1509          * If we can't do it, we're toast.  Give up.
1510          */
1511         args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno, lbno);
1512         args.mod = args.minleft = args.alignment = args.total = args.wasdel =
1513                 args.isfl = args.userdata = args.minalignslop = 0;
1514         args.minlen = args.maxlen = args.prod = 1;
1515         args.type = XFS_ALLOCTYPE_NEAR_BNO;
1516         if ((error = xfs_alloc_vextent(&args)))
1517                 return error;
1518         if (args.fsbno == NULLFSBLOCK) {
1519                 *stat = 0;
1520                 return 0;
1521         }
1522         ASSERT(args.len == 1);
1523         rbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
1524         /*
1525          * Set up the new block as "right".
1526          */
1527         right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1528         /*
1529          * "Left" is the current (according to the cursor) block.
1530          */
1531         left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1532 #ifdef DEBUG
1533         if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1534                 return error;
1535 #endif
1536         /*
1537          * Fill in the btree header for the new block.
1538          */
1539         INT_SET(right->bb_magic, ARCH_CONVERT, xfs_magics[cur->bc_btnum]);
1540         right->bb_level = left->bb_level; /* INT_: direct copy */
1541         INT_SET(right->bb_numrecs, ARCH_CONVERT, (__uint16_t)(INT_GET(left->bb_numrecs, ARCH_CONVERT) / 2));
1542         /*
1543          * Make sure that if there's an odd number of entries now, that
1544          * each new block will have the same number of entries.
1545          */
1546         if ((INT_GET(left->bb_numrecs, ARCH_CONVERT) & 1) &&
1547             cur->bc_ptrs[level] <= INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1)
1548                 INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
1549         i = INT_GET(left->bb_numrecs, ARCH_CONVERT) - INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1;
1550         /*
1551          * For non-leaf blocks, copy keys and addresses over to the new block.
1552          */
1553         if (level > 0) {
1554                 lkp = XFS_INOBT_KEY_ADDR(left, i, cur);
1555                 lpp = XFS_INOBT_PTR_ADDR(left, i, cur);
1556                 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1557                 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1558 #ifdef DEBUG
1559                 for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
1560                         if ((error = xfs_btree_check_sptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level)))
1561                                 return error;
1562                 }
1563 #endif
1564                 memcpy(rkp, lkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
1565                 memcpy(rpp, lpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
1566                 xfs_inobt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
1567                 xfs_inobt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
1568                 *keyp = *rkp;
1569         }
1570         /*
1571          * For leaf blocks, copy records over to the new block.
1572          */
1573         else {
1574                 lrp = XFS_INOBT_REC_ADDR(left, i, cur);
1575                 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1576                 memcpy(rrp, lrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
1577                 xfs_inobt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
1578                 keyp->ir_startino = rrp->ir_startino; /* INT_: direct copy */
1579         }
1580         /*
1581          * Find the left block number by looking in the buffer.
1582          * Adjust numrecs, sibling pointers.
1583          */
1584         INT_MOD(left->bb_numrecs, ARCH_CONVERT, -(INT_GET(right->bb_numrecs, ARCH_CONVERT)));
1585         right->bb_rightsib = left->bb_rightsib; /* INT_: direct copy */
1586         INT_SET(left->bb_rightsib, ARCH_CONVERT, args.agbno);
1587         INT_SET(right->bb_leftsib, ARCH_CONVERT, lbno);
1588         xfs_inobt_log_block(args.tp, rbp, XFS_BB_ALL_BITS);
1589         xfs_inobt_log_block(args.tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
1590         /*
1591          * If there's a block to the new block's right, make that block
1592          * point back to right instead of to left.
1593          */
1594         if (INT_GET(right->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
1595                 xfs_inobt_block_t       *rrblock;       /* rr btree block */
1596                 xfs_buf_t               *rrbp;          /* buffer for rrblock */
1597
1598                 if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1599                                 INT_GET(right->bb_rightsib, ARCH_CONVERT), 0, &rrbp,
1600                                 XFS_INO_BTREE_REF)))
1601                         return error;
1602                 rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
1603                 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
1604                         return error;
1605                 INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, args.agbno);
1606                 xfs_inobt_log_block(args.tp, rrbp, XFS_BB_LEFTSIB);
1607         }
1608         /*
1609          * If the cursor is really in the right block, move it there.
1610          * If it's just pointing past the last entry in left, then we'll
1611          * insert there, so don't change anything in that case.
1612          */
1613         if (cur->bc_ptrs[level] > INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1) {
1614                 xfs_btree_setbuf(cur, level, rbp);
1615                 cur->bc_ptrs[level] -= INT_GET(left->bb_numrecs, ARCH_CONVERT);
1616         }
1617         /*
1618          * If there are more levels, we'll need another cursor which refers
1619          * the right block, no matter where this cursor was.
1620          */
1621         if (level + 1 < cur->bc_nlevels) {
1622                 if ((error = xfs_btree_dup_cursor(cur, curp)))
1623                         return error;
1624                 (*curp)->bc_ptrs[level + 1]++;
1625         }
1626         *bnop = args.agbno;
1627         *stat = 1;
1628         return 0;
1629 }
1630
1631 /*
1632  * Update keys at all levels from here to the root along the cursor's path.
1633  */
1634 STATIC int                              /* error */
1635 xfs_inobt_updkey(
1636         xfs_btree_cur_t         *cur,   /* btree cursor */
1637         xfs_inobt_key_t         *keyp,  /* new key value to update to */
1638         int                     level)  /* starting level for update */
1639 {
1640         int                     ptr;    /* index of key in block */
1641
1642         /*
1643          * Go up the tree from this level toward the root.
1644          * At each level, update the key value to the value input.
1645          * Stop when we reach a level where the cursor isn't pointing
1646          * at the first entry in the block.
1647          */
1648         for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1649                 xfs_buf_t               *bp;    /* buffer for block */
1650                 xfs_inobt_block_t       *block; /* btree block */
1651 #ifdef DEBUG
1652                 int                     error;  /* error return value */
1653 #endif
1654                 xfs_inobt_key_t         *kp;    /* ptr to btree block keys */
1655
1656                 bp = cur->bc_bufs[level];
1657                 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1658 #ifdef DEBUG
1659                 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1660                         return error;
1661 #endif
1662                 ptr = cur->bc_ptrs[level];
1663                 kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
1664                 *kp = *keyp;
1665                 xfs_inobt_log_keys(cur, bp, ptr, ptr);
1666         }
1667         return 0;
1668 }
1669
1670 /*
1671  * Externally visible routines.
1672  */
1673
1674 /*
1675  * Decrement cursor by one record at the level.
1676  * For nonzero levels the leaf-ward information is untouched.
1677  */
1678 int                                     /* error */
1679 xfs_inobt_decrement(
1680         xfs_btree_cur_t         *cur,   /* btree cursor */
1681         int                     level,  /* level in btree, 0 is leaf */
1682         int                     *stat)  /* success/failure */
1683 {
1684         xfs_inobt_block_t       *block; /* btree block */
1685         int                     error;
1686         int                     lev;    /* btree level */
1687
1688         ASSERT(level < cur->bc_nlevels);
1689         /*
1690          * Read-ahead to the left at this level.
1691          */
1692         xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1693         /*
1694          * Decrement the ptr at this level.  If we're still in the block
1695          * then we're done.
1696          */
1697         if (--cur->bc_ptrs[level] > 0) {
1698                 *stat = 1;
1699                 return 0;
1700         }
1701         /*
1702          * Get a pointer to the btree block.
1703          */
1704         block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[level]);
1705 #ifdef DEBUG
1706         if ((error = xfs_btree_check_sblock(cur, block, level,
1707                         cur->bc_bufs[level])))
1708                 return error;
1709 #endif
1710         /*
1711          * If we just went off the left edge of the tree, return failure.
1712          */
1713         if (INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) {
1714                 *stat = 0;
1715                 return 0;
1716         }
1717         /*
1718          * March up the tree decrementing pointers.
1719          * Stop when we don't go off the left edge of a block.
1720          */
1721         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1722                 if (--cur->bc_ptrs[lev] > 0)
1723                         break;
1724                 /*
1725                  * Read-ahead the left block, we're going to read it
1726                  * in the next loop.
1727                  */
1728                 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1729         }
1730         /*
1731          * If we went off the root then we are seriously confused.
1732          */
1733         ASSERT(lev < cur->bc_nlevels);
1734         /*
1735          * Now walk back down the tree, fixing up the cursor's buffer
1736          * pointers and key numbers.
1737          */
1738         for (block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
1739                 xfs_agblock_t   agbno;  /* block number of btree block */
1740                 xfs_buf_t       *bp;    /* buffer containing btree block */
1741
1742                 agbno = INT_GET(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
1743                 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1744                                 cur->bc_private.i.agno, agbno, 0, &bp,
1745                                 XFS_INO_BTREE_REF)))
1746                         return error;
1747                 lev--;
1748                 xfs_btree_setbuf(cur, lev, bp);
1749                 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1750                 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1751                         return error;
1752                 cur->bc_ptrs[lev] = INT_GET(block->bb_numrecs, ARCH_CONVERT);
1753         }
1754         *stat = 1;
1755         return 0;
1756 }
1757
1758 /*
1759  * Delete the record pointed to by cur.
1760  * The cursor refers to the place where the record was (could be inserted)
1761  * when the operation returns.
1762  */
1763 int                                     /* error */
1764 xfs_inobt_delete(
1765         xfs_btree_cur_t *cur,           /* btree cursor */
1766         int             *stat)          /* success/failure */
1767 {
1768         int             error;
1769         int             i;              /* result code */
1770         int             level;          /* btree level */
1771
1772         /*
1773          * Go up the tree, starting at leaf level.
1774          * If 2 is returned then a join was done; go to the next level.
1775          * Otherwise we are done.
1776          */
1777         for (level = 0, i = 2; i == 2; level++) {
1778                 if ((error = xfs_inobt_delrec(cur, level, &i)))
1779                         return error;
1780         }
1781         if (i == 0) {
1782                 for (level = 1; level < cur->bc_nlevels; level++) {
1783                         if (cur->bc_ptrs[level] == 0) {
1784                                 if ((error = xfs_inobt_decrement(cur, level, &i)))
1785                                         return error;
1786                                 break;
1787                         }
1788                 }
1789         }
1790         *stat = i;
1791         return 0;
1792 }
1793
1794
1795 /*
1796  * Get the data from the pointed-to record.
1797  */
1798 int                                     /* error */
1799 xfs_inobt_get_rec(
1800         xfs_btree_cur_t         *cur,   /* btree cursor */
1801         xfs_agino_t             *ino,   /* output: starting inode of chunk */
1802         __int32_t               *fcnt,  /* output: number of free inodes */
1803         xfs_inofree_t           *free,  /* output: free inode mask */
1804         int                     *stat)  /* output: success/failure */
1805 {
1806         xfs_inobt_block_t       *block; /* btree block */
1807         xfs_buf_t               *bp;    /* buffer containing btree block */
1808 #ifdef DEBUG
1809         int                     error;  /* error return value */
1810 #endif
1811         int                     ptr;    /* record number */
1812         xfs_inobt_rec_t         *rec;   /* record data */
1813
1814         bp = cur->bc_bufs[0];
1815         ptr = cur->bc_ptrs[0];
1816         block = XFS_BUF_TO_INOBT_BLOCK(bp);
1817 #ifdef DEBUG
1818         if ((error = xfs_btree_check_sblock(cur, block, 0, bp)))
1819                 return error;
1820 #endif
1821         /*
1822          * Off the right end or left end, return failure.
1823          */
1824         if (ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT) || ptr <= 0) {
1825                 *stat = 0;
1826                 return 0;
1827         }
1828         /*
1829          * Point to the record and extract its data.
1830          */
1831         rec = XFS_INOBT_REC_ADDR(block, ptr, cur);
1832         *ino = INT_GET(rec->ir_startino, ARCH_CONVERT);
1833         *fcnt = INT_GET(rec->ir_freecount, ARCH_CONVERT);
1834         *free = INT_GET(rec->ir_free, ARCH_CONVERT);
1835         *stat = 1;
1836         return 0;
1837 }
1838
1839 /*
1840  * Increment cursor by one record at the level.
1841  * For nonzero levels the leaf-ward information is untouched.
1842  */
1843 int                                     /* error */
1844 xfs_inobt_increment(
1845         xfs_btree_cur_t         *cur,   /* btree cursor */
1846         int                     level,  /* level in btree, 0 is leaf */
1847         int                     *stat)  /* success/failure */
1848 {
1849         xfs_inobt_block_t       *block; /* btree block */
1850         xfs_buf_t               *bp;    /* buffer containing btree block */
1851         int                     error;  /* error return value */
1852         int                     lev;    /* btree level */
1853
1854         ASSERT(level < cur->bc_nlevels);
1855         /*
1856          * Read-ahead to the right at this level.
1857          */
1858         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1859         /*
1860          * Get a pointer to the btree block.
1861          */
1862         bp = cur->bc_bufs[level];
1863         block = XFS_BUF_TO_INOBT_BLOCK(bp);
1864 #ifdef DEBUG
1865         if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1866                 return error;
1867 #endif
1868         /*
1869          * Increment the ptr at this level.  If we're still in the block
1870          * then we're done.
1871          */
1872         if (++cur->bc_ptrs[level] <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
1873                 *stat = 1;
1874                 return 0;
1875         }
1876         /*
1877          * If we just went off the right edge of the tree, return failure.
1878          */
1879         if (INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) {
1880                 *stat = 0;
1881                 return 0;
1882         }
1883         /*
1884          * March up the tree incrementing pointers.
1885          * Stop when we don't go off the right edge of a block.
1886          */
1887         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1888                 bp = cur->bc_bufs[lev];
1889                 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1890 #ifdef DEBUG
1891                 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1892                         return error;
1893 #endif
1894                 if (++cur->bc_ptrs[lev] <= INT_GET(block->bb_numrecs, ARCH_CONVERT))
1895                         break;
1896                 /*
1897                  * Read-ahead the right block, we're going to read it
1898                  * in the next loop.
1899                  */
1900                 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1901         }
1902         /*
1903          * If we went off the root then we are seriously confused.
1904          */
1905         ASSERT(lev < cur->bc_nlevels);
1906         /*
1907          * Now walk back down the tree, fixing up the cursor's buffer
1908          * pointers and key numbers.
1909          */
1910         for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_INOBT_BLOCK(bp);
1911              lev > level; ) {
1912                 xfs_agblock_t   agbno;  /* block number of btree block */
1913
1914                 agbno = INT_GET(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
1915                 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1916                                 cur->bc_private.i.agno, agbno, 0, &bp,
1917                                 XFS_INO_BTREE_REF)))
1918                         return error;
1919                 lev--;
1920                 xfs_btree_setbuf(cur, lev, bp);
1921                 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1922                 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1923                         return error;
1924                 cur->bc_ptrs[lev] = 1;
1925         }
1926         *stat = 1;
1927         return 0;
1928 }
1929
1930 /*
1931  * Insert the current record at the point referenced by cur.
1932  * The cursor may be inconsistent on return if splits have been done.
1933  */
1934 int                                     /* error */
1935 xfs_inobt_insert(
1936         xfs_btree_cur_t *cur,           /* btree cursor */
1937         int             *stat)          /* success/failure */
1938 {
1939         int             error;          /* error return value */
1940         int             i;              /* result value, 0 for failure */
1941         int             level;          /* current level number in btree */
1942         xfs_agblock_t   nbno;           /* new block number (split result) */
1943         xfs_btree_cur_t *ncur;          /* new cursor (split result) */
1944         xfs_inobt_rec_t nrec;           /* record being inserted this level */
1945         xfs_btree_cur_t *pcur;          /* previous level's cursor */
1946
1947         level = 0;
1948         nbno = NULLAGBLOCK;
1949         INT_SET(nrec.ir_startino, ARCH_CONVERT, cur->bc_rec.i.ir_startino);
1950         INT_SET(nrec.ir_freecount, ARCH_CONVERT, cur->bc_rec.i.ir_freecount);
1951         INT_SET(nrec.ir_free, ARCH_CONVERT, cur->bc_rec.i.ir_free);
1952         ncur = (xfs_btree_cur_t *)0;
1953         pcur = cur;
1954         /*
1955          * Loop going up the tree, starting at the leaf level.
1956          * Stop when we don't get a split block, that must mean that
1957          * the insert is finished with this level.
1958          */
1959         do {
1960                 /*
1961                  * Insert nrec/nbno into this level of the tree.
1962                  * Note if we fail, nbno will be null.
1963                  */
1964                 if ((error = xfs_inobt_insrec(pcur, level++, &nbno, &nrec, &ncur,
1965                                 &i))) {
1966                         if (pcur != cur)
1967                                 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
1968                         return error;
1969                 }
1970                 /*
1971                  * See if the cursor we just used is trash.
1972                  * Can't trash the caller's cursor, but otherwise we should
1973                  * if ncur is a new cursor or we're about to be done.
1974                  */
1975                 if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
1976                         cur->bc_nlevels = pcur->bc_nlevels;
1977                         xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
1978                 }
1979                 /*
1980                  * If we got a new cursor, switch to it.
1981                  */
1982                 if (ncur) {
1983                         pcur = ncur;
1984                         ncur = (xfs_btree_cur_t *)0;
1985                 }
1986         } while (nbno != NULLAGBLOCK);
1987         *stat = i;
1988         return 0;
1989 }
1990
1991 /*
1992  * Lookup the record equal to ino in the btree given by cur.
1993  */
1994 int                                     /* error */
1995 xfs_inobt_lookup_eq(
1996         xfs_btree_cur_t *cur,           /* btree cursor */
1997         xfs_agino_t     ino,            /* starting inode of chunk */
1998         __int32_t       fcnt,           /* free inode count */
1999         xfs_inofree_t   free,           /* free inode mask */
2000         int             *stat)          /* success/failure */
2001 {
2002         cur->bc_rec.i.ir_startino = ino;
2003         cur->bc_rec.i.ir_freecount = fcnt;
2004         cur->bc_rec.i.ir_free = free;
2005         return xfs_inobt_lookup(cur, XFS_LOOKUP_EQ, stat);
2006 }
2007
2008 /*
2009  * Lookup the first record greater than or equal to ino
2010  * in the btree given by cur.
2011  */
2012 int                                     /* error */
2013 xfs_inobt_lookup_ge(
2014         xfs_btree_cur_t *cur,           /* btree cursor */
2015         xfs_agino_t     ino,            /* starting inode of chunk */
2016         __int32_t       fcnt,           /* free inode count */
2017         xfs_inofree_t   free,           /* free inode mask */
2018         int             *stat)          /* success/failure */
2019 {
2020         cur->bc_rec.i.ir_startino = ino;
2021         cur->bc_rec.i.ir_freecount = fcnt;
2022         cur->bc_rec.i.ir_free = free;
2023         return xfs_inobt_lookup(cur, XFS_LOOKUP_GE, stat);
2024 }
2025
2026 /*
2027  * Lookup the first record less than or equal to ino
2028  * in the btree given by cur.
2029  */
2030 int                                     /* error */
2031 xfs_inobt_lookup_le(
2032         xfs_btree_cur_t *cur,           /* btree cursor */
2033         xfs_agino_t     ino,            /* starting inode of chunk */
2034         __int32_t       fcnt,           /* free inode count */
2035         xfs_inofree_t   free,           /* free inode mask */
2036         int             *stat)          /* success/failure */
2037 {
2038         cur->bc_rec.i.ir_startino = ino;
2039         cur->bc_rec.i.ir_freecount = fcnt;
2040         cur->bc_rec.i.ir_free = free;
2041         return xfs_inobt_lookup(cur, XFS_LOOKUP_LE, stat);
2042 }
2043
2044 /*
2045  * Update the record referred to by cur, to the value given
2046  * by [ino, fcnt, free].
2047  * This either works (return 0) or gets an EFSCORRUPTED error.
2048  */
2049 int                                     /* error */
2050 xfs_inobt_update(
2051         xfs_btree_cur_t         *cur,   /* btree cursor */
2052         xfs_agino_t             ino,    /* starting inode of chunk */
2053         __int32_t               fcnt,   /* free inode count */
2054         xfs_inofree_t           free)   /* free inode mask */
2055 {
2056         xfs_inobt_block_t       *block; /* btree block to update */
2057         xfs_buf_t               *bp;    /* buffer containing btree block */
2058         int                     error;  /* error return value */
2059         int                     ptr;    /* current record number (updating) */
2060         xfs_inobt_rec_t         *rp;    /* pointer to updated record */
2061
2062         /*
2063          * Pick up the current block.
2064          */
2065         bp = cur->bc_bufs[0];
2066         block = XFS_BUF_TO_INOBT_BLOCK(bp);
2067 #ifdef DEBUG
2068         if ((error = xfs_btree_check_sblock(cur, block, 0, bp)))
2069                 return error;
2070 #endif
2071         /*
2072          * Get the address of the rec to be updated.
2073          */
2074         ptr = cur->bc_ptrs[0];
2075         rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
2076         /*
2077          * Fill in the new contents and log them.
2078          */
2079         INT_SET(rp->ir_startino, ARCH_CONVERT, ino);
2080         INT_SET(rp->ir_freecount, ARCH_CONVERT, fcnt);
2081         INT_SET(rp->ir_free, ARCH_CONVERT, free);
2082         xfs_inobt_log_recs(cur, bp, ptr, ptr);
2083         /*
2084          * Updating first record in leaf. Pass new key value up to our parent.
2085          */
2086         if (ptr == 1) {
2087                 xfs_inobt_key_t key;    /* key containing [ino] */
2088
2089                 INT_SET(key.ir_startino, ARCH_CONVERT, ino);
2090                 if ((error = xfs_inobt_updkey(cur, &key, 1)))
2091                         return error;
2092         }
2093         return 0;
2094 }