[XFS] use pagevec lookups This reduces the time spend in the radix tree
[linux-2.6] / fs / xfs / xfs_ialloc_btree.c
1 /*
2  * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir.h"
28 #include "xfs_dir2.h"
29 #include "xfs_dmapi.h"
30 #include "xfs_mount.h"
31 #include "xfs_bmap_btree.h"
32 #include "xfs_alloc_btree.h"
33 #include "xfs_ialloc_btree.h"
34 #include "xfs_dir_sf.h"
35 #include "xfs_dir2_sf.h"
36 #include "xfs_attr_sf.h"
37 #include "xfs_dinode.h"
38 #include "xfs_inode.h"
39 #include "xfs_btree.h"
40 #include "xfs_ialloc.h"
41 #include "xfs_alloc.h"
42 #include "xfs_error.h"
43
44 STATIC void xfs_inobt_log_block(xfs_trans_t *, xfs_buf_t *, int);
45 STATIC void xfs_inobt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
46 STATIC void xfs_inobt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
47 STATIC void xfs_inobt_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
48 STATIC int xfs_inobt_lshift(xfs_btree_cur_t *, int, int *);
49 STATIC int xfs_inobt_newroot(xfs_btree_cur_t *, int *);
50 STATIC int xfs_inobt_rshift(xfs_btree_cur_t *, int, int *);
51 STATIC int xfs_inobt_split(xfs_btree_cur_t *, int, xfs_agblock_t *,
52                 xfs_inobt_key_t *, xfs_btree_cur_t **, int *);
53 STATIC int xfs_inobt_updkey(xfs_btree_cur_t *, xfs_inobt_key_t *, int);
54
55 /*
56  * Single level of the xfs_inobt_delete record deletion routine.
57  * Delete record pointed to by cur/level.
58  * Remove the record from its block then rebalance the tree.
59  * Return 0 for error, 1 for done, 2 to go on to the next level.
60  */
61 STATIC int                              /* error */
62 xfs_inobt_delrec(
63         xfs_btree_cur_t         *cur,   /* btree cursor */
64         int                     level,  /* level removing record from */
65         int                     *stat)  /* fail/done/go-on */
66 {
67         xfs_buf_t               *agbp;  /* buffer for a.g. inode header */
68         xfs_mount_t             *mp;    /* mount structure */
69         xfs_agi_t               *agi;   /* allocation group inode header */
70         xfs_inobt_block_t       *block; /* btree block record/key lives in */
71         xfs_agblock_t           bno;    /* btree block number */
72         xfs_buf_t               *bp;    /* buffer for block */
73         int                     error;  /* error return value */
74         int                     i;      /* loop index */
75         xfs_inobt_key_t         key;    /* kp points here if block is level 0 */
76         xfs_inobt_key_t         *kp = NULL;     /* pointer to btree keys */
77         xfs_agblock_t           lbno;   /* left block's block number */
78         xfs_buf_t               *lbp;   /* left block's buffer pointer */
79         xfs_inobt_block_t       *left;  /* left btree block */
80         xfs_inobt_key_t         *lkp;   /* left block key pointer */
81         xfs_inobt_ptr_t         *lpp;   /* left block address pointer */
82         int                     lrecs = 0;      /* number of records in left block */
83         xfs_inobt_rec_t         *lrp;   /* left block record pointer */
84         xfs_inobt_ptr_t         *pp = NULL;     /* pointer to btree addresses */
85         int                     ptr;    /* index in btree block for this rec */
86         xfs_agblock_t           rbno;   /* right block's block number */
87         xfs_buf_t               *rbp;   /* right block's buffer pointer */
88         xfs_inobt_block_t       *right; /* right btree block */
89         xfs_inobt_key_t         *rkp;   /* right block key pointer */
90         xfs_inobt_rec_t         *rp;    /* pointer to btree records */
91         xfs_inobt_ptr_t         *rpp;   /* right block address pointer */
92         int                     rrecs = 0;      /* number of records in right block */
93         int                     numrecs;
94         xfs_inobt_rec_t         *rrp;   /* right block record pointer */
95         xfs_btree_cur_t         *tcur;  /* temporary btree cursor */
96
97         mp = cur->bc_mp;
98
99         /*
100          * Get the index of the entry being deleted, check for nothing there.
101          */
102         ptr = cur->bc_ptrs[level];
103         if (ptr == 0) {
104                 *stat = 0;
105                 return 0;
106         }
107
108         /*
109          * Get the buffer & block containing the record or key/ptr.
110          */
111         bp = cur->bc_bufs[level];
112         block = XFS_BUF_TO_INOBT_BLOCK(bp);
113 #ifdef DEBUG
114         if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
115                 return error;
116 #endif
117         /*
118          * Fail if we're off the end of the block.
119          */
120
121         numrecs = be16_to_cpu(block->bb_numrecs);
122         if (ptr > numrecs) {
123                 *stat = 0;
124                 return 0;
125         }
126         /*
127          * It's a nonleaf.  Excise the key and ptr being deleted, by
128          * sliding the entries past them down one.
129          * Log the changed areas of the block.
130          */
131         if (level > 0) {
132                 kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
133                 pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
134 #ifdef DEBUG
135                 for (i = ptr; i < numrecs; i++) {
136                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i]), level)))
137                                 return error;
138                 }
139 #endif
140                 if (ptr < numrecs) {
141                         memmove(&kp[ptr - 1], &kp[ptr],
142                                 (numrecs - ptr) * sizeof(*kp));
143                         memmove(&pp[ptr - 1], &pp[ptr],
144                                 (numrecs - ptr) * sizeof(*kp));
145                         xfs_inobt_log_keys(cur, bp, ptr, numrecs - 1);
146                         xfs_inobt_log_ptrs(cur, bp, ptr, numrecs - 1);
147                 }
148         }
149         /*
150          * It's a leaf.  Excise the record being deleted, by sliding the
151          * entries past it down one.  Log the changed areas of the block.
152          */
153         else {
154                 rp = XFS_INOBT_REC_ADDR(block, 1, cur);
155                 if (ptr < numrecs) {
156                         memmove(&rp[ptr - 1], &rp[ptr],
157                                 (numrecs - ptr) * sizeof(*rp));
158                         xfs_inobt_log_recs(cur, bp, ptr, numrecs - 1);
159                 }
160                 /*
161                  * If it's the first record in the block, we'll need a key
162                  * structure to pass up to the next level (updkey).
163                  */
164                 if (ptr == 1) {
165                         key.ir_startino = rp->ir_startino;
166                         kp = &key;
167                 }
168         }
169         /*
170          * Decrement and log the number of entries in the block.
171          */
172         numrecs--;
173         block->bb_numrecs = cpu_to_be16(numrecs);
174         xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
175         /*
176          * Is this the root level?  If so, we're almost done.
177          */
178         if (level == cur->bc_nlevels - 1) {
179                 /*
180                  * If this is the root level,
181                  * and there's only one entry left,
182                  * and it's NOT the leaf level,
183                  * then we can get rid of this level.
184                  */
185                 if (numrecs == 1 && level > 0) {
186                         agbp = cur->bc_private.i.agbp;
187                         agi = XFS_BUF_TO_AGI(agbp);
188                         /*
189                          * pp is still set to the first pointer in the block.
190                          * Make it the new root of the btree.
191                          */
192                         bno = be32_to_cpu(agi->agi_root);
193                         agi->agi_root = *pp;
194                         be32_add(&agi->agi_level, -1);
195                         /*
196                          * Free the block.
197                          */
198                         if ((error = xfs_free_extent(cur->bc_tp,
199                                 XFS_AGB_TO_FSB(mp, cur->bc_private.i.agno, bno), 1)))
200                                 return error;
201                         xfs_trans_binval(cur->bc_tp, bp);
202                         xfs_ialloc_log_agi(cur->bc_tp, agbp,
203                                 XFS_AGI_ROOT | XFS_AGI_LEVEL);
204                         /*
205                          * Update the cursor so there's one fewer level.
206                          */
207                         cur->bc_bufs[level] = NULL;
208                         cur->bc_nlevels--;
209                 } else if (level > 0 &&
210                            (error = xfs_inobt_decrement(cur, level, &i)))
211                         return error;
212                 *stat = 1;
213                 return 0;
214         }
215         /*
216          * If we deleted the leftmost entry in the block, update the
217          * key values above us in the tree.
218          */
219         if (ptr == 1 && (error = xfs_inobt_updkey(cur, kp, level + 1)))
220                 return error;
221         /*
222          * If the number of records remaining in the block is at least
223          * the minimum, we're done.
224          */
225         if (numrecs >= XFS_INOBT_BLOCK_MINRECS(level, cur)) {
226                 if (level > 0 &&
227                     (error = xfs_inobt_decrement(cur, level, &i)))
228                         return error;
229                 *stat = 1;
230                 return 0;
231         }
232         /*
233          * Otherwise, we have to move some records around to keep the
234          * tree balanced.  Look at the left and right sibling blocks to
235          * see if we can re-balance by moving only one record.
236          */
237         rbno = be32_to_cpu(block->bb_rightsib);
238         lbno = be32_to_cpu(block->bb_leftsib);
239         bno = NULLAGBLOCK;
240         ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
241         /*
242          * Duplicate the cursor so our btree manipulations here won't
243          * disrupt the next level up.
244          */
245         if ((error = xfs_btree_dup_cursor(cur, &tcur)))
246                 return error;
247         /*
248          * If there's a right sibling, see if it's ok to shift an entry
249          * out of it.
250          */
251         if (rbno != NULLAGBLOCK) {
252                 /*
253                  * Move the temp cursor to the last entry in the next block.
254                  * Actually any entry but the first would suffice.
255                  */
256                 i = xfs_btree_lastrec(tcur, level);
257                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
258                 if ((error = xfs_inobt_increment(tcur, level, &i)))
259                         goto error0;
260                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
261                 i = xfs_btree_lastrec(tcur, level);
262                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
263                 /*
264                  * Grab a pointer to the block.
265                  */
266                 rbp = tcur->bc_bufs[level];
267                 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
268 #ifdef DEBUG
269                 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
270                         goto error0;
271 #endif
272                 /*
273                  * Grab the current block number, for future use.
274                  */
275                 bno = be32_to_cpu(right->bb_leftsib);
276                 /*
277                  * If right block is full enough so that removing one entry
278                  * won't make it too empty, and left-shifting an entry out
279                  * of right to us works, we're done.
280                  */
281                 if (be16_to_cpu(right->bb_numrecs) - 1 >=
282                      XFS_INOBT_BLOCK_MINRECS(level, cur)) {
283                         if ((error = xfs_inobt_lshift(tcur, level, &i)))
284                                 goto error0;
285                         if (i) {
286                                 ASSERT(be16_to_cpu(block->bb_numrecs) >=
287                                        XFS_INOBT_BLOCK_MINRECS(level, cur));
288                                 xfs_btree_del_cursor(tcur,
289                                                      XFS_BTREE_NOERROR);
290                                 if (level > 0 &&
291                                     (error = xfs_inobt_decrement(cur, level,
292                                                 &i)))
293                                         return error;
294                                 *stat = 1;
295                                 return 0;
296                         }
297                 }
298                 /*
299                  * Otherwise, grab the number of records in right for
300                  * future reference, and fix up the temp cursor to point
301                  * to our block again (last record).
302                  */
303                 rrecs = be16_to_cpu(right->bb_numrecs);
304                 if (lbno != NULLAGBLOCK) {
305                         xfs_btree_firstrec(tcur, level);
306                         if ((error = xfs_inobt_decrement(tcur, level, &i)))
307                                 goto error0;
308                 }
309         }
310         /*
311          * If there's a left sibling, see if it's ok to shift an entry
312          * out of it.
313          */
314         if (lbno != NULLAGBLOCK) {
315                 /*
316                  * Move the temp cursor to the first entry in the
317                  * previous block.
318                  */
319                 xfs_btree_firstrec(tcur, level);
320                 if ((error = xfs_inobt_decrement(tcur, level, &i)))
321                         goto error0;
322                 xfs_btree_firstrec(tcur, level);
323                 /*
324                  * Grab a pointer to the block.
325                  */
326                 lbp = tcur->bc_bufs[level];
327                 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
328 #ifdef DEBUG
329                 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
330                         goto error0;
331 #endif
332                 /*
333                  * Grab the current block number, for future use.
334                  */
335                 bno = be32_to_cpu(left->bb_rightsib);
336                 /*
337                  * If left block is full enough so that removing one entry
338                  * won't make it too empty, and right-shifting an entry out
339                  * of left to us works, we're done.
340                  */
341                 if (be16_to_cpu(left->bb_numrecs) - 1 >=
342                      XFS_INOBT_BLOCK_MINRECS(level, cur)) {
343                         if ((error = xfs_inobt_rshift(tcur, level, &i)))
344                                 goto error0;
345                         if (i) {
346                                 ASSERT(be16_to_cpu(block->bb_numrecs) >=
347                                        XFS_INOBT_BLOCK_MINRECS(level, cur));
348                                 xfs_btree_del_cursor(tcur,
349                                                      XFS_BTREE_NOERROR);
350                                 if (level == 0)
351                                         cur->bc_ptrs[0]++;
352                                 *stat = 1;
353                                 return 0;
354                         }
355                 }
356                 /*
357                  * Otherwise, grab the number of records in right for
358                  * future reference.
359                  */
360                 lrecs = be16_to_cpu(left->bb_numrecs);
361         }
362         /*
363          * Delete the temp cursor, we're done with it.
364          */
365         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
366         /*
367          * If here, we need to do a join to keep the tree balanced.
368          */
369         ASSERT(bno != NULLAGBLOCK);
370         /*
371          * See if we can join with the left neighbor block.
372          */
373         if (lbno != NULLAGBLOCK &&
374             lrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
375                 /*
376                  * Set "right" to be the starting block,
377                  * "left" to be the left neighbor.
378                  */
379                 rbno = bno;
380                 right = block;
381                 rrecs = be16_to_cpu(right->bb_numrecs);
382                 rbp = bp;
383                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
384                                 cur->bc_private.i.agno, lbno, 0, &lbp,
385                                 XFS_INO_BTREE_REF)))
386                         return error;
387                 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
388                 lrecs = be16_to_cpu(left->bb_numrecs);
389                 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
390                         return error;
391         }
392         /*
393          * If that won't work, see if we can join with the right neighbor block.
394          */
395         else if (rbno != NULLAGBLOCK &&
396                  rrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
397                 /*
398                  * Set "left" to be the starting block,
399                  * "right" to be the right neighbor.
400                  */
401                 lbno = bno;
402                 left = block;
403                 lrecs = be16_to_cpu(left->bb_numrecs);
404                 lbp = bp;
405                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
406                                 cur->bc_private.i.agno, rbno, 0, &rbp,
407                                 XFS_INO_BTREE_REF)))
408                         return error;
409                 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
410                 rrecs = be16_to_cpu(right->bb_numrecs);
411                 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
412                         return error;
413         }
414         /*
415          * Otherwise, we can't fix the imbalance.
416          * Just return.  This is probably a logic error, but it's not fatal.
417          */
418         else {
419                 if (level > 0 && (error = xfs_inobt_decrement(cur, level, &i)))
420                         return error;
421                 *stat = 1;
422                 return 0;
423         }
424         /*
425          * We're now going to join "left" and "right" by moving all the stuff
426          * in "right" to "left" and deleting "right".
427          */
428         if (level > 0) {
429                 /*
430                  * It's a non-leaf.  Move keys and pointers.
431                  */
432                 lkp = XFS_INOBT_KEY_ADDR(left, lrecs + 1, cur);
433                 lpp = XFS_INOBT_PTR_ADDR(left, lrecs + 1, cur);
434                 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
435                 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
436 #ifdef DEBUG
437                 for (i = 0; i < rrecs; i++) {
438                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
439                                 return error;
440                 }
441 #endif
442                 memcpy(lkp, rkp, rrecs * sizeof(*lkp));
443                 memcpy(lpp, rpp, rrecs * sizeof(*lpp));
444                 xfs_inobt_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
445                 xfs_inobt_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
446         } else {
447                 /*
448                  * It's a leaf.  Move records.
449                  */
450                 lrp = XFS_INOBT_REC_ADDR(left, lrecs + 1, cur);
451                 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
452                 memcpy(lrp, rrp, rrecs * sizeof(*lrp));
453                 xfs_inobt_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
454         }
455         /*
456          * If we joined with the left neighbor, set the buffer in the
457          * cursor to the left block, and fix up the index.
458          */
459         if (bp != lbp) {
460                 xfs_btree_setbuf(cur, level, lbp);
461                 cur->bc_ptrs[level] += lrecs;
462         }
463         /*
464          * If we joined with the right neighbor and there's a level above
465          * us, increment the cursor at that level.
466          */
467         else if (level + 1 < cur->bc_nlevels &&
468                  (error = xfs_alloc_increment(cur, level + 1, &i)))
469                 return error;
470         /*
471          * Fix up the number of records in the surviving block.
472          */
473         lrecs += rrecs;
474         left->bb_numrecs = cpu_to_be16(lrecs);
475         /*
476          * Fix up the right block pointer in the surviving block, and log it.
477          */
478         left->bb_rightsib = right->bb_rightsib;
479         xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
480         /*
481          * If there is a right sibling now, make it point to the
482          * remaining block.
483          */
484         if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
485                 xfs_inobt_block_t       *rrblock;
486                 xfs_buf_t               *rrbp;
487
488                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
489                                 cur->bc_private.i.agno, be32_to_cpu(left->bb_rightsib), 0,
490                                 &rrbp, XFS_INO_BTREE_REF)))
491                         return error;
492                 rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
493                 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
494                         return error;
495                 rrblock->bb_leftsib = cpu_to_be32(lbno);
496                 xfs_inobt_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
497         }
498         /*
499          * Free the deleting block.
500          */
501         if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp,
502                                      cur->bc_private.i.agno, rbno), 1)))
503                 return error;
504         xfs_trans_binval(cur->bc_tp, rbp);
505         /*
506          * Readjust the ptr at this level if it's not a leaf, since it's
507          * still pointing at the deletion point, which makes the cursor
508          * inconsistent.  If this makes the ptr 0, the caller fixes it up.
509          * We can't use decrement because it would change the next level up.
510          */
511         if (level > 0)
512                 cur->bc_ptrs[level]--;
513         /*
514          * Return value means the next level up has something to do.
515          */
516         *stat = 2;
517         return 0;
518
519 error0:
520         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
521         return error;
522 }
523
524 /*
525  * Insert one record/level.  Return information to the caller
526  * allowing the next level up to proceed if necessary.
527  */
528 STATIC int                              /* error */
529 xfs_inobt_insrec(
530         xfs_btree_cur_t         *cur,   /* btree cursor */
531         int                     level,  /* level to insert record at */
532         xfs_agblock_t           *bnop,  /* i/o: block number inserted */
533         xfs_inobt_rec_t         *recp,  /* i/o: record data inserted */
534         xfs_btree_cur_t         **curp, /* output: new cursor replacing cur */
535         int                     *stat)  /* success/failure */
536 {
537         xfs_inobt_block_t       *block; /* btree block record/key lives in */
538         xfs_buf_t               *bp;    /* buffer for block */
539         int                     error;  /* error return value */
540         int                     i;      /* loop index */
541         xfs_inobt_key_t         key;    /* key value being inserted */
542         xfs_inobt_key_t         *kp=NULL;       /* pointer to btree keys */
543         xfs_agblock_t           nbno;   /* block number of allocated block */
544         xfs_btree_cur_t         *ncur;  /* new cursor to be used at next lvl */
545         xfs_inobt_key_t         nkey;   /* new key value, from split */
546         xfs_inobt_rec_t         nrec;   /* new record value, for caller */
547         int                     numrecs;
548         int                     optr;   /* old ptr value */
549         xfs_inobt_ptr_t         *pp;    /* pointer to btree addresses */
550         int                     ptr;    /* index in btree block for this rec */
551         xfs_inobt_rec_t         *rp=NULL;       /* pointer to btree records */
552
553         /*
554          * GCC doesn't understand the (arguably complex) control flow in
555          * this function and complains about uninitialized structure fields
556          * without this.
557          */
558         memset(&nrec, 0, sizeof(nrec));
559
560         /*
561          * If we made it to the root level, allocate a new root block
562          * and we're done.
563          */
564         if (level >= cur->bc_nlevels) {
565                 error = xfs_inobt_newroot(cur, &i);
566                 *bnop = NULLAGBLOCK;
567                 *stat = i;
568                 return error;
569         }
570         /*
571          * Make a key out of the record data to be inserted, and save it.
572          */
573         key.ir_startino = recp->ir_startino; /* INT_: direct copy */
574         optr = ptr = cur->bc_ptrs[level];
575         /*
576          * If we're off the left edge, return failure.
577          */
578         if (ptr == 0) {
579                 *stat = 0;
580                 return 0;
581         }
582         /*
583          * Get pointers to the btree buffer and block.
584          */
585         bp = cur->bc_bufs[level];
586         block = XFS_BUF_TO_INOBT_BLOCK(bp);
587         numrecs = be16_to_cpu(block->bb_numrecs);
588 #ifdef DEBUG
589         if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
590                 return error;
591         /*
592          * Check that the new entry is being inserted in the right place.
593          */
594         if (ptr <= numrecs) {
595                 if (level == 0) {
596                         rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
597                         xfs_btree_check_rec(cur->bc_btnum, recp, rp);
598                 } else {
599                         kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
600                         xfs_btree_check_key(cur->bc_btnum, &key, kp);
601                 }
602         }
603 #endif
604         nbno = NULLAGBLOCK;
605         ncur = (xfs_btree_cur_t *)0;
606         /*
607          * If the block is full, we can't insert the new entry until we
608          * make the block un-full.
609          */
610         if (numrecs == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
611                 /*
612                  * First, try shifting an entry to the right neighbor.
613                  */
614                 if ((error = xfs_inobt_rshift(cur, level, &i)))
615                         return error;
616                 if (i) {
617                         /* nothing */
618                 }
619                 /*
620                  * Next, try shifting an entry to the left neighbor.
621                  */
622                 else {
623                         if ((error = xfs_inobt_lshift(cur, level, &i)))
624                                 return error;
625                         if (i) {
626                                 optr = ptr = cur->bc_ptrs[level];
627                         } else {
628                                 /*
629                                  * Next, try splitting the current block
630                                  * in half. If this works we have to
631                                  * re-set our variables because
632                                  * we could be in a different block now.
633                                  */
634                                 if ((error = xfs_inobt_split(cur, level, &nbno,
635                                                 &nkey, &ncur, &i)))
636                                         return error;
637                                 if (i) {
638                                         bp = cur->bc_bufs[level];
639                                         block = XFS_BUF_TO_INOBT_BLOCK(bp);
640 #ifdef DEBUG
641                                         if ((error = xfs_btree_check_sblock(cur,
642                                                         block, level, bp)))
643                                                 return error;
644 #endif
645                                         ptr = cur->bc_ptrs[level];
646                                         nrec.ir_startino = nkey.ir_startino; /* INT_: direct copy */
647                                 } else {
648                                         /*
649                                          * Otherwise the insert fails.
650                                          */
651                                         *stat = 0;
652                                         return 0;
653                                 }
654                         }
655                 }
656         }
657         /*
658          * At this point we know there's room for our new entry in the block
659          * we're pointing at.
660          */
661         numrecs = be16_to_cpu(block->bb_numrecs);
662         if (level > 0) {
663                 /*
664                  * It's a non-leaf entry.  Make a hole for the new data
665                  * in the key and ptr regions of the block.
666                  */
667                 kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
668                 pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
669 #ifdef DEBUG
670                 for (i = numrecs; i >= ptr; i--) {
671                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level)))
672                                 return error;
673                 }
674 #endif
675                 memmove(&kp[ptr], &kp[ptr - 1],
676                         (numrecs - ptr + 1) * sizeof(*kp));
677                 memmove(&pp[ptr], &pp[ptr - 1],
678                         (numrecs - ptr + 1) * sizeof(*pp));
679                 /*
680                  * Now stuff the new data in, bump numrecs and log the new data.
681                  */
682 #ifdef DEBUG
683                 if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
684                         return error;
685 #endif
686                 kp[ptr - 1] = key; /* INT_: struct copy */
687                 pp[ptr - 1] = cpu_to_be32(*bnop);
688                 numrecs++;
689                 block->bb_numrecs = cpu_to_be16(numrecs);
690                 xfs_inobt_log_keys(cur, bp, ptr, numrecs);
691                 xfs_inobt_log_ptrs(cur, bp, ptr, numrecs);
692         } else {
693                 /*
694                  * It's a leaf entry.  Make a hole for the new record.
695                  */
696                 rp = XFS_INOBT_REC_ADDR(block, 1, cur);
697                 memmove(&rp[ptr], &rp[ptr - 1],
698                         (numrecs - ptr + 1) * sizeof(*rp));
699                 /*
700                  * Now stuff the new record in, bump numrecs
701                  * and log the new data.
702                  */
703                 rp[ptr - 1] = *recp; /* INT_: struct copy */
704                 numrecs++;
705                 block->bb_numrecs = cpu_to_be16(numrecs);
706                 xfs_inobt_log_recs(cur, bp, ptr, numrecs);
707         }
708         /*
709          * Log the new number of records in the btree header.
710          */
711         xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
712 #ifdef DEBUG
713         /*
714          * Check that the key/record is in the right place, now.
715          */
716         if (ptr < numrecs) {
717                 if (level == 0)
718                         xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
719                                 rp + ptr);
720                 else
721                         xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
722                                 kp + ptr);
723         }
724 #endif
725         /*
726          * If we inserted at the start of a block, update the parents' keys.
727          */
728         if (optr == 1 && (error = xfs_inobt_updkey(cur, &key, level + 1)))
729                 return error;
730         /*
731          * Return the new block number, if any.
732          * If there is one, give back a record value and a cursor too.
733          */
734         *bnop = nbno;
735         if (nbno != NULLAGBLOCK) {
736                 *recp = nrec; /* INT_: struct copy */
737                 *curp = ncur;
738         }
739         *stat = 1;
740         return 0;
741 }
742
743 /*
744  * Log header fields from a btree block.
745  */
746 STATIC void
747 xfs_inobt_log_block(
748         xfs_trans_t             *tp,    /* transaction pointer */
749         xfs_buf_t               *bp,    /* buffer containing btree block */
750         int                     fields) /* mask of fields: XFS_BB_... */
751 {
752         int                     first;  /* first byte offset logged */
753         int                     last;   /* last byte offset logged */
754         static const short      offsets[] = {   /* table of offsets */
755                 offsetof(xfs_inobt_block_t, bb_magic),
756                 offsetof(xfs_inobt_block_t, bb_level),
757                 offsetof(xfs_inobt_block_t, bb_numrecs),
758                 offsetof(xfs_inobt_block_t, bb_leftsib),
759                 offsetof(xfs_inobt_block_t, bb_rightsib),
760                 sizeof(xfs_inobt_block_t)
761         };
762
763         xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last);
764         xfs_trans_log_buf(tp, bp, first, last);
765 }
766
767 /*
768  * Log keys from a btree block (nonleaf).
769  */
770 STATIC void
771 xfs_inobt_log_keys(
772         xfs_btree_cur_t         *cur,   /* btree cursor */
773         xfs_buf_t               *bp,    /* buffer containing btree block */
774         int                     kfirst, /* index of first key to log */
775         int                     klast)  /* index of last key to log */
776 {
777         xfs_inobt_block_t       *block; /* btree block to log from */
778         int                     first;  /* first byte offset logged */
779         xfs_inobt_key_t         *kp;    /* key pointer in btree block */
780         int                     last;   /* last byte offset logged */
781
782         block = XFS_BUF_TO_INOBT_BLOCK(bp);
783         kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
784         first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
785         last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
786         xfs_trans_log_buf(cur->bc_tp, bp, first, last);
787 }
788
789 /*
790  * Log block pointer fields from a btree block (nonleaf).
791  */
792 STATIC void
793 xfs_inobt_log_ptrs(
794         xfs_btree_cur_t         *cur,   /* btree cursor */
795         xfs_buf_t               *bp,    /* buffer containing btree block */
796         int                     pfirst, /* index of first pointer to log */
797         int                     plast)  /* index of last pointer to log */
798 {
799         xfs_inobt_block_t       *block; /* btree block to log from */
800         int                     first;  /* first byte offset logged */
801         int                     last;   /* last byte offset logged */
802         xfs_inobt_ptr_t         *pp;    /* block-pointer pointer in btree blk */
803
804         block = XFS_BUF_TO_INOBT_BLOCK(bp);
805         pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
806         first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
807         last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
808         xfs_trans_log_buf(cur->bc_tp, bp, first, last);
809 }
810
811 /*
812  * Log records from a btree block (leaf).
813  */
814 STATIC void
815 xfs_inobt_log_recs(
816         xfs_btree_cur_t         *cur,   /* btree cursor */
817         xfs_buf_t               *bp,    /* buffer containing btree block */
818         int                     rfirst, /* index of first record to log */
819         int                     rlast)  /* index of last record to log */
820 {
821         xfs_inobt_block_t       *block; /* btree block to log from */
822         int                     first;  /* first byte offset logged */
823         int                     last;   /* last byte offset logged */
824         xfs_inobt_rec_t         *rp;    /* record pointer for btree block */
825
826         block = XFS_BUF_TO_INOBT_BLOCK(bp);
827         rp = XFS_INOBT_REC_ADDR(block, 1, cur);
828         first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
829         last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
830         xfs_trans_log_buf(cur->bc_tp, bp, first, last);
831 }
832
833 /*
834  * Lookup the record.  The cursor is made to point to it, based on dir.
835  * Return 0 if can't find any such record, 1 for success.
836  */
837 STATIC int                              /* error */
838 xfs_inobt_lookup(
839         xfs_btree_cur_t         *cur,   /* btree cursor */
840         xfs_lookup_t            dir,    /* <=, ==, or >= */
841         int                     *stat)  /* success/failure */
842 {
843         xfs_agblock_t           agbno;  /* a.g. relative btree block number */
844         xfs_agnumber_t          agno;   /* allocation group number */
845         xfs_inobt_block_t       *block=NULL;    /* current btree block */
846         __int64_t               diff;   /* difference for the current key */
847         int                     error;  /* error return value */
848         int                     keyno=0;        /* current key number */
849         int                     level;  /* level in the btree */
850         xfs_mount_t             *mp;    /* file system mount point */
851
852         /*
853          * Get the allocation group header, and the root block number.
854          */
855         mp = cur->bc_mp;
856         {
857                 xfs_agi_t       *agi;   /* a.g. inode header */
858
859                 agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp);
860                 agno = be32_to_cpu(agi->agi_seqno);
861                 agbno = be32_to_cpu(agi->agi_root);
862         }
863         /*
864          * Iterate over each level in the btree, starting at the root.
865          * For each level above the leaves, find the key we need, based
866          * on the lookup record, then follow the corresponding block
867          * pointer down to the next level.
868          */
869         for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
870                 xfs_buf_t       *bp;    /* buffer pointer for btree block */
871                 xfs_daddr_t     d;      /* disk address of btree block */
872
873                 /*
874                  * Get the disk address we're looking for.
875                  */
876                 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
877                 /*
878                  * If the old buffer at this level is for a different block,
879                  * throw it away, otherwise just use it.
880                  */
881                 bp = cur->bc_bufs[level];
882                 if (bp && XFS_BUF_ADDR(bp) != d)
883                         bp = (xfs_buf_t *)0;
884                 if (!bp) {
885                         /*
886                          * Need to get a new buffer.  Read it, then
887                          * set it in the cursor, releasing the old one.
888                          */
889                         if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
890                                         agno, agbno, 0, &bp, XFS_INO_BTREE_REF)))
891                                 return error;
892                         xfs_btree_setbuf(cur, level, bp);
893                         /*
894                          * Point to the btree block, now that we have the buffer
895                          */
896                         block = XFS_BUF_TO_INOBT_BLOCK(bp);
897                         if ((error = xfs_btree_check_sblock(cur, block, level,
898                                         bp)))
899                                 return error;
900                 } else
901                         block = XFS_BUF_TO_INOBT_BLOCK(bp);
902                 /*
903                  * If we already had a key match at a higher level, we know
904                  * we need to use the first entry in this block.
905                  */
906                 if (diff == 0)
907                         keyno = 1;
908                 /*
909                  * Otherwise we need to search this block.  Do a binary search.
910                  */
911                 else {
912                         int             high;   /* high entry number */
913                         xfs_inobt_key_t *kkbase=NULL;/* base of keys in block */
914                         xfs_inobt_rec_t *krbase=NULL;/* base of records in block */
915                         int             low;    /* low entry number */
916
917                         /*
918                          * Get a pointer to keys or records.
919                          */
920                         if (level > 0)
921                                 kkbase = XFS_INOBT_KEY_ADDR(block, 1, cur);
922                         else
923                                 krbase = XFS_INOBT_REC_ADDR(block, 1, cur);
924                         /*
925                          * Set low and high entry numbers, 1-based.
926                          */
927                         low = 1;
928                         if (!(high = be16_to_cpu(block->bb_numrecs))) {
929                                 /*
930                                  * If the block is empty, the tree must
931                                  * be an empty leaf.
932                                  */
933                                 ASSERT(level == 0 && cur->bc_nlevels == 1);
934                                 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
935                                 *stat = 0;
936                                 return 0;
937                         }
938                         /*
939                          * Binary search the block.
940                          */
941                         while (low <= high) {
942                                 xfs_agino_t     startino;       /* key value */
943
944                                 /*
945                                  * keyno is average of low and high.
946                                  */
947                                 keyno = (low + high) >> 1;
948                                 /*
949                                  * Get startino.
950                                  */
951                                 if (level > 0) {
952                                         xfs_inobt_key_t *kkp;
953
954                                         kkp = kkbase + keyno - 1;
955                                         startino = INT_GET(kkp->ir_startino, ARCH_CONVERT);
956                                 } else {
957                                         xfs_inobt_rec_t *krp;
958
959                                         krp = krbase + keyno - 1;
960                                         startino = INT_GET(krp->ir_startino, ARCH_CONVERT);
961                                 }
962                                 /*
963                                  * Compute difference to get next direction.
964                                  */
965                                 diff = (__int64_t)
966                                         startino - cur->bc_rec.i.ir_startino;
967                                 /*
968                                  * Less than, move right.
969                                  */
970                                 if (diff < 0)
971                                         low = keyno + 1;
972                                 /*
973                                  * Greater than, move left.
974                                  */
975                                 else if (diff > 0)
976                                         high = keyno - 1;
977                                 /*
978                                  * Equal, we're done.
979                                  */
980                                 else
981                                         break;
982                         }
983                 }
984                 /*
985                  * If there are more levels, set up for the next level
986                  * by getting the block number and filling in the cursor.
987                  */
988                 if (level > 0) {
989                         /*
990                          * If we moved left, need the previous key number,
991                          * unless there isn't one.
992                          */
993                         if (diff > 0 && --keyno < 1)
994                                 keyno = 1;
995                         agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, keyno, cur));
996 #ifdef DEBUG
997                         if ((error = xfs_btree_check_sptr(cur, agbno, level)))
998                                 return error;
999 #endif
1000                         cur->bc_ptrs[level] = keyno;
1001                 }
1002         }
1003         /*
1004          * Done with the search.
1005          * See if we need to adjust the results.
1006          */
1007         if (dir != XFS_LOOKUP_LE && diff < 0) {
1008                 keyno++;
1009                 /*
1010                  * If ge search and we went off the end of the block, but it's
1011                  * not the last block, we're in the wrong block.
1012                  */
1013                 if (dir == XFS_LOOKUP_GE &&
1014                     keyno > be16_to_cpu(block->bb_numrecs) &&
1015                     be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
1016                         int     i;
1017
1018                         cur->bc_ptrs[0] = keyno;
1019                         if ((error = xfs_inobt_increment(cur, 0, &i)))
1020                                 return error;
1021                         ASSERT(i == 1);
1022                         *stat = 1;
1023                         return 0;
1024                 }
1025         }
1026         else if (dir == XFS_LOOKUP_LE && diff > 0)
1027                 keyno--;
1028         cur->bc_ptrs[0] = keyno;
1029         /*
1030          * Return if we succeeded or not.
1031          */
1032         if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs))
1033                 *stat = 0;
1034         else
1035                 *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
1036         return 0;
1037 }
1038
1039 /*
1040  * Move 1 record left from cur/level if possible.
1041  * Update cur to reflect the new path.
1042  */
1043 STATIC int                              /* error */
1044 xfs_inobt_lshift(
1045         xfs_btree_cur_t         *cur,   /* btree cursor */
1046         int                     level,  /* level to shift record on */
1047         int                     *stat)  /* success/failure */
1048 {
1049         int                     error;  /* error return value */
1050 #ifdef DEBUG
1051         int                     i;      /* loop index */
1052 #endif
1053         xfs_inobt_key_t         key;    /* key value for leaf level upward */
1054         xfs_buf_t               *lbp;   /* buffer for left neighbor block */
1055         xfs_inobt_block_t       *left;  /* left neighbor btree block */
1056         xfs_inobt_key_t         *lkp=NULL;      /* key pointer for left block */
1057         xfs_inobt_ptr_t         *lpp;   /* address pointer for left block */
1058         xfs_inobt_rec_t         *lrp=NULL;      /* record pointer for left block */
1059         int                     nrec;   /* new number of left block entries */
1060         xfs_buf_t               *rbp;   /* buffer for right (current) block */
1061         xfs_inobt_block_t       *right; /* right (current) btree block */
1062         xfs_inobt_key_t         *rkp=NULL;      /* key pointer for right block */
1063         xfs_inobt_ptr_t         *rpp=NULL;      /* address pointer for right block */
1064         xfs_inobt_rec_t         *rrp=NULL;      /* record pointer for right block */
1065
1066         /*
1067          * Set up variables for this block as "right".
1068          */
1069         rbp = cur->bc_bufs[level];
1070         right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1071 #ifdef DEBUG
1072         if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1073                 return error;
1074 #endif
1075         /*
1076          * If we've got no left sibling then we can't shift an entry left.
1077          */
1078         if (be32_to_cpu(right->bb_leftsib) == NULLAGBLOCK) {
1079                 *stat = 0;
1080                 return 0;
1081         }
1082         /*
1083          * If the cursor entry is the one that would be moved, don't
1084          * do it... it's too complicated.
1085          */
1086         if (cur->bc_ptrs[level] <= 1) {
1087                 *stat = 0;
1088                 return 0;
1089         }
1090         /*
1091          * Set up the left neighbor as "left".
1092          */
1093         if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1094                         cur->bc_private.i.agno, be32_to_cpu(right->bb_leftsib),
1095                         0, &lbp, XFS_INO_BTREE_REF)))
1096                 return error;
1097         left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1098         if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1099                 return error;
1100         /*
1101          * If it's full, it can't take another entry.
1102          */
1103         if (be16_to_cpu(left->bb_numrecs) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
1104                 *stat = 0;
1105                 return 0;
1106         }
1107         nrec = be16_to_cpu(left->bb_numrecs) + 1;
1108         /*
1109          * If non-leaf, copy a key and a ptr to the left block.
1110          */
1111         if (level > 0) {
1112                 lkp = XFS_INOBT_KEY_ADDR(left, nrec, cur);
1113                 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1114                 *lkp = *rkp;
1115                 xfs_inobt_log_keys(cur, lbp, nrec, nrec);
1116                 lpp = XFS_INOBT_PTR_ADDR(left, nrec, cur);
1117                 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1118 #ifdef DEBUG
1119                 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level)))
1120                         return error;
1121 #endif
1122                 *lpp = *rpp; /* INT_: no-change copy */
1123                 xfs_inobt_log_ptrs(cur, lbp, nrec, nrec);
1124         }
1125         /*
1126          * If leaf, copy a record to the left block.
1127          */
1128         else {
1129                 lrp = XFS_INOBT_REC_ADDR(left, nrec, cur);
1130                 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1131                 *lrp = *rrp;
1132                 xfs_inobt_log_recs(cur, lbp, nrec, nrec);
1133         }
1134         /*
1135          * Bump and log left's numrecs, decrement and log right's numrecs.
1136          */
1137         be16_add(&left->bb_numrecs, 1);
1138         xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1139 #ifdef DEBUG
1140         if (level > 0)
1141                 xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
1142         else
1143                 xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
1144 #endif
1145         be16_add(&right->bb_numrecs, -1);
1146         xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1147         /*
1148          * Slide the contents of right down one entry.
1149          */
1150         if (level > 0) {
1151 #ifdef DEBUG
1152                 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1153                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i + 1]),
1154                                         level)))
1155                                 return error;
1156                 }
1157 #endif
1158                 memmove(rkp, rkp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1159                 memmove(rpp, rpp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1160                 xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1161                 xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1162         } else {
1163                 memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1164                 xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1165                 key.ir_startino = rrp->ir_startino; /* INT_: direct copy */
1166                 rkp = &key;
1167         }
1168         /*
1169          * Update the parent key values of right.
1170          */
1171         if ((error = xfs_inobt_updkey(cur, rkp, level + 1)))
1172                 return error;
1173         /*
1174          * Slide the cursor value left one.
1175          */
1176         cur->bc_ptrs[level]--;
1177         *stat = 1;
1178         return 0;
1179 }
1180
1181 /*
1182  * Allocate a new root block, fill it in.
1183  */
1184 STATIC int                              /* error */
1185 xfs_inobt_newroot(
1186         xfs_btree_cur_t         *cur,   /* btree cursor */
1187         int                     *stat)  /* success/failure */
1188 {
1189         xfs_agi_t               *agi;   /* a.g. inode header */
1190         xfs_alloc_arg_t         args;   /* allocation argument structure */
1191         xfs_inobt_block_t       *block; /* one half of the old root block */
1192         xfs_buf_t               *bp;    /* buffer containing block */
1193         int                     error;  /* error return value */
1194         xfs_inobt_key_t         *kp;    /* btree key pointer */
1195         xfs_agblock_t           lbno;   /* left block number */
1196         xfs_buf_t               *lbp;   /* left buffer pointer */
1197         xfs_inobt_block_t       *left;  /* left btree block */
1198         xfs_buf_t               *nbp;   /* new (root) buffer */
1199         xfs_inobt_block_t       *new;   /* new (root) btree block */
1200         int                     nptr;   /* new value for key index, 1 or 2 */
1201         xfs_inobt_ptr_t         *pp;    /* btree address pointer */
1202         xfs_agblock_t           rbno;   /* right block number */
1203         xfs_buf_t               *rbp;   /* right buffer pointer */
1204         xfs_inobt_block_t       *right; /* right btree block */
1205         xfs_inobt_rec_t         *rp;    /* btree record pointer */
1206
1207         ASSERT(cur->bc_nlevels < XFS_IN_MAXLEVELS(cur->bc_mp));
1208
1209         /*
1210          * Get a block & a buffer.
1211          */
1212         agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp);
1213         args.tp = cur->bc_tp;
1214         args.mp = cur->bc_mp;
1215         args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno,
1216                 be32_to_cpu(agi->agi_root));
1217         args.mod = args.minleft = args.alignment = args.total = args.wasdel =
1218                 args.isfl = args.userdata = args.minalignslop = 0;
1219         args.minlen = args.maxlen = args.prod = 1;
1220         args.type = XFS_ALLOCTYPE_NEAR_BNO;
1221         if ((error = xfs_alloc_vextent(&args)))
1222                 return error;
1223         /*
1224          * None available, we fail.
1225          */
1226         if (args.fsbno == NULLFSBLOCK) {
1227                 *stat = 0;
1228                 return 0;
1229         }
1230         ASSERT(args.len == 1);
1231         nbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
1232         new = XFS_BUF_TO_INOBT_BLOCK(nbp);
1233         /*
1234          * Set the root data in the a.g. inode structure.
1235          */
1236         agi->agi_root = cpu_to_be32(args.agbno);
1237         be32_add(&agi->agi_level, 1);
1238         xfs_ialloc_log_agi(args.tp, cur->bc_private.i.agbp,
1239                 XFS_AGI_ROOT | XFS_AGI_LEVEL);
1240         /*
1241          * At the previous root level there are now two blocks: the old
1242          * root, and the new block generated when it was split.
1243          * We don't know which one the cursor is pointing at, so we
1244          * set up variables "left" and "right" for each case.
1245          */
1246         bp = cur->bc_bufs[cur->bc_nlevels - 1];
1247         block = XFS_BUF_TO_INOBT_BLOCK(bp);
1248 #ifdef DEBUG
1249         if ((error = xfs_btree_check_sblock(cur, block, cur->bc_nlevels - 1, bp)))
1250                 return error;
1251 #endif
1252         if (be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
1253                 /*
1254                  * Our block is left, pick up the right block.
1255                  */
1256                 lbp = bp;
1257                 lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
1258                 left = block;
1259                 rbno = be32_to_cpu(left->bb_rightsib);
1260                 if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1261                                 rbno, 0, &rbp, XFS_INO_BTREE_REF)))
1262                         return error;
1263                 bp = rbp;
1264                 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1265                 if ((error = xfs_btree_check_sblock(cur, right,
1266                                 cur->bc_nlevels - 1, rbp)))
1267                         return error;
1268                 nptr = 1;
1269         } else {
1270                 /*
1271                  * Our block is right, pick up the left block.
1272                  */
1273                 rbp = bp;
1274                 rbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(rbp));
1275                 right = block;
1276                 lbno = be32_to_cpu(right->bb_leftsib);
1277                 if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1278                                 lbno, 0, &lbp, XFS_INO_BTREE_REF)))
1279                         return error;
1280                 bp = lbp;
1281                 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1282                 if ((error = xfs_btree_check_sblock(cur, left,
1283                                 cur->bc_nlevels - 1, lbp)))
1284                         return error;
1285                 nptr = 2;
1286         }
1287         /*
1288          * Fill in the new block's btree header and log it.
1289          */
1290         new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1291         new->bb_level = cpu_to_be16(cur->bc_nlevels);
1292         new->bb_numrecs = cpu_to_be16(2);
1293         new->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1294         new->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
1295         xfs_inobt_log_block(args.tp, nbp, XFS_BB_ALL_BITS);
1296         ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
1297         /*
1298          * Fill in the key data in the new root.
1299          */
1300         kp = XFS_INOBT_KEY_ADDR(new, 1, cur);
1301         if (be16_to_cpu(left->bb_level) > 0) {
1302                 kp[0] = *XFS_INOBT_KEY_ADDR(left, 1, cur); /* INT_: struct copy */
1303                 kp[1] = *XFS_INOBT_KEY_ADDR(right, 1, cur); /* INT_: struct copy */
1304         } else {
1305                 rp = XFS_INOBT_REC_ADDR(left, 1, cur);
1306                 INT_COPY(kp[0].ir_startino, rp->ir_startino, ARCH_CONVERT);
1307                 rp = XFS_INOBT_REC_ADDR(right, 1, cur);
1308                 INT_COPY(kp[1].ir_startino, rp->ir_startino, ARCH_CONVERT);
1309         }
1310         xfs_inobt_log_keys(cur, nbp, 1, 2);
1311         /*
1312          * Fill in the pointer data in the new root.
1313          */
1314         pp = XFS_INOBT_PTR_ADDR(new, 1, cur);
1315         pp[0] = cpu_to_be32(lbno);
1316         pp[1] = cpu_to_be32(rbno);
1317         xfs_inobt_log_ptrs(cur, nbp, 1, 2);
1318         /*
1319          * Fix up the cursor.
1320          */
1321         xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
1322         cur->bc_ptrs[cur->bc_nlevels] = nptr;
1323         cur->bc_nlevels++;
1324         *stat = 1;
1325         return 0;
1326 }
1327
1328 /*
1329  * Move 1 record right from cur/level if possible.
1330  * Update cur to reflect the new path.
1331  */
1332 STATIC int                              /* error */
1333 xfs_inobt_rshift(
1334         xfs_btree_cur_t         *cur,   /* btree cursor */
1335         int                     level,  /* level to shift record on */
1336         int                     *stat)  /* success/failure */
1337 {
1338         int                     error;  /* error return value */
1339         int                     i;      /* loop index */
1340         xfs_inobt_key_t         key;    /* key value for leaf level upward */
1341         xfs_buf_t               *lbp;   /* buffer for left (current) block */
1342         xfs_inobt_block_t       *left;  /* left (current) btree block */
1343         xfs_inobt_key_t         *lkp;   /* key pointer for left block */
1344         xfs_inobt_ptr_t         *lpp;   /* address pointer for left block */
1345         xfs_inobt_rec_t         *lrp;   /* record pointer for left block */
1346         xfs_buf_t               *rbp;   /* buffer for right neighbor block */
1347         xfs_inobt_block_t       *right; /* right neighbor btree block */
1348         xfs_inobt_key_t         *rkp;   /* key pointer for right block */
1349         xfs_inobt_ptr_t         *rpp;   /* address pointer for right block */
1350         xfs_inobt_rec_t         *rrp=NULL;      /* record pointer for right block */
1351         xfs_btree_cur_t         *tcur;  /* temporary cursor */
1352
1353         /*
1354          * Set up variables for this block as "left".
1355          */
1356         lbp = cur->bc_bufs[level];
1357         left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1358 #ifdef DEBUG
1359         if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1360                 return error;
1361 #endif
1362         /*
1363          * If we've got no right sibling then we can't shift an entry right.
1364          */
1365         if (be32_to_cpu(left->bb_rightsib) == NULLAGBLOCK) {
1366                 *stat = 0;
1367                 return 0;
1368         }
1369         /*
1370          * If the cursor entry is the one that would be moved, don't
1371          * do it... it's too complicated.
1372          */
1373         if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
1374                 *stat = 0;
1375                 return 0;
1376         }
1377         /*
1378          * Set up the right neighbor as "right".
1379          */
1380         if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1381                         cur->bc_private.i.agno, be32_to_cpu(left->bb_rightsib),
1382                         0, &rbp, XFS_INO_BTREE_REF)))
1383                 return error;
1384         right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1385         if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1386                 return error;
1387         /*
1388          * If it's full, it can't take another entry.
1389          */
1390         if (be16_to_cpu(right->bb_numrecs) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
1391                 *stat = 0;
1392                 return 0;
1393         }
1394         /*
1395          * Make a hole at the start of the right neighbor block, then
1396          * copy the last left block entry to the hole.
1397          */
1398         if (level > 0) {
1399                 lkp = XFS_INOBT_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1400                 lpp = XFS_INOBT_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1401                 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1402                 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1403 #ifdef DEBUG
1404                 for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
1405                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
1406                                 return error;
1407                 }
1408 #endif
1409                 memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1410                 memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1411 #ifdef DEBUG
1412                 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level)))
1413                         return error;
1414 #endif
1415                 *rkp = *lkp; /* INT_: no change copy */
1416                 *rpp = *lpp; /* INT_: no change copy */
1417                 xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1418                 xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1419         } else {
1420                 lrp = XFS_INOBT_REC_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1421                 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1422                 memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1423                 *rrp = *lrp;
1424                 xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1425                 key.ir_startino = rrp->ir_startino; /* INT_: direct copy */
1426                 rkp = &key;
1427         }
1428         /*
1429          * Decrement and log left's numrecs, bump and log right's numrecs.
1430          */
1431         be16_add(&left->bb_numrecs, -1);
1432         xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1433         be16_add(&right->bb_numrecs, 1);
1434 #ifdef DEBUG
1435         if (level > 0)
1436                 xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
1437         else
1438                 xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
1439 #endif
1440         xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1441         /*
1442          * Using a temporary cursor, update the parent key values of the
1443          * block on the right.
1444          */
1445         if ((error = xfs_btree_dup_cursor(cur, &tcur)))
1446                 return error;
1447         xfs_btree_lastrec(tcur, level);
1448         if ((error = xfs_inobt_increment(tcur, level, &i)) ||
1449             (error = xfs_inobt_updkey(tcur, rkp, level + 1))) {
1450                 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
1451                 return error;
1452         }
1453         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
1454         *stat = 1;
1455         return 0;
1456 }
1457
1458 /*
1459  * Split cur/level block in half.
1460  * Return new block number and its first record (to be inserted into parent).
1461  */
1462 STATIC int                              /* error */
1463 xfs_inobt_split(
1464         xfs_btree_cur_t         *cur,   /* btree cursor */
1465         int                     level,  /* level to split */
1466         xfs_agblock_t           *bnop,  /* output: block number allocated */
1467         xfs_inobt_key_t         *keyp,  /* output: first key of new block */
1468         xfs_btree_cur_t         **curp, /* output: new cursor */
1469         int                     *stat)  /* success/failure */
1470 {
1471         xfs_alloc_arg_t         args;   /* allocation argument structure */
1472         int                     error;  /* error return value */
1473         int                     i;      /* loop index/record number */
1474         xfs_agblock_t           lbno;   /* left (current) block number */
1475         xfs_buf_t               *lbp;   /* buffer for left block */
1476         xfs_inobt_block_t       *left;  /* left (current) btree block */
1477         xfs_inobt_key_t         *lkp;   /* left btree key pointer */
1478         xfs_inobt_ptr_t         *lpp;   /* left btree address pointer */
1479         xfs_inobt_rec_t         *lrp;   /* left btree record pointer */
1480         xfs_buf_t               *rbp;   /* buffer for right block */
1481         xfs_inobt_block_t       *right; /* right (new) btree block */
1482         xfs_inobt_key_t         *rkp;   /* right btree key pointer */
1483         xfs_inobt_ptr_t         *rpp;   /* right btree address pointer */
1484         xfs_inobt_rec_t         *rrp;   /* right btree record pointer */
1485
1486         /*
1487          * Set up left block (current one).
1488          */
1489         lbp = cur->bc_bufs[level];
1490         args.tp = cur->bc_tp;
1491         args.mp = cur->bc_mp;
1492         lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
1493         /*
1494          * Allocate the new block.
1495          * If we can't do it, we're toast.  Give up.
1496          */
1497         args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno, lbno);
1498         args.mod = args.minleft = args.alignment = args.total = args.wasdel =
1499                 args.isfl = args.userdata = args.minalignslop = 0;
1500         args.minlen = args.maxlen = args.prod = 1;
1501         args.type = XFS_ALLOCTYPE_NEAR_BNO;
1502         if ((error = xfs_alloc_vextent(&args)))
1503                 return error;
1504         if (args.fsbno == NULLFSBLOCK) {
1505                 *stat = 0;
1506                 return 0;
1507         }
1508         ASSERT(args.len == 1);
1509         rbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
1510         /*
1511          * Set up the new block as "right".
1512          */
1513         right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1514         /*
1515          * "Left" is the current (according to the cursor) block.
1516          */
1517         left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1518 #ifdef DEBUG
1519         if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1520                 return error;
1521 #endif
1522         /*
1523          * Fill in the btree header for the new block.
1524          */
1525         right->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1526         right->bb_level = left->bb_level;
1527         right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
1528         /*
1529          * Make sure that if there's an odd number of entries now, that
1530          * each new block will have the same number of entries.
1531          */
1532         if ((be16_to_cpu(left->bb_numrecs) & 1) &&
1533             cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
1534                 be16_add(&right->bb_numrecs, 1);
1535         i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
1536         /*
1537          * For non-leaf blocks, copy keys and addresses over to the new block.
1538          */
1539         if (level > 0) {
1540                 lkp = XFS_INOBT_KEY_ADDR(left, i, cur);
1541                 lpp = XFS_INOBT_PTR_ADDR(left, i, cur);
1542                 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1543                 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1544 #ifdef DEBUG
1545                 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1546                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
1547                                 return error;
1548                 }
1549 #endif
1550                 memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1551                 memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1552                 xfs_inobt_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1553                 xfs_inobt_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1554                 *keyp = *rkp;
1555         }
1556         /*
1557          * For leaf blocks, copy records over to the new block.
1558          */
1559         else {
1560                 lrp = XFS_INOBT_REC_ADDR(left, i, cur);
1561                 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1562                 memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1563                 xfs_inobt_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1564                 keyp->ir_startino = rrp->ir_startino; /* INT_: direct copy */
1565         }
1566         /*
1567          * Find the left block number by looking in the buffer.
1568          * Adjust numrecs, sibling pointers.
1569          */
1570         be16_add(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
1571         right->bb_rightsib = left->bb_rightsib;
1572         left->bb_rightsib = cpu_to_be32(args.agbno);
1573         right->bb_leftsib = cpu_to_be32(lbno);
1574         xfs_inobt_log_block(args.tp, rbp, XFS_BB_ALL_BITS);
1575         xfs_inobt_log_block(args.tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
1576         /*
1577          * If there's a block to the new block's right, make that block
1578          * point back to right instead of to left.
1579          */
1580         if (be32_to_cpu(right->bb_rightsib) != NULLAGBLOCK) {
1581                 xfs_inobt_block_t       *rrblock;       /* rr btree block */
1582                 xfs_buf_t               *rrbp;          /* buffer for rrblock */
1583
1584                 if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1585                                 be32_to_cpu(right->bb_rightsib), 0, &rrbp,
1586                                 XFS_INO_BTREE_REF)))
1587                         return error;
1588                 rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
1589                 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
1590                         return error;
1591                 rrblock->bb_leftsib = cpu_to_be32(args.agbno);
1592                 xfs_inobt_log_block(args.tp, rrbp, XFS_BB_LEFTSIB);
1593         }
1594         /*
1595          * If the cursor is really in the right block, move it there.
1596          * If it's just pointing past the last entry in left, then we'll
1597          * insert there, so don't change anything in that case.
1598          */
1599         if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) {
1600                 xfs_btree_setbuf(cur, level, rbp);
1601                 cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs);
1602         }
1603         /*
1604          * If there are more levels, we'll need another cursor which refers
1605          * the right block, no matter where this cursor was.
1606          */
1607         if (level + 1 < cur->bc_nlevels) {
1608                 if ((error = xfs_btree_dup_cursor(cur, curp)))
1609                         return error;
1610                 (*curp)->bc_ptrs[level + 1]++;
1611         }
1612         *bnop = args.agbno;
1613         *stat = 1;
1614         return 0;
1615 }
1616
1617 /*
1618  * Update keys at all levels from here to the root along the cursor's path.
1619  */
1620 STATIC int                              /* error */
1621 xfs_inobt_updkey(
1622         xfs_btree_cur_t         *cur,   /* btree cursor */
1623         xfs_inobt_key_t         *keyp,  /* new key value to update to */
1624         int                     level)  /* starting level for update */
1625 {
1626         int                     ptr;    /* index of key in block */
1627
1628         /*
1629          * Go up the tree from this level toward the root.
1630          * At each level, update the key value to the value input.
1631          * Stop when we reach a level where the cursor isn't pointing
1632          * at the first entry in the block.
1633          */
1634         for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1635                 xfs_buf_t               *bp;    /* buffer for block */
1636                 xfs_inobt_block_t       *block; /* btree block */
1637 #ifdef DEBUG
1638                 int                     error;  /* error return value */
1639 #endif
1640                 xfs_inobt_key_t         *kp;    /* ptr to btree block keys */
1641
1642                 bp = cur->bc_bufs[level];
1643                 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1644 #ifdef DEBUG
1645                 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1646                         return error;
1647 #endif
1648                 ptr = cur->bc_ptrs[level];
1649                 kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
1650                 *kp = *keyp;
1651                 xfs_inobt_log_keys(cur, bp, ptr, ptr);
1652         }
1653         return 0;
1654 }
1655
1656 /*
1657  * Externally visible routines.
1658  */
1659
1660 /*
1661  * Decrement cursor by one record at the level.
1662  * For nonzero levels the leaf-ward information is untouched.
1663  */
1664 int                                     /* error */
1665 xfs_inobt_decrement(
1666         xfs_btree_cur_t         *cur,   /* btree cursor */
1667         int                     level,  /* level in btree, 0 is leaf */
1668         int                     *stat)  /* success/failure */
1669 {
1670         xfs_inobt_block_t       *block; /* btree block */
1671         int                     error;
1672         int                     lev;    /* btree level */
1673
1674         ASSERT(level < cur->bc_nlevels);
1675         /*
1676          * Read-ahead to the left at this level.
1677          */
1678         xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1679         /*
1680          * Decrement the ptr at this level.  If we're still in the block
1681          * then we're done.
1682          */
1683         if (--cur->bc_ptrs[level] > 0) {
1684                 *stat = 1;
1685                 return 0;
1686         }
1687         /*
1688          * Get a pointer to the btree block.
1689          */
1690         block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[level]);
1691 #ifdef DEBUG
1692         if ((error = xfs_btree_check_sblock(cur, block, level,
1693                         cur->bc_bufs[level])))
1694                 return error;
1695 #endif
1696         /*
1697          * If we just went off the left edge of the tree, return failure.
1698          */
1699         if (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK) {
1700                 *stat = 0;
1701                 return 0;
1702         }
1703         /*
1704          * March up the tree decrementing pointers.
1705          * Stop when we don't go off the left edge of a block.
1706          */
1707         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1708                 if (--cur->bc_ptrs[lev] > 0)
1709                         break;
1710                 /*
1711                  * Read-ahead the left block, we're going to read it
1712                  * in the next loop.
1713                  */
1714                 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1715         }
1716         /*
1717          * If we went off the root then we are seriously confused.
1718          */
1719         ASSERT(lev < cur->bc_nlevels);
1720         /*
1721          * Now walk back down the tree, fixing up the cursor's buffer
1722          * pointers and key numbers.
1723          */
1724         for (block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
1725                 xfs_agblock_t   agbno;  /* block number of btree block */
1726                 xfs_buf_t       *bp;    /* buffer containing btree block */
1727
1728                 agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
1729                 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1730                                 cur->bc_private.i.agno, agbno, 0, &bp,
1731                                 XFS_INO_BTREE_REF)))
1732                         return error;
1733                 lev--;
1734                 xfs_btree_setbuf(cur, lev, bp);
1735                 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1736                 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1737                         return error;
1738                 cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
1739         }
1740         *stat = 1;
1741         return 0;
1742 }
1743
1744 /*
1745  * Delete the record pointed to by cur.
1746  * The cursor refers to the place where the record was (could be inserted)
1747  * when the operation returns.
1748  */
1749 int                                     /* error */
1750 xfs_inobt_delete(
1751         xfs_btree_cur_t *cur,           /* btree cursor */
1752         int             *stat)          /* success/failure */
1753 {
1754         int             error;
1755         int             i;              /* result code */
1756         int             level;          /* btree level */
1757
1758         /*
1759          * Go up the tree, starting at leaf level.
1760          * If 2 is returned then a join was done; go to the next level.
1761          * Otherwise we are done.
1762          */
1763         for (level = 0, i = 2; i == 2; level++) {
1764                 if ((error = xfs_inobt_delrec(cur, level, &i)))
1765                         return error;
1766         }
1767         if (i == 0) {
1768                 for (level = 1; level < cur->bc_nlevels; level++) {
1769                         if (cur->bc_ptrs[level] == 0) {
1770                                 if ((error = xfs_inobt_decrement(cur, level, &i)))
1771                                         return error;
1772                                 break;
1773                         }
1774                 }
1775         }
1776         *stat = i;
1777         return 0;
1778 }
1779
1780
1781 /*
1782  * Get the data from the pointed-to record.
1783  */
1784 int                                     /* error */
1785 xfs_inobt_get_rec(
1786         xfs_btree_cur_t         *cur,   /* btree cursor */
1787         xfs_agino_t             *ino,   /* output: starting inode of chunk */
1788         __int32_t               *fcnt,  /* output: number of free inodes */
1789         xfs_inofree_t           *free,  /* output: free inode mask */
1790         int                     *stat)  /* output: success/failure */
1791 {
1792         xfs_inobt_block_t       *block; /* btree block */
1793         xfs_buf_t               *bp;    /* buffer containing btree block */
1794 #ifdef DEBUG
1795         int                     error;  /* error return value */
1796 #endif
1797         int                     ptr;    /* record number */
1798         xfs_inobt_rec_t         *rec;   /* record data */
1799
1800         bp = cur->bc_bufs[0];
1801         ptr = cur->bc_ptrs[0];
1802         block = XFS_BUF_TO_INOBT_BLOCK(bp);
1803 #ifdef DEBUG
1804         if ((error = xfs_btree_check_sblock(cur, block, 0, bp)))
1805                 return error;
1806 #endif
1807         /*
1808          * Off the right end or left end, return failure.
1809          */
1810         if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) {
1811                 *stat = 0;
1812                 return 0;
1813         }
1814         /*
1815          * Point to the record and extract its data.
1816          */
1817         rec = XFS_INOBT_REC_ADDR(block, ptr, cur);
1818         *ino = INT_GET(rec->ir_startino, ARCH_CONVERT);
1819         *fcnt = INT_GET(rec->ir_freecount, ARCH_CONVERT);
1820         *free = INT_GET(rec->ir_free, ARCH_CONVERT);
1821         *stat = 1;
1822         return 0;
1823 }
1824
1825 /*
1826  * Increment cursor by one record at the level.
1827  * For nonzero levels the leaf-ward information is untouched.
1828  */
1829 int                                     /* error */
1830 xfs_inobt_increment(
1831         xfs_btree_cur_t         *cur,   /* btree cursor */
1832         int                     level,  /* level in btree, 0 is leaf */
1833         int                     *stat)  /* success/failure */
1834 {
1835         xfs_inobt_block_t       *block; /* btree block */
1836         xfs_buf_t               *bp;    /* buffer containing btree block */
1837         int                     error;  /* error return value */
1838         int                     lev;    /* btree level */
1839
1840         ASSERT(level < cur->bc_nlevels);
1841         /*
1842          * Read-ahead to the right at this level.
1843          */
1844         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1845         /*
1846          * Get a pointer to the btree block.
1847          */
1848         bp = cur->bc_bufs[level];
1849         block = XFS_BUF_TO_INOBT_BLOCK(bp);
1850 #ifdef DEBUG
1851         if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1852                 return error;
1853 #endif
1854         /*
1855          * Increment the ptr at this level.  If we're still in the block
1856          * then we're done.
1857          */
1858         if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
1859                 *stat = 1;
1860                 return 0;
1861         }
1862         /*
1863          * If we just went off the right edge of the tree, return failure.
1864          */
1865         if (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK) {
1866                 *stat = 0;
1867                 return 0;
1868         }
1869         /*
1870          * March up the tree incrementing pointers.
1871          * Stop when we don't go off the right edge of a block.
1872          */
1873         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1874                 bp = cur->bc_bufs[lev];
1875                 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1876 #ifdef DEBUG
1877                 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1878                         return error;
1879 #endif
1880                 if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
1881                         break;
1882                 /*
1883                  * Read-ahead the right block, we're going to read it
1884                  * in the next loop.
1885                  */
1886                 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1887         }
1888         /*
1889          * If we went off the root then we are seriously confused.
1890          */
1891         ASSERT(lev < cur->bc_nlevels);
1892         /*
1893          * Now walk back down the tree, fixing up the cursor's buffer
1894          * pointers and key numbers.
1895          */
1896         for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_INOBT_BLOCK(bp);
1897              lev > level; ) {
1898                 xfs_agblock_t   agbno;  /* block number of btree block */
1899
1900                 agbno = be32_to_cpu(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
1901                 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1902                                 cur->bc_private.i.agno, agbno, 0, &bp,
1903                                 XFS_INO_BTREE_REF)))
1904                         return error;
1905                 lev--;
1906                 xfs_btree_setbuf(cur, lev, bp);
1907                 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1908                 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1909                         return error;
1910                 cur->bc_ptrs[lev] = 1;
1911         }
1912         *stat = 1;
1913         return 0;
1914 }
1915
1916 /*
1917  * Insert the current record at the point referenced by cur.
1918  * The cursor may be inconsistent on return if splits have been done.
1919  */
1920 int                                     /* error */
1921 xfs_inobt_insert(
1922         xfs_btree_cur_t *cur,           /* btree cursor */
1923         int             *stat)          /* success/failure */
1924 {
1925         int             error;          /* error return value */
1926         int             i;              /* result value, 0 for failure */
1927         int             level;          /* current level number in btree */
1928         xfs_agblock_t   nbno;           /* new block number (split result) */
1929         xfs_btree_cur_t *ncur;          /* new cursor (split result) */
1930         xfs_inobt_rec_t nrec;           /* record being inserted this level */
1931         xfs_btree_cur_t *pcur;          /* previous level's cursor */
1932
1933         level = 0;
1934         nbno = NULLAGBLOCK;
1935         INT_SET(nrec.ir_startino, ARCH_CONVERT, cur->bc_rec.i.ir_startino);
1936         INT_SET(nrec.ir_freecount, ARCH_CONVERT, cur->bc_rec.i.ir_freecount);
1937         INT_SET(nrec.ir_free, ARCH_CONVERT, cur->bc_rec.i.ir_free);
1938         ncur = (xfs_btree_cur_t *)0;
1939         pcur = cur;
1940         /*
1941          * Loop going up the tree, starting at the leaf level.
1942          * Stop when we don't get a split block, that must mean that
1943          * the insert is finished with this level.
1944          */
1945         do {
1946                 /*
1947                  * Insert nrec/nbno into this level of the tree.
1948                  * Note if we fail, nbno will be null.
1949                  */
1950                 if ((error = xfs_inobt_insrec(pcur, level++, &nbno, &nrec, &ncur,
1951                                 &i))) {
1952                         if (pcur != cur)
1953                                 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
1954                         return error;
1955                 }
1956                 /*
1957                  * See if the cursor we just used is trash.
1958                  * Can't trash the caller's cursor, but otherwise we should
1959                  * if ncur is a new cursor or we're about to be done.
1960                  */
1961                 if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
1962                         cur->bc_nlevels = pcur->bc_nlevels;
1963                         xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
1964                 }
1965                 /*
1966                  * If we got a new cursor, switch to it.
1967                  */
1968                 if (ncur) {
1969                         pcur = ncur;
1970                         ncur = (xfs_btree_cur_t *)0;
1971                 }
1972         } while (nbno != NULLAGBLOCK);
1973         *stat = i;
1974         return 0;
1975 }
1976
1977 /*
1978  * Lookup the record equal to ino in the btree given by cur.
1979  */
1980 int                                     /* error */
1981 xfs_inobt_lookup_eq(
1982         xfs_btree_cur_t *cur,           /* btree cursor */
1983         xfs_agino_t     ino,            /* starting inode of chunk */
1984         __int32_t       fcnt,           /* free inode count */
1985         xfs_inofree_t   free,           /* free inode mask */
1986         int             *stat)          /* success/failure */
1987 {
1988         cur->bc_rec.i.ir_startino = ino;
1989         cur->bc_rec.i.ir_freecount = fcnt;
1990         cur->bc_rec.i.ir_free = free;
1991         return xfs_inobt_lookup(cur, XFS_LOOKUP_EQ, stat);
1992 }
1993
1994 /*
1995  * Lookup the first record greater than or equal to ino
1996  * in the btree given by cur.
1997  */
1998 int                                     /* error */
1999 xfs_inobt_lookup_ge(
2000         xfs_btree_cur_t *cur,           /* btree cursor */
2001         xfs_agino_t     ino,            /* starting inode of chunk */
2002         __int32_t       fcnt,           /* free inode count */
2003         xfs_inofree_t   free,           /* free inode mask */
2004         int             *stat)          /* success/failure */
2005 {
2006         cur->bc_rec.i.ir_startino = ino;
2007         cur->bc_rec.i.ir_freecount = fcnt;
2008         cur->bc_rec.i.ir_free = free;
2009         return xfs_inobt_lookup(cur, XFS_LOOKUP_GE, stat);
2010 }
2011
2012 /*
2013  * Lookup the first record less than or equal to ino
2014  * in the btree given by cur.
2015  */
2016 int                                     /* error */
2017 xfs_inobt_lookup_le(
2018         xfs_btree_cur_t *cur,           /* btree cursor */
2019         xfs_agino_t     ino,            /* starting inode of chunk */
2020         __int32_t       fcnt,           /* free inode count */
2021         xfs_inofree_t   free,           /* free inode mask */
2022         int             *stat)          /* success/failure */
2023 {
2024         cur->bc_rec.i.ir_startino = ino;
2025         cur->bc_rec.i.ir_freecount = fcnt;
2026         cur->bc_rec.i.ir_free = free;
2027         return xfs_inobt_lookup(cur, XFS_LOOKUP_LE, stat);
2028 }
2029
2030 /*
2031  * Update the record referred to by cur, to the value given
2032  * by [ino, fcnt, free].
2033  * This either works (return 0) or gets an EFSCORRUPTED error.
2034  */
2035 int                                     /* error */
2036 xfs_inobt_update(
2037         xfs_btree_cur_t         *cur,   /* btree cursor */
2038         xfs_agino_t             ino,    /* starting inode of chunk */
2039         __int32_t               fcnt,   /* free inode count */
2040         xfs_inofree_t           free)   /* free inode mask */
2041 {
2042         xfs_inobt_block_t       *block; /* btree block to update */
2043         xfs_buf_t               *bp;    /* buffer containing btree block */
2044         int                     error;  /* error return value */
2045         int                     ptr;    /* current record number (updating) */
2046         xfs_inobt_rec_t         *rp;    /* pointer to updated record */
2047
2048         /*
2049          * Pick up the current block.
2050          */
2051         bp = cur->bc_bufs[0];
2052         block = XFS_BUF_TO_INOBT_BLOCK(bp);
2053 #ifdef DEBUG
2054         if ((error = xfs_btree_check_sblock(cur, block, 0, bp)))
2055                 return error;
2056 #endif
2057         /*
2058          * Get the address of the rec to be updated.
2059          */
2060         ptr = cur->bc_ptrs[0];
2061         rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
2062         /*
2063          * Fill in the new contents and log them.
2064          */
2065         INT_SET(rp->ir_startino, ARCH_CONVERT, ino);
2066         INT_SET(rp->ir_freecount, ARCH_CONVERT, fcnt);
2067         INT_SET(rp->ir_free, ARCH_CONVERT, free);
2068         xfs_inobt_log_recs(cur, bp, ptr, ptr);
2069         /*
2070          * Updating first record in leaf. Pass new key value up to our parent.
2071          */
2072         if (ptr == 1) {
2073                 xfs_inobt_key_t key;    /* key containing [ino] */
2074
2075                 INT_SET(key.ir_startino, ARCH_CONVERT, ino);
2076                 if ((error = xfs_inobt_updkey(cur, &key, 1)))
2077                         return error;
2078         }
2079         return 0;
2080 }