Pull ec into release branch
[linux-2.6] / fs / xfs / xfs_alloc_btree.c
1 /*
2  * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_bit.h"
22 #include "xfs_log.h"
23 #include "xfs_inum.h"
24 #include "xfs_trans.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_dir.h"
28 #include "xfs_dir2.h"
29 #include "xfs_dmapi.h"
30 #include "xfs_mount.h"
31 #include "xfs_bmap_btree.h"
32 #include "xfs_alloc_btree.h"
33 #include "xfs_ialloc_btree.h"
34 #include "xfs_dir_sf.h"
35 #include "xfs_dir2_sf.h"
36 #include "xfs_attr_sf.h"
37 #include "xfs_dinode.h"
38 #include "xfs_inode.h"
39 #include "xfs_btree.h"
40 #include "xfs_ialloc.h"
41 #include "xfs_alloc.h"
42 #include "xfs_error.h"
43
44 /*
45  * Prototypes for internal functions.
46  */
47
48 STATIC void xfs_alloc_log_block(xfs_trans_t *, xfs_buf_t *, int);
49 STATIC void xfs_alloc_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
50 STATIC void xfs_alloc_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
51 STATIC void xfs_alloc_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
52 STATIC int xfs_alloc_lshift(xfs_btree_cur_t *, int, int *);
53 STATIC int xfs_alloc_newroot(xfs_btree_cur_t *, int *);
54 STATIC int xfs_alloc_rshift(xfs_btree_cur_t *, int, int *);
55 STATIC int xfs_alloc_split(xfs_btree_cur_t *, int, xfs_agblock_t *,
56                 xfs_alloc_key_t *, xfs_btree_cur_t **, int *);
57 STATIC int xfs_alloc_updkey(xfs_btree_cur_t *, xfs_alloc_key_t *, int);
58
59 /*
60  * Internal functions.
61  */
62
63 /*
64  * Single level of the xfs_alloc_delete record deletion routine.
65  * Delete record pointed to by cur/level.
66  * Remove the record from its block then rebalance the tree.
67  * Return 0 for error, 1 for done, 2 to go on to the next level.
68  */
69 STATIC int                              /* error */
70 xfs_alloc_delrec(
71         xfs_btree_cur_t         *cur,   /* btree cursor */
72         int                     level,  /* level removing record from */
73         int                     *stat)  /* fail/done/go-on */
74 {
75         xfs_agf_t               *agf;   /* allocation group freelist header */
76         xfs_alloc_block_t       *block; /* btree block record/key lives in */
77         xfs_agblock_t           bno;    /* btree block number */
78         xfs_buf_t               *bp;    /* buffer for block */
79         int                     error;  /* error return value */
80         int                     i;      /* loop index */
81         xfs_alloc_key_t         key;    /* kp points here if block is level 0 */
82         xfs_agblock_t           lbno;   /* left block's block number */
83         xfs_buf_t               *lbp;   /* left block's buffer pointer */
84         xfs_alloc_block_t       *left;  /* left btree block */
85         xfs_alloc_key_t         *lkp=NULL;      /* left block key pointer */
86         xfs_alloc_ptr_t         *lpp=NULL;      /* left block address pointer */
87         int                     lrecs=0;        /* number of records in left block */
88         xfs_alloc_rec_t         *lrp;   /* left block record pointer */
89         xfs_mount_t             *mp;    /* mount structure */
90         int                     ptr;    /* index in btree block for this rec */
91         xfs_agblock_t           rbno;   /* right block's block number */
92         xfs_buf_t               *rbp;   /* right block's buffer pointer */
93         xfs_alloc_block_t       *right; /* right btree block */
94         xfs_alloc_key_t         *rkp;   /* right block key pointer */
95         xfs_alloc_ptr_t         *rpp;   /* right block address pointer */
96         int                     rrecs=0;        /* number of records in right block */
97         xfs_alloc_rec_t         *rrp;   /* right block record pointer */
98         xfs_btree_cur_t         *tcur;  /* temporary btree cursor */
99
100         /*
101          * Get the index of the entry being deleted, check for nothing there.
102          */
103         ptr = cur->bc_ptrs[level];
104         if (ptr == 0) {
105                 *stat = 0;
106                 return 0;
107         }
108         /*
109          * Get the buffer & block containing the record or key/ptr.
110          */
111         bp = cur->bc_bufs[level];
112         block = XFS_BUF_TO_ALLOC_BLOCK(bp);
113 #ifdef DEBUG
114         if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
115                 return error;
116 #endif
117         /*
118          * Fail if we're off the end of the block.
119          */
120         if (ptr > be16_to_cpu(block->bb_numrecs)) {
121                 *stat = 0;
122                 return 0;
123         }
124         XFS_STATS_INC(xs_abt_delrec);
125         /*
126          * It's a nonleaf.  Excise the key and ptr being deleted, by
127          * sliding the entries past them down one.
128          * Log the changed areas of the block.
129          */
130         if (level > 0) {
131                 lkp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
132                 lpp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
133 #ifdef DEBUG
134                 for (i = ptr; i < be16_to_cpu(block->bb_numrecs); i++) {
135                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
136                                 return error;
137                 }
138 #endif
139                 if (ptr < be16_to_cpu(block->bb_numrecs)) {
140                         memmove(&lkp[ptr - 1], &lkp[ptr],
141                                 (be16_to_cpu(block->bb_numrecs) - ptr) * sizeof(*lkp));
142                         memmove(&lpp[ptr - 1], &lpp[ptr],
143                                 (be16_to_cpu(block->bb_numrecs) - ptr) * sizeof(*lpp));
144                         xfs_alloc_log_ptrs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs) - 1);
145                         xfs_alloc_log_keys(cur, bp, ptr, be16_to_cpu(block->bb_numrecs) - 1);
146                 }
147         }
148         /*
149          * It's a leaf.  Excise the record being deleted, by sliding the
150          * entries past it down one.  Log the changed areas of the block.
151          */
152         else {
153                 lrp = XFS_ALLOC_REC_ADDR(block, 1, cur);
154                 if (ptr < be16_to_cpu(block->bb_numrecs)) {
155                         memmove(&lrp[ptr - 1], &lrp[ptr],
156                                 (be16_to_cpu(block->bb_numrecs) - ptr) * sizeof(*lrp));
157                         xfs_alloc_log_recs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs) - 1);
158                 }
159                 /*
160                  * If it's the first record in the block, we'll need a key
161                  * structure to pass up to the next level (updkey).
162                  */
163                 if (ptr == 1) {
164                         key.ar_startblock = lrp->ar_startblock;
165                         key.ar_blockcount = lrp->ar_blockcount;
166                         lkp = &key;
167                 }
168         }
169         /*
170          * Decrement and log the number of entries in the block.
171          */
172         be16_add(&block->bb_numrecs, -1);
173         xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
174         /*
175          * See if the longest free extent in the allocation group was
176          * changed by this operation.  True if it's the by-size btree, and
177          * this is the leaf level, and there is no right sibling block,
178          * and this was the last record.
179          */
180         agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
181         mp = cur->bc_mp;
182
183         if (level == 0 &&
184             cur->bc_btnum == XFS_BTNUM_CNT &&
185             be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
186             ptr > be16_to_cpu(block->bb_numrecs)) {
187                 ASSERT(ptr == be16_to_cpu(block->bb_numrecs) + 1);
188                 /*
189                  * There are still records in the block.  Grab the size
190                  * from the last one.
191                  */
192                 if (be16_to_cpu(block->bb_numrecs)) {
193                         rrp = XFS_ALLOC_REC_ADDR(block, be16_to_cpu(block->bb_numrecs), cur);
194                         agf->agf_longest = rrp->ar_blockcount;
195                 }
196                 /*
197                  * No free extents left.
198                  */
199                 else
200                         agf->agf_longest = 0;
201                 mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_longest =
202                         be32_to_cpu(agf->agf_longest);
203                 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
204                         XFS_AGF_LONGEST);
205         }
206         /*
207          * Is this the root level?  If so, we're almost done.
208          */
209         if (level == cur->bc_nlevels - 1) {
210                 /*
211                  * If this is the root level,
212                  * and there's only one entry left,
213                  * and it's NOT the leaf level,
214                  * then we can get rid of this level.
215                  */
216                 if (be16_to_cpu(block->bb_numrecs) == 1 && level > 0) {
217                         /*
218                          * lpp is still set to the first pointer in the block.
219                          * Make it the new root of the btree.
220                          */
221                         bno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]);
222                         agf->agf_roots[cur->bc_btnum] = *lpp;
223                         be32_add(&agf->agf_levels[cur->bc_btnum], -1);
224                         mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_levels[cur->bc_btnum]--;
225                         /*
226                          * Put this buffer/block on the ag's freelist.
227                          */
228                         if ((error = xfs_alloc_put_freelist(cur->bc_tp,
229                                         cur->bc_private.a.agbp, NULL, bno)))
230                                 return error;
231                         /*
232                          * Since blocks move to the free list without the
233                          * coordination used in xfs_bmap_finish, we can't allow
234                          * block to be available for reallocation and
235                          * non-transaction writing (user data) until we know
236                          * that the transaction that moved it to the free list
237                          * is permanently on disk. We track the blocks by
238                          * declaring these blocks as "busy"; the busy list is
239                          * maintained on a per-ag basis and each transaction
240                          * records which entries should be removed when the
241                          * iclog commits to disk. If a busy block is
242                          * allocated, the iclog is pushed up to the LSN
243                          * that freed the block.
244                          */
245                         xfs_alloc_mark_busy(cur->bc_tp,
246                                 be32_to_cpu(agf->agf_seqno), bno, 1);
247
248                         xfs_trans_agbtree_delta(cur->bc_tp, -1);
249                         xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
250                                 XFS_AGF_ROOTS | XFS_AGF_LEVELS);
251                         /*
252                          * Update the cursor so there's one fewer level.
253                          */
254                         xfs_btree_setbuf(cur, level, NULL);
255                         cur->bc_nlevels--;
256                 } else if (level > 0 &&
257                            (error = xfs_alloc_decrement(cur, level, &i)))
258                         return error;
259                 *stat = 1;
260                 return 0;
261         }
262         /*
263          * If we deleted the leftmost entry in the block, update the
264          * key values above us in the tree.
265          */
266         if (ptr == 1 && (error = xfs_alloc_updkey(cur, lkp, level + 1)))
267                 return error;
268         /*
269          * If the number of records remaining in the block is at least
270          * the minimum, we're done.
271          */
272         if (be16_to_cpu(block->bb_numrecs) >= XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
273                 if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
274                         return error;
275                 *stat = 1;
276                 return 0;
277         }
278         /*
279          * Otherwise, we have to move some records around to keep the
280          * tree balanced.  Look at the left and right sibling blocks to
281          * see if we can re-balance by moving only one record.
282          */
283         rbno = be32_to_cpu(block->bb_rightsib);
284         lbno = be32_to_cpu(block->bb_leftsib);
285         bno = NULLAGBLOCK;
286         ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
287         /*
288          * Duplicate the cursor so our btree manipulations here won't
289          * disrupt the next level up.
290          */
291         if ((error = xfs_btree_dup_cursor(cur, &tcur)))
292                 return error;
293         /*
294          * If there's a right sibling, see if it's ok to shift an entry
295          * out of it.
296          */
297         if (rbno != NULLAGBLOCK) {
298                 /*
299                  * Move the temp cursor to the last entry in the next block.
300                  * Actually any entry but the first would suffice.
301                  */
302                 i = xfs_btree_lastrec(tcur, level);
303                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
304                 if ((error = xfs_alloc_increment(tcur, level, &i)))
305                         goto error0;
306                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
307                 i = xfs_btree_lastrec(tcur, level);
308                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
309                 /*
310                  * Grab a pointer to the block.
311                  */
312                 rbp = tcur->bc_bufs[level];
313                 right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
314 #ifdef DEBUG
315                 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
316                         goto error0;
317 #endif
318                 /*
319                  * Grab the current block number, for future use.
320                  */
321                 bno = be32_to_cpu(right->bb_leftsib);
322                 /*
323                  * If right block is full enough so that removing one entry
324                  * won't make it too empty, and left-shifting an entry out
325                  * of right to us works, we're done.
326                  */
327                 if (be16_to_cpu(right->bb_numrecs) - 1 >=
328                      XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
329                         if ((error = xfs_alloc_lshift(tcur, level, &i)))
330                                 goto error0;
331                         if (i) {
332                                 ASSERT(be16_to_cpu(block->bb_numrecs) >=
333                                        XFS_ALLOC_BLOCK_MINRECS(level, cur));
334                                 xfs_btree_del_cursor(tcur,
335                                                      XFS_BTREE_NOERROR);
336                                 if (level > 0 &&
337                                     (error = xfs_alloc_decrement(cur, level,
338                                             &i)))
339                                         return error;
340                                 *stat = 1;
341                                 return 0;
342                         }
343                 }
344                 /*
345                  * Otherwise, grab the number of records in right for
346                  * future reference, and fix up the temp cursor to point
347                  * to our block again (last record).
348                  */
349                 rrecs = be16_to_cpu(right->bb_numrecs);
350                 if (lbno != NULLAGBLOCK) {
351                         i = xfs_btree_firstrec(tcur, level);
352                         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
353                         if ((error = xfs_alloc_decrement(tcur, level, &i)))
354                                 goto error0;
355                         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
356                 }
357         }
358         /*
359          * If there's a left sibling, see if it's ok to shift an entry
360          * out of it.
361          */
362         if (lbno != NULLAGBLOCK) {
363                 /*
364                  * Move the temp cursor to the first entry in the
365                  * previous block.
366                  */
367                 i = xfs_btree_firstrec(tcur, level);
368                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
369                 if ((error = xfs_alloc_decrement(tcur, level, &i)))
370                         goto error0;
371                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
372                 xfs_btree_firstrec(tcur, level);
373                 /*
374                  * Grab a pointer to the block.
375                  */
376                 lbp = tcur->bc_bufs[level];
377                 left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
378 #ifdef DEBUG
379                 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
380                         goto error0;
381 #endif
382                 /*
383                  * Grab the current block number, for future use.
384                  */
385                 bno = be32_to_cpu(left->bb_rightsib);
386                 /*
387                  * If left block is full enough so that removing one entry
388                  * won't make it too empty, and right-shifting an entry out
389                  * of left to us works, we're done.
390                  */
391                 if (be16_to_cpu(left->bb_numrecs) - 1 >=
392                      XFS_ALLOC_BLOCK_MINRECS(level, cur)) {
393                         if ((error = xfs_alloc_rshift(tcur, level, &i)))
394                                 goto error0;
395                         if (i) {
396                                 ASSERT(be16_to_cpu(block->bb_numrecs) >=
397                                        XFS_ALLOC_BLOCK_MINRECS(level, cur));
398                                 xfs_btree_del_cursor(tcur,
399                                                      XFS_BTREE_NOERROR);
400                                 if (level == 0)
401                                         cur->bc_ptrs[0]++;
402                                 *stat = 1;
403                                 return 0;
404                         }
405                 }
406                 /*
407                  * Otherwise, grab the number of records in right for
408                  * future reference.
409                  */
410                 lrecs = be16_to_cpu(left->bb_numrecs);
411         }
412         /*
413          * Delete the temp cursor, we're done with it.
414          */
415         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
416         /*
417          * If here, we need to do a join to keep the tree balanced.
418          */
419         ASSERT(bno != NULLAGBLOCK);
420         /*
421          * See if we can join with the left neighbor block.
422          */
423         if (lbno != NULLAGBLOCK &&
424             lrecs + be16_to_cpu(block->bb_numrecs) <= XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
425                 /*
426                  * Set "right" to be the starting block,
427                  * "left" to be the left neighbor.
428                  */
429                 rbno = bno;
430                 right = block;
431                 rbp = bp;
432                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
433                                 cur->bc_private.a.agno, lbno, 0, &lbp,
434                                 XFS_ALLOC_BTREE_REF)))
435                         return error;
436                 left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
437                 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
438                         return error;
439         }
440         /*
441          * If that won't work, see if we can join with the right neighbor block.
442          */
443         else if (rbno != NULLAGBLOCK &&
444                  rrecs + be16_to_cpu(block->bb_numrecs) <=
445                   XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
446                 /*
447                  * Set "left" to be the starting block,
448                  * "right" to be the right neighbor.
449                  */
450                 lbno = bno;
451                 left = block;
452                 lbp = bp;
453                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
454                                 cur->bc_private.a.agno, rbno, 0, &rbp,
455                                 XFS_ALLOC_BTREE_REF)))
456                         return error;
457                 right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
458                 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
459                         return error;
460         }
461         /*
462          * Otherwise, we can't fix the imbalance.
463          * Just return.  This is probably a logic error, but it's not fatal.
464          */
465         else {
466                 if (level > 0 && (error = xfs_alloc_decrement(cur, level, &i)))
467                         return error;
468                 *stat = 1;
469                 return 0;
470         }
471         /*
472          * We're now going to join "left" and "right" by moving all the stuff
473          * in "right" to "left" and deleting "right".
474          */
475         if (level > 0) {
476                 /*
477                  * It's a non-leaf.  Move keys and pointers.
478                  */
479                 lkp = XFS_ALLOC_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs) + 1, cur);
480                 lpp = XFS_ALLOC_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs) + 1, cur);
481                 rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
482                 rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
483 #ifdef DEBUG
484                 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
485                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
486                                 return error;
487                 }
488 #endif
489                 memcpy(lkp, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*lkp));
490                 memcpy(lpp, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*lpp));
491                 xfs_alloc_log_keys(cur, lbp, be16_to_cpu(left->bb_numrecs) + 1,
492                                    be16_to_cpu(left->bb_numrecs) +
493                                    be16_to_cpu(right->bb_numrecs));
494                 xfs_alloc_log_ptrs(cur, lbp, be16_to_cpu(left->bb_numrecs) + 1,
495                                    be16_to_cpu(left->bb_numrecs) +
496                                    be16_to_cpu(right->bb_numrecs));
497         } else {
498                 /*
499                  * It's a leaf.  Move records.
500                  */
501                 lrp = XFS_ALLOC_REC_ADDR(left, be16_to_cpu(left->bb_numrecs) + 1, cur);
502                 rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
503                 memcpy(lrp, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*lrp));
504                 xfs_alloc_log_recs(cur, lbp, be16_to_cpu(left->bb_numrecs) + 1,
505                                    be16_to_cpu(left->bb_numrecs) +
506                                    be16_to_cpu(right->bb_numrecs));
507         }
508         /*
509          * If we joined with the left neighbor, set the buffer in the
510          * cursor to the left block, and fix up the index.
511          */
512         if (bp != lbp) {
513                 xfs_btree_setbuf(cur, level, lbp);
514                 cur->bc_ptrs[level] += be16_to_cpu(left->bb_numrecs);
515         }
516         /*
517          * If we joined with the right neighbor and there's a level above
518          * us, increment the cursor at that level.
519          */
520         else if (level + 1 < cur->bc_nlevels &&
521                  (error = xfs_alloc_increment(cur, level + 1, &i)))
522                 return error;
523         /*
524          * Fix up the number of records in the surviving block.
525          */
526         be16_add(&left->bb_numrecs, be16_to_cpu(right->bb_numrecs));
527         /*
528          * Fix up the right block pointer in the surviving block, and log it.
529          */
530         left->bb_rightsib = right->bb_rightsib;
531         xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
532         /*
533          * If there is a right sibling now, make it point to the
534          * remaining block.
535          */
536         if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
537                 xfs_alloc_block_t       *rrblock;
538                 xfs_buf_t               *rrbp;
539
540                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
541                                 cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib), 0,
542                                 &rrbp, XFS_ALLOC_BTREE_REF)))
543                         return error;
544                 rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp);
545                 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
546                         return error;
547                 rrblock->bb_leftsib = cpu_to_be32(lbno);
548                 xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
549         }
550         /*
551          * Free the deleting block by putting it on the freelist.
552          */
553         if ((error = xfs_alloc_put_freelist(cur->bc_tp, cur->bc_private.a.agbp,
554                         NULL, rbno)))
555                 return error;
556         /*
557          * Since blocks move to the free list without the coordination
558          * used in xfs_bmap_finish, we can't allow block to be available
559          * for reallocation and non-transaction writing (user data)
560          * until we know that the transaction that moved it to the free
561          * list is permanently on disk. We track the blocks by declaring
562          * these blocks as "busy"; the busy list is maintained on a
563          * per-ag basis and each transaction records which entries
564          * should be removed when the iclog commits to disk. If a
565          * busy block is allocated, the iclog is pushed up to the
566          * LSN that freed the block.
567          */
568         xfs_alloc_mark_busy(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1);
569         xfs_trans_agbtree_delta(cur->bc_tp, -1);
570
571         /*
572          * Adjust the current level's cursor so that we're left referring
573          * to the right node, after we're done.
574          * If this leaves the ptr value 0 our caller will fix it up.
575          */
576         if (level > 0)
577                 cur->bc_ptrs[level]--;
578         /*
579          * Return value means the next level up has something to do.
580          */
581         *stat = 2;
582         return 0;
583
584 error0:
585         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
586         return error;
587 }
588
589 /*
590  * Insert one record/level.  Return information to the caller
591  * allowing the next level up to proceed if necessary.
592  */
593 STATIC int                              /* error */
594 xfs_alloc_insrec(
595         xfs_btree_cur_t         *cur,   /* btree cursor */
596         int                     level,  /* level to insert record at */
597         xfs_agblock_t           *bnop,  /* i/o: block number inserted */
598         xfs_alloc_rec_t         *recp,  /* i/o: record data inserted */
599         xfs_btree_cur_t         **curp, /* output: new cursor replacing cur */
600         int                     *stat)  /* output: success/failure */
601 {
602         xfs_agf_t               *agf;   /* allocation group freelist header */
603         xfs_alloc_block_t       *block; /* btree block record/key lives in */
604         xfs_buf_t               *bp;    /* buffer for block */
605         int                     error;  /* error return value */
606         int                     i;      /* loop index */
607         xfs_alloc_key_t         key;    /* key value being inserted */
608         xfs_alloc_key_t         *kp;    /* pointer to btree keys */
609         xfs_agblock_t           nbno;   /* block number of allocated block */
610         xfs_btree_cur_t         *ncur;  /* new cursor to be used at next lvl */
611         xfs_alloc_key_t         nkey;   /* new key value, from split */
612         xfs_alloc_rec_t         nrec;   /* new record value, for caller */
613         int                     optr;   /* old ptr value */
614         xfs_alloc_ptr_t         *pp;    /* pointer to btree addresses */
615         int                     ptr;    /* index in btree block for this rec */
616         xfs_alloc_rec_t         *rp;    /* pointer to btree records */
617
618         ASSERT(be32_to_cpu(recp->ar_blockcount) > 0);
619
620         /*
621          * GCC doesn't understand the (arguably complex) control flow in
622          * this function and complains about uninitialized structure fields
623          * without this.
624          */
625         memset(&nrec, 0, sizeof(nrec));
626
627         /*
628          * If we made it to the root level, allocate a new root block
629          * and we're done.
630          */
631         if (level >= cur->bc_nlevels) {
632                 XFS_STATS_INC(xs_abt_insrec);
633                 if ((error = xfs_alloc_newroot(cur, &i)))
634                         return error;
635                 *bnop = NULLAGBLOCK;
636                 *stat = i;
637                 return 0;
638         }
639         /*
640          * Make a key out of the record data to be inserted, and save it.
641          */
642         key.ar_startblock = recp->ar_startblock;
643         key.ar_blockcount = recp->ar_blockcount;
644         optr = ptr = cur->bc_ptrs[level];
645         /*
646          * If we're off the left edge, return failure.
647          */
648         if (ptr == 0) {
649                 *stat = 0;
650                 return 0;
651         }
652         XFS_STATS_INC(xs_abt_insrec);
653         /*
654          * Get pointers to the btree buffer and block.
655          */
656         bp = cur->bc_bufs[level];
657         block = XFS_BUF_TO_ALLOC_BLOCK(bp);
658 #ifdef DEBUG
659         if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
660                 return error;
661         /*
662          * Check that the new entry is being inserted in the right place.
663          */
664         if (ptr <= be16_to_cpu(block->bb_numrecs)) {
665                 if (level == 0) {
666                         rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
667                         xfs_btree_check_rec(cur->bc_btnum, recp, rp);
668                 } else {
669                         kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur);
670                         xfs_btree_check_key(cur->bc_btnum, &key, kp);
671                 }
672         }
673 #endif
674         nbno = NULLAGBLOCK;
675         ncur = (xfs_btree_cur_t *)0;
676         /*
677          * If the block is full, we can't insert the new entry until we
678          * make the block un-full.
679          */
680         if (be16_to_cpu(block->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
681                 /*
682                  * First, try shifting an entry to the right neighbor.
683                  */
684                 if ((error = xfs_alloc_rshift(cur, level, &i)))
685                         return error;
686                 if (i) {
687                         /* nothing */
688                 }
689                 /*
690                  * Next, try shifting an entry to the left neighbor.
691                  */
692                 else {
693                         if ((error = xfs_alloc_lshift(cur, level, &i)))
694                                 return error;
695                         if (i)
696                                 optr = ptr = cur->bc_ptrs[level];
697                         else {
698                                 /*
699                                  * Next, try splitting the current block in
700                                  * half. If this works we have to re-set our
701                                  * variables because we could be in a
702                                  * different block now.
703                                  */
704                                 if ((error = xfs_alloc_split(cur, level, &nbno,
705                                                 &nkey, &ncur, &i)))
706                                         return error;
707                                 if (i) {
708                                         bp = cur->bc_bufs[level];
709                                         block = XFS_BUF_TO_ALLOC_BLOCK(bp);
710 #ifdef DEBUG
711                                         if ((error =
712                                                 xfs_btree_check_sblock(cur,
713                                                         block, level, bp)))
714                                                 return error;
715 #endif
716                                         ptr = cur->bc_ptrs[level];
717                                         nrec.ar_startblock = nkey.ar_startblock;
718                                         nrec.ar_blockcount = nkey.ar_blockcount;
719                                 }
720                                 /*
721                                  * Otherwise the insert fails.
722                                  */
723                                 else {
724                                         *stat = 0;
725                                         return 0;
726                                 }
727                         }
728                 }
729         }
730         /*
731          * At this point we know there's room for our new entry in the block
732          * we're pointing at.
733          */
734         if (level > 0) {
735                 /*
736                  * It's a non-leaf entry.  Make a hole for the new data
737                  * in the key and ptr regions of the block.
738                  */
739                 kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
740                 pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
741 #ifdef DEBUG
742                 for (i = be16_to_cpu(block->bb_numrecs); i >= ptr; i--) {
743                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(pp[i - 1]), level)))
744                                 return error;
745                 }
746 #endif
747                 memmove(&kp[ptr], &kp[ptr - 1],
748                         (be16_to_cpu(block->bb_numrecs) - ptr + 1) * sizeof(*kp));
749                 memmove(&pp[ptr], &pp[ptr - 1],
750                         (be16_to_cpu(block->bb_numrecs) - ptr + 1) * sizeof(*pp));
751 #ifdef DEBUG
752                 if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
753                         return error;
754 #endif
755                 /*
756                  * Now stuff the new data in, bump numrecs and log the new data.
757                  */
758                 kp[ptr - 1] = key;
759                 pp[ptr - 1] = cpu_to_be32(*bnop);
760                 be16_add(&block->bb_numrecs, 1);
761                 xfs_alloc_log_keys(cur, bp, ptr, be16_to_cpu(block->bb_numrecs));
762                 xfs_alloc_log_ptrs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs));
763 #ifdef DEBUG
764                 if (ptr < be16_to_cpu(block->bb_numrecs))
765                         xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
766                                 kp + ptr);
767 #endif
768         } else {
769                 /*
770                  * It's a leaf entry.  Make a hole for the new record.
771                  */
772                 rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
773                 memmove(&rp[ptr], &rp[ptr - 1],
774                         (be16_to_cpu(block->bb_numrecs) - ptr + 1) * sizeof(*rp));
775                 /*
776                  * Now stuff the new record in, bump numrecs
777                  * and log the new data.
778                  */
779                 rp[ptr - 1] = *recp; /* INT_: struct copy */
780                 be16_add(&block->bb_numrecs, 1);
781                 xfs_alloc_log_recs(cur, bp, ptr, be16_to_cpu(block->bb_numrecs));
782 #ifdef DEBUG
783                 if (ptr < be16_to_cpu(block->bb_numrecs))
784                         xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
785                                 rp + ptr);
786 #endif
787         }
788         /*
789          * Log the new number of records in the btree header.
790          */
791         xfs_alloc_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
792         /*
793          * If we inserted at the start of a block, update the parents' keys.
794          */
795         if (optr == 1 && (error = xfs_alloc_updkey(cur, &key, level + 1)))
796                 return error;
797         /*
798          * Look to see if the longest extent in the allocation group
799          * needs to be updated.
800          */
801
802         agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
803         if (level == 0 &&
804             cur->bc_btnum == XFS_BTNUM_CNT &&
805             be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
806             be32_to_cpu(recp->ar_blockcount) > be32_to_cpu(agf->agf_longest)) {
807                 /*
808                  * If this is a leaf in the by-size btree and there
809                  * is no right sibling block and this block is bigger
810                  * than the previous longest block, update it.
811                  */
812                 agf->agf_longest = recp->ar_blockcount;
813                 cur->bc_mp->m_perag[be32_to_cpu(agf->agf_seqno)].pagf_longest
814                         = be32_to_cpu(recp->ar_blockcount);
815                 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
816                         XFS_AGF_LONGEST);
817         }
818         /*
819          * Return the new block number, if any.
820          * If there is one, give back a record value and a cursor too.
821          */
822         *bnop = nbno;
823         if (nbno != NULLAGBLOCK) {
824                 *recp = nrec; /* INT_: struct copy */
825                 *curp = ncur; /* INT_: struct copy */
826         }
827         *stat = 1;
828         return 0;
829 }
830
831 /*
832  * Log header fields from a btree block.
833  */
834 STATIC void
835 xfs_alloc_log_block(
836         xfs_trans_t             *tp,    /* transaction pointer */
837         xfs_buf_t               *bp,    /* buffer containing btree block */
838         int                     fields) /* mask of fields: XFS_BB_... */
839 {
840         int                     first;  /* first byte offset logged */
841         int                     last;   /* last byte offset logged */
842         static const short      offsets[] = {   /* table of offsets */
843                 offsetof(xfs_alloc_block_t, bb_magic),
844                 offsetof(xfs_alloc_block_t, bb_level),
845                 offsetof(xfs_alloc_block_t, bb_numrecs),
846                 offsetof(xfs_alloc_block_t, bb_leftsib),
847                 offsetof(xfs_alloc_block_t, bb_rightsib),
848                 sizeof(xfs_alloc_block_t)
849         };
850
851         xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last);
852         xfs_trans_log_buf(tp, bp, first, last);
853 }
854
855 /*
856  * Log keys from a btree block (nonleaf).
857  */
858 STATIC void
859 xfs_alloc_log_keys(
860         xfs_btree_cur_t         *cur,   /* btree cursor */
861         xfs_buf_t               *bp,    /* buffer containing btree block */
862         int                     kfirst, /* index of first key to log */
863         int                     klast)  /* index of last key to log */
864 {
865         xfs_alloc_block_t       *block; /* btree block to log from */
866         int                     first;  /* first byte offset logged */
867         xfs_alloc_key_t         *kp;    /* key pointer in btree block */
868         int                     last;   /* last byte offset logged */
869
870         block = XFS_BUF_TO_ALLOC_BLOCK(bp);
871         kp = XFS_ALLOC_KEY_ADDR(block, 1, cur);
872         first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
873         last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
874         xfs_trans_log_buf(cur->bc_tp, bp, first, last);
875 }
876
877 /*
878  * Log block pointer fields from a btree block (nonleaf).
879  */
880 STATIC void
881 xfs_alloc_log_ptrs(
882         xfs_btree_cur_t         *cur,   /* btree cursor */
883         xfs_buf_t               *bp,    /* buffer containing btree block */
884         int                     pfirst, /* index of first pointer to log */
885         int                     plast)  /* index of last pointer to log */
886 {
887         xfs_alloc_block_t       *block; /* btree block to log from */
888         int                     first;  /* first byte offset logged */
889         int                     last;   /* last byte offset logged */
890         xfs_alloc_ptr_t         *pp;    /* block-pointer pointer in btree blk */
891
892         block = XFS_BUF_TO_ALLOC_BLOCK(bp);
893         pp = XFS_ALLOC_PTR_ADDR(block, 1, cur);
894         first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
895         last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
896         xfs_trans_log_buf(cur->bc_tp, bp, first, last);
897 }
898
899 /*
900  * Log records from a btree block (leaf).
901  */
902 STATIC void
903 xfs_alloc_log_recs(
904         xfs_btree_cur_t         *cur,   /* btree cursor */
905         xfs_buf_t               *bp,    /* buffer containing btree block */
906         int                     rfirst, /* index of first record to log */
907         int                     rlast)  /* index of last record to log */
908 {
909         xfs_alloc_block_t       *block; /* btree block to log from */
910         int                     first;  /* first byte offset logged */
911         int                     last;   /* last byte offset logged */
912         xfs_alloc_rec_t         *rp;    /* record pointer for btree block */
913
914
915         block = XFS_BUF_TO_ALLOC_BLOCK(bp);
916         rp = XFS_ALLOC_REC_ADDR(block, 1, cur);
917 #ifdef DEBUG
918         {
919                 xfs_agf_t       *agf;
920                 xfs_alloc_rec_t *p;
921
922                 agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
923                 for (p = &rp[rfirst - 1]; p <= &rp[rlast - 1]; p++)
924                         ASSERT(be32_to_cpu(p->ar_startblock) +
925                                be32_to_cpu(p->ar_blockcount) <=
926                                be32_to_cpu(agf->agf_length));
927         }
928 #endif
929         first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
930         last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
931         xfs_trans_log_buf(cur->bc_tp, bp, first, last);
932 }
933
934 /*
935  * Lookup the record.  The cursor is made to point to it, based on dir.
936  * Return 0 if can't find any such record, 1 for success.
937  */
938 STATIC int                              /* error */
939 xfs_alloc_lookup(
940         xfs_btree_cur_t         *cur,   /* btree cursor */
941         xfs_lookup_t            dir,    /* <=, ==, or >= */
942         int                     *stat)  /* success/failure */
943 {
944         xfs_agblock_t           agbno;  /* a.g. relative btree block number */
945         xfs_agnumber_t          agno;   /* allocation group number */
946         xfs_alloc_block_t       *block=NULL;    /* current btree block */
947         int                     diff;   /* difference for the current key */
948         int                     error;  /* error return value */
949         int                     keyno=0;        /* current key number */
950         int                     level;  /* level in the btree */
951         xfs_mount_t             *mp;    /* file system mount point */
952
953         XFS_STATS_INC(xs_abt_lookup);
954         /*
955          * Get the allocation group header, and the root block number.
956          */
957         mp = cur->bc_mp;
958
959         {
960                 xfs_agf_t       *agf;   /* a.g. freespace header */
961
962                 agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
963                 agno = be32_to_cpu(agf->agf_seqno);
964                 agbno = be32_to_cpu(agf->agf_roots[cur->bc_btnum]);
965         }
966         /*
967          * Iterate over each level in the btree, starting at the root.
968          * For each level above the leaves, find the key we need, based
969          * on the lookup record, then follow the corresponding block
970          * pointer down to the next level.
971          */
972         for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
973                 xfs_buf_t       *bp;    /* buffer pointer for btree block */
974                 xfs_daddr_t     d;      /* disk address of btree block */
975
976                 /*
977                  * Get the disk address we're looking for.
978                  */
979                 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
980                 /*
981                  * If the old buffer at this level is for a different block,
982                  * throw it away, otherwise just use it.
983                  */
984                 bp = cur->bc_bufs[level];
985                 if (bp && XFS_BUF_ADDR(bp) != d)
986                         bp = (xfs_buf_t *)0;
987                 if (!bp) {
988                         /*
989                          * Need to get a new buffer.  Read it, then
990                          * set it in the cursor, releasing the old one.
991                          */
992                         if ((error = xfs_btree_read_bufs(mp, cur->bc_tp, agno,
993                                         agbno, 0, &bp, XFS_ALLOC_BTREE_REF)))
994                                 return error;
995                         xfs_btree_setbuf(cur, level, bp);
996                         /*
997                          * Point to the btree block, now that we have the buffer
998                          */
999                         block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1000                         if ((error = xfs_btree_check_sblock(cur, block, level,
1001                                         bp)))
1002                                 return error;
1003                 } else
1004                         block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1005                 /*
1006                  * If we already had a key match at a higher level, we know
1007                  * we need to use the first entry in this block.
1008                  */
1009                 if (diff == 0)
1010                         keyno = 1;
1011                 /*
1012                  * Otherwise we need to search this block.  Do a binary search.
1013                  */
1014                 else {
1015                         int             high;   /* high entry number */
1016                         xfs_alloc_key_t *kkbase=NULL;/* base of keys in block */
1017                         xfs_alloc_rec_t *krbase=NULL;/* base of records in block */
1018                         int             low;    /* low entry number */
1019
1020                         /*
1021                          * Get a pointer to keys or records.
1022                          */
1023                         if (level > 0)
1024                                 kkbase = XFS_ALLOC_KEY_ADDR(block, 1, cur);
1025                         else
1026                                 krbase = XFS_ALLOC_REC_ADDR(block, 1, cur);
1027                         /*
1028                          * Set low and high entry numbers, 1-based.
1029                          */
1030                         low = 1;
1031                         if (!(high = be16_to_cpu(block->bb_numrecs))) {
1032                                 /*
1033                                  * If the block is empty, the tree must
1034                                  * be an empty leaf.
1035                                  */
1036                                 ASSERT(level == 0 && cur->bc_nlevels == 1);
1037                                 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
1038                                 *stat = 0;
1039                                 return 0;
1040                         }
1041                         /*
1042                          * Binary search the block.
1043                          */
1044                         while (low <= high) {
1045                                 xfs_extlen_t    blockcount;     /* key value */
1046                                 xfs_agblock_t   startblock;     /* key value */
1047
1048                                 XFS_STATS_INC(xs_abt_compare);
1049                                 /*
1050                                  * keyno is average of low and high.
1051                                  */
1052                                 keyno = (low + high) >> 1;
1053                                 /*
1054                                  * Get startblock & blockcount.
1055                                  */
1056                                 if (level > 0) {
1057                                         xfs_alloc_key_t *kkp;
1058
1059                                         kkp = kkbase + keyno - 1;
1060                                         startblock = be32_to_cpu(kkp->ar_startblock);
1061                                         blockcount = be32_to_cpu(kkp->ar_blockcount);
1062                                 } else {
1063                                         xfs_alloc_rec_t *krp;
1064
1065                                         krp = krbase + keyno - 1;
1066                                         startblock = be32_to_cpu(krp->ar_startblock);
1067                                         blockcount = be32_to_cpu(krp->ar_blockcount);
1068                                 }
1069                                 /*
1070                                  * Compute difference to get next direction.
1071                                  */
1072                                 if (cur->bc_btnum == XFS_BTNUM_BNO)
1073                                         diff = (int)startblock -
1074                                                (int)cur->bc_rec.a.ar_startblock;
1075                                 else if (!(diff = (int)blockcount -
1076                                             (int)cur->bc_rec.a.ar_blockcount))
1077                                         diff = (int)startblock -
1078                                             (int)cur->bc_rec.a.ar_startblock;
1079                                 /*
1080                                  * Less than, move right.
1081                                  */
1082                                 if (diff < 0)
1083                                         low = keyno + 1;
1084                                 /*
1085                                  * Greater than, move left.
1086                                  */
1087                                 else if (diff > 0)
1088                                         high = keyno - 1;
1089                                 /*
1090                                  * Equal, we're done.
1091                                  */
1092                                 else
1093                                         break;
1094                         }
1095                 }
1096                 /*
1097                  * If there are more levels, set up for the next level
1098                  * by getting the block number and filling in the cursor.
1099                  */
1100                 if (level > 0) {
1101                         /*
1102                          * If we moved left, need the previous key number,
1103                          * unless there isn't one.
1104                          */
1105                         if (diff > 0 && --keyno < 1)
1106                                 keyno = 1;
1107                         agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, keyno, cur));
1108 #ifdef DEBUG
1109                         if ((error = xfs_btree_check_sptr(cur, agbno, level)))
1110                                 return error;
1111 #endif
1112                         cur->bc_ptrs[level] = keyno;
1113                 }
1114         }
1115         /*
1116          * Done with the search.
1117          * See if we need to adjust the results.
1118          */
1119         if (dir != XFS_LOOKUP_LE && diff < 0) {
1120                 keyno++;
1121                 /*
1122                  * If ge search and we went off the end of the block, but it's
1123                  * not the last block, we're in the wrong block.
1124                  */
1125                 if (dir == XFS_LOOKUP_GE &&
1126                     keyno > be16_to_cpu(block->bb_numrecs) &&
1127                     be32_to_cpu(block->bb_rightsib) != NULLAGBLOCK) {
1128                         int     i;
1129
1130                         cur->bc_ptrs[0] = keyno;
1131                         if ((error = xfs_alloc_increment(cur, 0, &i)))
1132                                 return error;
1133                         XFS_WANT_CORRUPTED_RETURN(i == 1);
1134                         *stat = 1;
1135                         return 0;
1136                 }
1137         }
1138         else if (dir == XFS_LOOKUP_LE && diff > 0)
1139                 keyno--;
1140         cur->bc_ptrs[0] = keyno;
1141         /*
1142          * Return if we succeeded or not.
1143          */
1144         if (keyno == 0 || keyno > be16_to_cpu(block->bb_numrecs))
1145                 *stat = 0;
1146         else
1147                 *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
1148         return 0;
1149 }
1150
1151 /*
1152  * Move 1 record left from cur/level if possible.
1153  * Update cur to reflect the new path.
1154  */
1155 STATIC int                              /* error */
1156 xfs_alloc_lshift(
1157         xfs_btree_cur_t         *cur,   /* btree cursor */
1158         int                     level,  /* level to shift record on */
1159         int                     *stat)  /* success/failure */
1160 {
1161         int                     error;  /* error return value */
1162 #ifdef DEBUG
1163         int                     i;      /* loop index */
1164 #endif
1165         xfs_alloc_key_t         key;    /* key value for leaf level upward */
1166         xfs_buf_t               *lbp;   /* buffer for left neighbor block */
1167         xfs_alloc_block_t       *left;  /* left neighbor btree block */
1168         int                     nrec;   /* new number of left block entries */
1169         xfs_buf_t               *rbp;   /* buffer for right (current) block */
1170         xfs_alloc_block_t       *right; /* right (current) btree block */
1171         xfs_alloc_key_t         *rkp=NULL;      /* key pointer for right block */
1172         xfs_alloc_ptr_t         *rpp=NULL;      /* address pointer for right block */
1173         xfs_alloc_rec_t         *rrp=NULL;      /* record pointer for right block */
1174
1175         /*
1176          * Set up variables for this block as "right".
1177          */
1178         rbp = cur->bc_bufs[level];
1179         right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
1180 #ifdef DEBUG
1181         if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1182                 return error;
1183 #endif
1184         /*
1185          * If we've got no left sibling then we can't shift an entry left.
1186          */
1187         if (be32_to_cpu(right->bb_leftsib) == NULLAGBLOCK) {
1188                 *stat = 0;
1189                 return 0;
1190         }
1191         /*
1192          * If the cursor entry is the one that would be moved, don't
1193          * do it... it's too complicated.
1194          */
1195         if (cur->bc_ptrs[level] <= 1) {
1196                 *stat = 0;
1197                 return 0;
1198         }
1199         /*
1200          * Set up the left neighbor as "left".
1201          */
1202         if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1203                         cur->bc_private.a.agno, be32_to_cpu(right->bb_leftsib),
1204                         0, &lbp, XFS_ALLOC_BTREE_REF)))
1205                 return error;
1206         left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
1207         if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1208                 return error;
1209         /*
1210          * If it's full, it can't take another entry.
1211          */
1212         if (be16_to_cpu(left->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
1213                 *stat = 0;
1214                 return 0;
1215         }
1216         nrec = be16_to_cpu(left->bb_numrecs) + 1;
1217         /*
1218          * If non-leaf, copy a key and a ptr to the left block.
1219          */
1220         if (level > 0) {
1221                 xfs_alloc_key_t *lkp;   /* key pointer for left block */
1222                 xfs_alloc_ptr_t *lpp;   /* address pointer for left block */
1223
1224                 lkp = XFS_ALLOC_KEY_ADDR(left, nrec, cur);
1225                 rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
1226                 *lkp = *rkp;
1227                 xfs_alloc_log_keys(cur, lbp, nrec, nrec);
1228                 lpp = XFS_ALLOC_PTR_ADDR(left, nrec, cur);
1229                 rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
1230 #ifdef DEBUG
1231                 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*rpp), level)))
1232                         return error;
1233 #endif
1234                 *lpp = *rpp; /* INT_: copy */
1235                 xfs_alloc_log_ptrs(cur, lbp, nrec, nrec);
1236                 xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
1237         }
1238         /*
1239          * If leaf, copy a record to the left block.
1240          */
1241         else {
1242                 xfs_alloc_rec_t *lrp;   /* record pointer for left block */
1243
1244                 lrp = XFS_ALLOC_REC_ADDR(left, nrec, cur);
1245                 rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
1246                 *lrp = *rrp;
1247                 xfs_alloc_log_recs(cur, lbp, nrec, nrec);
1248                 xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
1249         }
1250         /*
1251          * Bump and log left's numrecs, decrement and log right's numrecs.
1252          */
1253         be16_add(&left->bb_numrecs, 1);
1254         xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1255         be16_add(&right->bb_numrecs, -1);
1256         xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1257         /*
1258          * Slide the contents of right down one entry.
1259          */
1260         if (level > 0) {
1261 #ifdef DEBUG
1262                 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1263                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i + 1]),
1264                                         level)))
1265                                 return error;
1266                 }
1267 #endif
1268                 memmove(rkp, rkp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1269                 memmove(rpp, rpp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1270                 xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1271                 xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1272         } else {
1273                 memmove(rrp, rrp + 1, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1274                 xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1275                 key.ar_startblock = rrp->ar_startblock;
1276                 key.ar_blockcount = rrp->ar_blockcount;
1277                 rkp = &key;
1278         }
1279         /*
1280          * Update the parent key values of right.
1281          */
1282         if ((error = xfs_alloc_updkey(cur, rkp, level + 1)))
1283                 return error;
1284         /*
1285          * Slide the cursor value left one.
1286          */
1287         cur->bc_ptrs[level]--;
1288         *stat = 1;
1289         return 0;
1290 }
1291
1292 /*
1293  * Allocate a new root block, fill it in.
1294  */
1295 STATIC int                              /* error */
1296 xfs_alloc_newroot(
1297         xfs_btree_cur_t         *cur,   /* btree cursor */
1298         int                     *stat)  /* success/failure */
1299 {
1300         int                     error;  /* error return value */
1301         xfs_agblock_t           lbno;   /* left block number */
1302         xfs_buf_t               *lbp;   /* left btree buffer */
1303         xfs_alloc_block_t       *left;  /* left btree block */
1304         xfs_mount_t             *mp;    /* mount structure */
1305         xfs_agblock_t           nbno;   /* new block number */
1306         xfs_buf_t               *nbp;   /* new (root) buffer */
1307         xfs_alloc_block_t       *new;   /* new (root) btree block */
1308         int                     nptr;   /* new value for key index, 1 or 2 */
1309         xfs_agblock_t           rbno;   /* right block number */
1310         xfs_buf_t               *rbp;   /* right btree buffer */
1311         xfs_alloc_block_t       *right; /* right btree block */
1312
1313         mp = cur->bc_mp;
1314
1315         ASSERT(cur->bc_nlevels < XFS_AG_MAXLEVELS(mp));
1316         /*
1317          * Get a buffer from the freelist blocks, for the new root.
1318          */
1319         if ((error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
1320                         &nbno)))
1321                 return error;
1322         /*
1323          * None available, we fail.
1324          */
1325         if (nbno == NULLAGBLOCK) {
1326                 *stat = 0;
1327                 return 0;
1328         }
1329         xfs_trans_agbtree_delta(cur->bc_tp, 1);
1330         nbp = xfs_btree_get_bufs(mp, cur->bc_tp, cur->bc_private.a.agno, nbno,
1331                 0);
1332         new = XFS_BUF_TO_ALLOC_BLOCK(nbp);
1333         /*
1334          * Set the root data in the a.g. freespace structure.
1335          */
1336         {
1337                 xfs_agf_t       *agf;   /* a.g. freespace header */
1338                 xfs_agnumber_t  seqno;
1339
1340                 agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
1341                 agf->agf_roots[cur->bc_btnum] = cpu_to_be32(nbno);
1342                 be32_add(&agf->agf_levels[cur->bc_btnum], 1);
1343                 seqno = be32_to_cpu(agf->agf_seqno);
1344                 mp->m_perag[seqno].pagf_levels[cur->bc_btnum]++;
1345                 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
1346                         XFS_AGF_ROOTS | XFS_AGF_LEVELS);
1347         }
1348         /*
1349          * At the previous root level there are now two blocks: the old
1350          * root, and the new block generated when it was split.
1351          * We don't know which one the cursor is pointing at, so we
1352          * set up variables "left" and "right" for each case.
1353          */
1354         lbp = cur->bc_bufs[cur->bc_nlevels - 1];
1355         left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
1356 #ifdef DEBUG
1357         if ((error = xfs_btree_check_sblock(cur, left, cur->bc_nlevels - 1, lbp)))
1358                 return error;
1359 #endif
1360         if (be32_to_cpu(left->bb_rightsib) != NULLAGBLOCK) {
1361                 /*
1362                  * Our block is left, pick up the right block.
1363                  */
1364                 lbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(lbp));
1365                 rbno = be32_to_cpu(left->bb_rightsib);
1366                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
1367                                 cur->bc_private.a.agno, rbno, 0, &rbp,
1368                                 XFS_ALLOC_BTREE_REF)))
1369                         return error;
1370                 right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
1371                 if ((error = xfs_btree_check_sblock(cur, right,
1372                                 cur->bc_nlevels - 1, rbp)))
1373                         return error;
1374                 nptr = 1;
1375         } else {
1376                 /*
1377                  * Our block is right, pick up the left block.
1378                  */
1379                 rbp = lbp;
1380                 right = left;
1381                 rbno = XFS_DADDR_TO_AGBNO(mp, XFS_BUF_ADDR(rbp));
1382                 lbno = be32_to_cpu(right->bb_leftsib);
1383                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
1384                                 cur->bc_private.a.agno, lbno, 0, &lbp,
1385                                 XFS_ALLOC_BTREE_REF)))
1386                         return error;
1387                 left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
1388                 if ((error = xfs_btree_check_sblock(cur, left,
1389                                 cur->bc_nlevels - 1, lbp)))
1390                         return error;
1391                 nptr = 2;
1392         }
1393         /*
1394          * Fill in the new block's btree header and log it.
1395          */
1396         new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1397         new->bb_level = cpu_to_be16(cur->bc_nlevels);
1398         new->bb_numrecs = cpu_to_be16(2);
1399         new->bb_leftsib = cpu_to_be32(NULLAGBLOCK);
1400         new->bb_rightsib = cpu_to_be32(NULLAGBLOCK);
1401         xfs_alloc_log_block(cur->bc_tp, nbp, XFS_BB_ALL_BITS);
1402         ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
1403         /*
1404          * Fill in the key data in the new root.
1405          */
1406         {
1407                 xfs_alloc_key_t         *kp;    /* btree key pointer */
1408
1409                 kp = XFS_ALLOC_KEY_ADDR(new, 1, cur);
1410                 if (be16_to_cpu(left->bb_level) > 0) {
1411                         kp[0] = *XFS_ALLOC_KEY_ADDR(left, 1, cur); /* INT_: structure copy */
1412                         kp[1] = *XFS_ALLOC_KEY_ADDR(right, 1, cur);/* INT_: structure copy */
1413                 } else {
1414                         xfs_alloc_rec_t *rp;    /* btree record pointer */
1415
1416                         rp = XFS_ALLOC_REC_ADDR(left, 1, cur);
1417                         kp[0].ar_startblock = rp->ar_startblock;
1418                         kp[0].ar_blockcount = rp->ar_blockcount;
1419                         rp = XFS_ALLOC_REC_ADDR(right, 1, cur);
1420                         kp[1].ar_startblock = rp->ar_startblock;
1421                         kp[1].ar_blockcount = rp->ar_blockcount;
1422                 }
1423         }
1424         xfs_alloc_log_keys(cur, nbp, 1, 2);
1425         /*
1426          * Fill in the pointer data in the new root.
1427          */
1428         {
1429                 xfs_alloc_ptr_t         *pp;    /* btree address pointer */
1430
1431                 pp = XFS_ALLOC_PTR_ADDR(new, 1, cur);
1432                 pp[0] = cpu_to_be32(lbno);
1433                 pp[1] = cpu_to_be32(rbno);
1434         }
1435         xfs_alloc_log_ptrs(cur, nbp, 1, 2);
1436         /*
1437          * Fix up the cursor.
1438          */
1439         xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
1440         cur->bc_ptrs[cur->bc_nlevels] = nptr;
1441         cur->bc_nlevels++;
1442         *stat = 1;
1443         return 0;
1444 }
1445
1446 /*
1447  * Move 1 record right from cur/level if possible.
1448  * Update cur to reflect the new path.
1449  */
1450 STATIC int                              /* error */
1451 xfs_alloc_rshift(
1452         xfs_btree_cur_t         *cur,   /* btree cursor */
1453         int                     level,  /* level to shift record on */
1454         int                     *stat)  /* success/failure */
1455 {
1456         int                     error;  /* error return value */
1457         int                     i;      /* loop index */
1458         xfs_alloc_key_t         key;    /* key value for leaf level upward */
1459         xfs_buf_t               *lbp;   /* buffer for left (current) block */
1460         xfs_alloc_block_t       *left;  /* left (current) btree block */
1461         xfs_buf_t               *rbp;   /* buffer for right neighbor block */
1462         xfs_alloc_block_t       *right; /* right neighbor btree block */
1463         xfs_alloc_key_t         *rkp;   /* key pointer for right block */
1464         xfs_btree_cur_t         *tcur;  /* temporary cursor */
1465
1466         /*
1467          * Set up variables for this block as "left".
1468          */
1469         lbp = cur->bc_bufs[level];
1470         left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
1471 #ifdef DEBUG
1472         if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1473                 return error;
1474 #endif
1475         /*
1476          * If we've got no right sibling then we can't shift an entry right.
1477          */
1478         if (be32_to_cpu(left->bb_rightsib) == NULLAGBLOCK) {
1479                 *stat = 0;
1480                 return 0;
1481         }
1482         /*
1483          * If the cursor entry is the one that would be moved, don't
1484          * do it... it's too complicated.
1485          */
1486         if (cur->bc_ptrs[level] >= be16_to_cpu(left->bb_numrecs)) {
1487                 *stat = 0;
1488                 return 0;
1489         }
1490         /*
1491          * Set up the right neighbor as "right".
1492          */
1493         if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1494                         cur->bc_private.a.agno, be32_to_cpu(left->bb_rightsib),
1495                         0, &rbp, XFS_ALLOC_BTREE_REF)))
1496                 return error;
1497         right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
1498         if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1499                 return error;
1500         /*
1501          * If it's full, it can't take another entry.
1502          */
1503         if (be16_to_cpu(right->bb_numrecs) == XFS_ALLOC_BLOCK_MAXRECS(level, cur)) {
1504                 *stat = 0;
1505                 return 0;
1506         }
1507         /*
1508          * Make a hole at the start of the right neighbor block, then
1509          * copy the last left block entry to the hole.
1510          */
1511         if (level > 0) {
1512                 xfs_alloc_key_t *lkp;   /* key pointer for left block */
1513                 xfs_alloc_ptr_t *lpp;   /* address pointer for left block */
1514                 xfs_alloc_ptr_t *rpp;   /* address pointer for right block */
1515
1516                 lkp = XFS_ALLOC_KEY_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1517                 lpp = XFS_ALLOC_PTR_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1518                 rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
1519                 rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
1520 #ifdef DEBUG
1521                 for (i = be16_to_cpu(right->bb_numrecs) - 1; i >= 0; i--) {
1522                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(rpp[i]), level)))
1523                                 return error;
1524                 }
1525 #endif
1526                 memmove(rkp + 1, rkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1527                 memmove(rpp + 1, rpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1528 #ifdef DEBUG
1529                 if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(*lpp), level)))
1530                         return error;
1531 #endif
1532                 *rkp = *lkp; /* INT_: copy */
1533                 *rpp = *lpp; /* INT_: copy */
1534                 xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1535                 xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1536                 xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
1537         } else {
1538                 xfs_alloc_rec_t *lrp;   /* record pointer for left block */
1539                 xfs_alloc_rec_t *rrp;   /* record pointer for right block */
1540
1541                 lrp = XFS_ALLOC_REC_ADDR(left, be16_to_cpu(left->bb_numrecs), cur);
1542                 rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
1543                 memmove(rrp + 1, rrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1544                 *rrp = *lrp;
1545                 xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs) + 1);
1546                 key.ar_startblock = rrp->ar_startblock;
1547                 key.ar_blockcount = rrp->ar_blockcount;
1548                 rkp = &key;
1549                 xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
1550         }
1551         /*
1552          * Decrement and log left's numrecs, bump and log right's numrecs.
1553          */
1554         be16_add(&left->bb_numrecs, -1);
1555         xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1556         be16_add(&right->bb_numrecs, 1);
1557         xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1558         /*
1559          * Using a temporary cursor, update the parent key values of the
1560          * block on the right.
1561          */
1562         if ((error = xfs_btree_dup_cursor(cur, &tcur)))
1563                 return error;
1564         i = xfs_btree_lastrec(tcur, level);
1565         XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1566         if ((error = xfs_alloc_increment(tcur, level, &i)) ||
1567             (error = xfs_alloc_updkey(tcur, rkp, level + 1)))
1568                 goto error0;
1569         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
1570         *stat = 1;
1571         return 0;
1572 error0:
1573         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
1574         return error;
1575 }
1576
1577 /*
1578  * Split cur/level block in half.
1579  * Return new block number and its first record (to be inserted into parent).
1580  */
1581 STATIC int                              /* error */
1582 xfs_alloc_split(
1583         xfs_btree_cur_t         *cur,   /* btree cursor */
1584         int                     level,  /* level to split */
1585         xfs_agblock_t           *bnop,  /* output: block number allocated */
1586         xfs_alloc_key_t         *keyp,  /* output: first key of new block */
1587         xfs_btree_cur_t         **curp, /* output: new cursor */
1588         int                     *stat)  /* success/failure */
1589 {
1590         int                     error;  /* error return value */
1591         int                     i;      /* loop index/record number */
1592         xfs_agblock_t           lbno;   /* left (current) block number */
1593         xfs_buf_t               *lbp;   /* buffer for left block */
1594         xfs_alloc_block_t       *left;  /* left (current) btree block */
1595         xfs_agblock_t           rbno;   /* right (new) block number */
1596         xfs_buf_t               *rbp;   /* buffer for right block */
1597         xfs_alloc_block_t       *right; /* right (new) btree block */
1598
1599         /*
1600          * Allocate the new block from the freelist.
1601          * If we can't do it, we're toast.  Give up.
1602          */
1603         if ((error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_private.a.agbp,
1604                         &rbno)))
1605                 return error;
1606         if (rbno == NULLAGBLOCK) {
1607                 *stat = 0;
1608                 return 0;
1609         }
1610         xfs_trans_agbtree_delta(cur->bc_tp, 1);
1611         rbp = xfs_btree_get_bufs(cur->bc_mp, cur->bc_tp, cur->bc_private.a.agno,
1612                 rbno, 0);
1613         /*
1614          * Set up the new block as "right".
1615          */
1616         right = XFS_BUF_TO_ALLOC_BLOCK(rbp);
1617         /*
1618          * "Left" is the current (according to the cursor) block.
1619          */
1620         lbp = cur->bc_bufs[level];
1621         left = XFS_BUF_TO_ALLOC_BLOCK(lbp);
1622 #ifdef DEBUG
1623         if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1624                 return error;
1625 #endif
1626         /*
1627          * Fill in the btree header for the new block.
1628          */
1629         right->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
1630         right->bb_level = left->bb_level;
1631         right->bb_numrecs = cpu_to_be16(be16_to_cpu(left->bb_numrecs) / 2);
1632         /*
1633          * Make sure that if there's an odd number of entries now, that
1634          * each new block will have the same number of entries.
1635          */
1636         if ((be16_to_cpu(left->bb_numrecs) & 1) &&
1637             cur->bc_ptrs[level] <= be16_to_cpu(right->bb_numrecs) + 1)
1638                 be16_add(&right->bb_numrecs, 1);
1639         i = be16_to_cpu(left->bb_numrecs) - be16_to_cpu(right->bb_numrecs) + 1;
1640         /*
1641          * For non-leaf blocks, copy keys and addresses over to the new block.
1642          */
1643         if (level > 0) {
1644                 xfs_alloc_key_t *lkp;   /* left btree key pointer */
1645                 xfs_alloc_ptr_t *lpp;   /* left btree address pointer */
1646                 xfs_alloc_key_t *rkp;   /* right btree key pointer */
1647                 xfs_alloc_ptr_t *rpp;   /* right btree address pointer */
1648
1649                 lkp = XFS_ALLOC_KEY_ADDR(left, i, cur);
1650                 lpp = XFS_ALLOC_PTR_ADDR(left, i, cur);
1651                 rkp = XFS_ALLOC_KEY_ADDR(right, 1, cur);
1652                 rpp = XFS_ALLOC_PTR_ADDR(right, 1, cur);
1653 #ifdef DEBUG
1654                 for (i = 0; i < be16_to_cpu(right->bb_numrecs); i++) {
1655                         if ((error = xfs_btree_check_sptr(cur, be32_to_cpu(lpp[i]), level)))
1656                                 return error;
1657                 }
1658 #endif
1659                 memcpy(rkp, lkp, be16_to_cpu(right->bb_numrecs) * sizeof(*rkp));
1660                 memcpy(rpp, lpp, be16_to_cpu(right->bb_numrecs) * sizeof(*rpp));
1661                 xfs_alloc_log_keys(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1662                 xfs_alloc_log_ptrs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1663                 *keyp = *rkp;
1664         }
1665         /*
1666          * For leaf blocks, copy records over to the new block.
1667          */
1668         else {
1669                 xfs_alloc_rec_t *lrp;   /* left btree record pointer */
1670                 xfs_alloc_rec_t *rrp;   /* right btree record pointer */
1671
1672                 lrp = XFS_ALLOC_REC_ADDR(left, i, cur);
1673                 rrp = XFS_ALLOC_REC_ADDR(right, 1, cur);
1674                 memcpy(rrp, lrp, be16_to_cpu(right->bb_numrecs) * sizeof(*rrp));
1675                 xfs_alloc_log_recs(cur, rbp, 1, be16_to_cpu(right->bb_numrecs));
1676                 keyp->ar_startblock = rrp->ar_startblock;
1677                 keyp->ar_blockcount = rrp->ar_blockcount;
1678         }
1679         /*
1680          * Find the left block number by looking in the buffer.
1681          * Adjust numrecs, sibling pointers.
1682          */
1683         lbno = XFS_DADDR_TO_AGBNO(cur->bc_mp, XFS_BUF_ADDR(lbp));
1684         be16_add(&left->bb_numrecs, -(be16_to_cpu(right->bb_numrecs)));
1685         right->bb_rightsib = left->bb_rightsib;
1686         left->bb_rightsib = cpu_to_be32(rbno);
1687         right->bb_leftsib = cpu_to_be32(lbno);
1688         xfs_alloc_log_block(cur->bc_tp, rbp, XFS_BB_ALL_BITS);
1689         xfs_alloc_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
1690         /*
1691          * If there's a block to the new block's right, make that block
1692          * point back to right instead of to left.
1693          */
1694         if (be32_to_cpu(right->bb_rightsib) != NULLAGBLOCK) {
1695                 xfs_alloc_block_t       *rrblock;       /* rr btree block */
1696                 xfs_buf_t               *rrbp;          /* buffer for rrblock */
1697
1698                 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1699                                 cur->bc_private.a.agno, be32_to_cpu(right->bb_rightsib), 0,
1700                                 &rrbp, XFS_ALLOC_BTREE_REF)))
1701                         return error;
1702                 rrblock = XFS_BUF_TO_ALLOC_BLOCK(rrbp);
1703                 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
1704                         return error;
1705                 rrblock->bb_leftsib = cpu_to_be32(rbno);
1706                 xfs_alloc_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
1707         }
1708         /*
1709          * If the cursor is really in the right block, move it there.
1710          * If it's just pointing past the last entry in left, then we'll
1711          * insert there, so don't change anything in that case.
1712          */
1713         if (cur->bc_ptrs[level] > be16_to_cpu(left->bb_numrecs) + 1) {
1714                 xfs_btree_setbuf(cur, level, rbp);
1715                 cur->bc_ptrs[level] -= be16_to_cpu(left->bb_numrecs);
1716         }
1717         /*
1718          * If there are more levels, we'll need another cursor which refers to
1719          * the right block, no matter where this cursor was.
1720          */
1721         if (level + 1 < cur->bc_nlevels) {
1722                 if ((error = xfs_btree_dup_cursor(cur, curp)))
1723                         return error;
1724                 (*curp)->bc_ptrs[level + 1]++;
1725         }
1726         *bnop = rbno;
1727         *stat = 1;
1728         return 0;
1729 }
1730
1731 /*
1732  * Update keys at all levels from here to the root along the cursor's path.
1733  */
1734 STATIC int                              /* error */
1735 xfs_alloc_updkey(
1736         xfs_btree_cur_t         *cur,   /* btree cursor */
1737         xfs_alloc_key_t         *keyp,  /* new key value to update to */
1738         int                     level)  /* starting level for update */
1739 {
1740         int                     ptr;    /* index of key in block */
1741
1742         /*
1743          * Go up the tree from this level toward the root.
1744          * At each level, update the key value to the value input.
1745          * Stop when we reach a level where the cursor isn't pointing
1746          * at the first entry in the block.
1747          */
1748         for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1749                 xfs_alloc_block_t       *block; /* btree block */
1750                 xfs_buf_t               *bp;    /* buffer for block */
1751 #ifdef DEBUG
1752                 int                     error;  /* error return value */
1753 #endif
1754                 xfs_alloc_key_t         *kp;    /* ptr to btree block keys */
1755
1756                 bp = cur->bc_bufs[level];
1757                 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1758 #ifdef DEBUG
1759                 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1760                         return error;
1761 #endif
1762                 ptr = cur->bc_ptrs[level];
1763                 kp = XFS_ALLOC_KEY_ADDR(block, ptr, cur);
1764                 *kp = *keyp;
1765                 xfs_alloc_log_keys(cur, bp, ptr, ptr);
1766         }
1767         return 0;
1768 }
1769
1770 /*
1771  * Externally visible routines.
1772  */
1773
1774 /*
1775  * Decrement cursor by one record at the level.
1776  * For nonzero levels the leaf-ward information is untouched.
1777  */
1778 int                                     /* error */
1779 xfs_alloc_decrement(
1780         xfs_btree_cur_t         *cur,   /* btree cursor */
1781         int                     level,  /* level in btree, 0 is leaf */
1782         int                     *stat)  /* success/failure */
1783 {
1784         xfs_alloc_block_t       *block; /* btree block */
1785         int                     error;  /* error return value */
1786         int                     lev;    /* btree level */
1787
1788         ASSERT(level < cur->bc_nlevels);
1789         /*
1790          * Read-ahead to the left at this level.
1791          */
1792         xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1793         /*
1794          * Decrement the ptr at this level.  If we're still in the block
1795          * then we're done.
1796          */
1797         if (--cur->bc_ptrs[level] > 0) {
1798                 *stat = 1;
1799                 return 0;
1800         }
1801         /*
1802          * Get a pointer to the btree block.
1803          */
1804         block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[level]);
1805 #ifdef DEBUG
1806         if ((error = xfs_btree_check_sblock(cur, block, level,
1807                         cur->bc_bufs[level])))
1808                 return error;
1809 #endif
1810         /*
1811          * If we just went off the left edge of the tree, return failure.
1812          */
1813         if (be32_to_cpu(block->bb_leftsib) == NULLAGBLOCK) {
1814                 *stat = 0;
1815                 return 0;
1816         }
1817         /*
1818          * March up the tree decrementing pointers.
1819          * Stop when we don't go off the left edge of a block.
1820          */
1821         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1822                 if (--cur->bc_ptrs[lev] > 0)
1823                         break;
1824                 /*
1825                  * Read-ahead the left block, we're going to read it
1826                  * in the next loop.
1827                  */
1828                 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1829         }
1830         /*
1831          * If we went off the root then we are seriously confused.
1832          */
1833         ASSERT(lev < cur->bc_nlevels);
1834         /*
1835          * Now walk back down the tree, fixing up the cursor's buffer
1836          * pointers and key numbers.
1837          */
1838         for (block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
1839                 xfs_agblock_t   agbno;  /* block number of btree block */
1840                 xfs_buf_t       *bp;    /* buffer pointer for block */
1841
1842                 agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
1843                 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1844                                 cur->bc_private.a.agno, agbno, 0, &bp,
1845                                 XFS_ALLOC_BTREE_REF)))
1846                         return error;
1847                 lev--;
1848                 xfs_btree_setbuf(cur, lev, bp);
1849                 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1850                 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1851                         return error;
1852                 cur->bc_ptrs[lev] = be16_to_cpu(block->bb_numrecs);
1853         }
1854         *stat = 1;
1855         return 0;
1856 }
1857
1858 /*
1859  * Delete the record pointed to by cur.
1860  * The cursor refers to the place where the record was (could be inserted)
1861  * when the operation returns.
1862  */
1863 int                                     /* error */
1864 xfs_alloc_delete(
1865         xfs_btree_cur_t *cur,           /* btree cursor */
1866         int             *stat)          /* success/failure */
1867 {
1868         int             error;          /* error return value */
1869         int             i;              /* result code */
1870         int             level;          /* btree level */
1871
1872         /*
1873          * Go up the tree, starting at leaf level.
1874          * If 2 is returned then a join was done; go to the next level.
1875          * Otherwise we are done.
1876          */
1877         for (level = 0, i = 2; i == 2; level++) {
1878                 if ((error = xfs_alloc_delrec(cur, level, &i)))
1879                         return error;
1880         }
1881         if (i == 0) {
1882                 for (level = 1; level < cur->bc_nlevels; level++) {
1883                         if (cur->bc_ptrs[level] == 0) {
1884                                 if ((error = xfs_alloc_decrement(cur, level, &i)))
1885                                         return error;
1886                                 break;
1887                         }
1888                 }
1889         }
1890         *stat = i;
1891         return 0;
1892 }
1893
1894 /*
1895  * Get the data from the pointed-to record.
1896  */
1897 int                                     /* error */
1898 xfs_alloc_get_rec(
1899         xfs_btree_cur_t         *cur,   /* btree cursor */
1900         xfs_agblock_t           *bno,   /* output: starting block of extent */
1901         xfs_extlen_t            *len,   /* output: length of extent */
1902         int                     *stat)  /* output: success/failure */
1903 {
1904         xfs_alloc_block_t       *block; /* btree block */
1905 #ifdef DEBUG
1906         int                     error;  /* error return value */
1907 #endif
1908         int                     ptr;    /* record number */
1909
1910         ptr = cur->bc_ptrs[0];
1911         block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]);
1912 #ifdef DEBUG
1913         if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0])))
1914                 return error;
1915 #endif
1916         /*
1917          * Off the right end or left end, return failure.
1918          */
1919         if (ptr > be16_to_cpu(block->bb_numrecs) || ptr <= 0) {
1920                 *stat = 0;
1921                 return 0;
1922         }
1923         /*
1924          * Point to the record and extract its data.
1925          */
1926         {
1927                 xfs_alloc_rec_t         *rec;   /* record data */
1928
1929                 rec = XFS_ALLOC_REC_ADDR(block, ptr, cur);
1930                 *bno = be32_to_cpu(rec->ar_startblock);
1931                 *len = be32_to_cpu(rec->ar_blockcount);
1932         }
1933         *stat = 1;
1934         return 0;
1935 }
1936
1937 /*
1938  * Increment cursor by one record at the level.
1939  * For nonzero levels the leaf-ward information is untouched.
1940  */
1941 int                                     /* error */
1942 xfs_alloc_increment(
1943         xfs_btree_cur_t         *cur,   /* btree cursor */
1944         int                     level,  /* level in btree, 0 is leaf */
1945         int                     *stat)  /* success/failure */
1946 {
1947         xfs_alloc_block_t       *block; /* btree block */
1948         xfs_buf_t               *bp;    /* tree block buffer */
1949         int                     error;  /* error return value */
1950         int                     lev;    /* btree level */
1951
1952         ASSERT(level < cur->bc_nlevels);
1953         /*
1954          * Read-ahead to the right at this level.
1955          */
1956         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1957         /*
1958          * Get a pointer to the btree block.
1959          */
1960         bp = cur->bc_bufs[level];
1961         block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1962 #ifdef DEBUG
1963         if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1964                 return error;
1965 #endif
1966         /*
1967          * Increment the ptr at this level.  If we're still in the block
1968          * then we're done.
1969          */
1970         if (++cur->bc_ptrs[level] <= be16_to_cpu(block->bb_numrecs)) {
1971                 *stat = 1;
1972                 return 0;
1973         }
1974         /*
1975          * If we just went off the right edge of the tree, return failure.
1976          */
1977         if (be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK) {
1978                 *stat = 0;
1979                 return 0;
1980         }
1981         /*
1982          * March up the tree incrementing pointers.
1983          * Stop when we don't go off the right edge of a block.
1984          */
1985         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1986                 bp = cur->bc_bufs[lev];
1987                 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
1988 #ifdef DEBUG
1989                 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1990                         return error;
1991 #endif
1992                 if (++cur->bc_ptrs[lev] <= be16_to_cpu(block->bb_numrecs))
1993                         break;
1994                 /*
1995                  * Read-ahead the right block, we're going to read it
1996                  * in the next loop.
1997                  */
1998                 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1999         }
2000         /*
2001          * If we went off the root then we are seriously confused.
2002          */
2003         ASSERT(lev < cur->bc_nlevels);
2004         /*
2005          * Now walk back down the tree, fixing up the cursor's buffer
2006          * pointers and key numbers.
2007          */
2008         for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_ALLOC_BLOCK(bp);
2009              lev > level; ) {
2010                 xfs_agblock_t   agbno;  /* block number of btree block */
2011
2012                 agbno = be32_to_cpu(*XFS_ALLOC_PTR_ADDR(block, cur->bc_ptrs[lev], cur));
2013                 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
2014                                 cur->bc_private.a.agno, agbno, 0, &bp,
2015                                 XFS_ALLOC_BTREE_REF)))
2016                         return error;
2017                 lev--;
2018                 xfs_btree_setbuf(cur, lev, bp);
2019                 block = XFS_BUF_TO_ALLOC_BLOCK(bp);
2020                 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
2021                         return error;
2022                 cur->bc_ptrs[lev] = 1;
2023         }
2024         *stat = 1;
2025         return 0;
2026 }
2027
2028 /*
2029  * Insert the current record at the point referenced by cur.
2030  * The cursor may be inconsistent on return if splits have been done.
2031  */
2032 int                                     /* error */
2033 xfs_alloc_insert(
2034         xfs_btree_cur_t *cur,           /* btree cursor */
2035         int             *stat)          /* success/failure */
2036 {
2037         int             error;          /* error return value */
2038         int             i;              /* result value, 0 for failure */
2039         int             level;          /* current level number in btree */
2040         xfs_agblock_t   nbno;           /* new block number (split result) */
2041         xfs_btree_cur_t *ncur;          /* new cursor (split result) */
2042         xfs_alloc_rec_t nrec;           /* record being inserted this level */
2043         xfs_btree_cur_t *pcur;          /* previous level's cursor */
2044
2045         level = 0;
2046         nbno = NULLAGBLOCK;
2047         nrec.ar_startblock = cpu_to_be32(cur->bc_rec.a.ar_startblock);
2048         nrec.ar_blockcount = cpu_to_be32(cur->bc_rec.a.ar_blockcount);
2049         ncur = (xfs_btree_cur_t *)0;
2050         pcur = cur;
2051         /*
2052          * Loop going up the tree, starting at the leaf level.
2053          * Stop when we don't get a split block, that must mean that
2054          * the insert is finished with this level.
2055          */
2056         do {
2057                 /*
2058                  * Insert nrec/nbno into this level of the tree.
2059                  * Note if we fail, nbno will be null.
2060                  */
2061                 if ((error = xfs_alloc_insrec(pcur, level++, &nbno, &nrec, &ncur,
2062                                 &i))) {
2063                         if (pcur != cur)
2064                                 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
2065                         return error;
2066                 }
2067                 /*
2068                  * See if the cursor we just used is trash.
2069                  * Can't trash the caller's cursor, but otherwise we should
2070                  * if ncur is a new cursor or we're about to be done.
2071                  */
2072                 if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
2073                         cur->bc_nlevels = pcur->bc_nlevels;
2074                         xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
2075                 }
2076                 /*
2077                  * If we got a new cursor, switch to it.
2078                  */
2079                 if (ncur) {
2080                         pcur = ncur;
2081                         ncur = (xfs_btree_cur_t *)0;
2082                 }
2083         } while (nbno != NULLAGBLOCK);
2084         *stat = i;
2085         return 0;
2086 }
2087
2088 /*
2089  * Lookup the record equal to [bno, len] in the btree given by cur.
2090  */
2091 int                                     /* error */
2092 xfs_alloc_lookup_eq(
2093         xfs_btree_cur_t *cur,           /* btree cursor */
2094         xfs_agblock_t   bno,            /* starting block of extent */
2095         xfs_extlen_t    len,            /* length of extent */
2096         int             *stat)          /* success/failure */
2097 {
2098         cur->bc_rec.a.ar_startblock = bno;
2099         cur->bc_rec.a.ar_blockcount = len;
2100         return xfs_alloc_lookup(cur, XFS_LOOKUP_EQ, stat);
2101 }
2102
2103 /*
2104  * Lookup the first record greater than or equal to [bno, len]
2105  * in the btree given by cur.
2106  */
2107 int                                     /* error */
2108 xfs_alloc_lookup_ge(
2109         xfs_btree_cur_t *cur,           /* btree cursor */
2110         xfs_agblock_t   bno,            /* starting block of extent */
2111         xfs_extlen_t    len,            /* length of extent */
2112         int             *stat)          /* success/failure */
2113 {
2114         cur->bc_rec.a.ar_startblock = bno;
2115         cur->bc_rec.a.ar_blockcount = len;
2116         return xfs_alloc_lookup(cur, XFS_LOOKUP_GE, stat);
2117 }
2118
2119 /*
2120  * Lookup the first record less than or equal to [bno, len]
2121  * in the btree given by cur.
2122  */
2123 int                                     /* error */
2124 xfs_alloc_lookup_le(
2125         xfs_btree_cur_t *cur,           /* btree cursor */
2126         xfs_agblock_t   bno,            /* starting block of extent */
2127         xfs_extlen_t    len,            /* length of extent */
2128         int             *stat)          /* success/failure */
2129 {
2130         cur->bc_rec.a.ar_startblock = bno;
2131         cur->bc_rec.a.ar_blockcount = len;
2132         return xfs_alloc_lookup(cur, XFS_LOOKUP_LE, stat);
2133 }
2134
2135 /*
2136  * Update the record referred to by cur, to the value given by [bno, len].
2137  * This either works (return 0) or gets an EFSCORRUPTED error.
2138  */
2139 int                                     /* error */
2140 xfs_alloc_update(
2141         xfs_btree_cur_t         *cur,   /* btree cursor */
2142         xfs_agblock_t           bno,    /* starting block of extent */
2143         xfs_extlen_t            len)    /* length of extent */
2144 {
2145         xfs_alloc_block_t       *block; /* btree block to update */
2146         int                     error;  /* error return value */
2147         int                     ptr;    /* current record number (updating) */
2148
2149         ASSERT(len > 0);
2150         /*
2151          * Pick up the a.g. freelist struct and the current block.
2152          */
2153         block = XFS_BUF_TO_ALLOC_BLOCK(cur->bc_bufs[0]);
2154 #ifdef DEBUG
2155         if ((error = xfs_btree_check_sblock(cur, block, 0, cur->bc_bufs[0])))
2156                 return error;
2157 #endif
2158         /*
2159          * Get the address of the rec to be updated.
2160          */
2161         ptr = cur->bc_ptrs[0];
2162         {
2163                 xfs_alloc_rec_t         *rp;    /* pointer to updated record */
2164
2165                 rp = XFS_ALLOC_REC_ADDR(block, ptr, cur);
2166                 /*
2167                  * Fill in the new contents and log them.
2168                  */
2169                 rp->ar_startblock = cpu_to_be32(bno);
2170                 rp->ar_blockcount = cpu_to_be32(len);
2171                 xfs_alloc_log_recs(cur, cur->bc_bufs[0], ptr, ptr);
2172         }
2173         /*
2174          * If it's the by-size btree and it's the last leaf block and
2175          * it's the last record... then update the size of the longest
2176          * extent in the a.g., which we cache in the a.g. freelist header.
2177          */
2178         if (cur->bc_btnum == XFS_BTNUM_CNT &&
2179             be32_to_cpu(block->bb_rightsib) == NULLAGBLOCK &&
2180             ptr == be16_to_cpu(block->bb_numrecs)) {
2181                 xfs_agf_t       *agf;   /* a.g. freespace header */
2182                 xfs_agnumber_t  seqno;
2183
2184                 agf = XFS_BUF_TO_AGF(cur->bc_private.a.agbp);
2185                 seqno = be32_to_cpu(agf->agf_seqno);
2186                 cur->bc_mp->m_perag[seqno].pagf_longest = len;
2187                 agf->agf_longest = cpu_to_be32(len);
2188                 xfs_alloc_log_agf(cur->bc_tp, cur->bc_private.a.agbp,
2189                         XFS_AGF_LONGEST);
2190         }
2191         /*
2192          * Updating first record in leaf. Pass new key value up to our parent.
2193          */
2194         if (ptr == 1) {
2195                 xfs_alloc_key_t key;    /* key containing [bno, len] */
2196
2197                 key.ar_startblock = cpu_to_be32(bno);
2198                 key.ar_blockcount = cpu_to_be32(len);
2199                 if ((error = xfs_alloc_updkey(cur, &key, 1)))
2200                         return error;
2201         }
2202         return 0;
2203 }