2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 
   5  * This program is free software; you can redistribute it and/or
 
   6  * modify it under the terms of the GNU General Public License as
 
   7  * published by the Free Software Foundation.
 
   9  * This program is distributed in the hope that it would be useful,
 
  10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
  12  * GNU General Public License for more details.
 
  14  * You should have received a copy of the GNU General Public License
 
  15  * along with this program; if not, write the Free Software Foundation,
 
  16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
  20 #include "xfs_types.h"
 
  24 #include "xfs_trans.h"
 
  29 #include "xfs_dmapi.h"
 
  30 #include "xfs_mount.h"
 
  31 #include "xfs_da_btree.h"
 
  32 #include "xfs_bmap_btree.h"
 
  33 #include "xfs_alloc_btree.h"
 
  34 #include "xfs_ialloc_btree.h"
 
  35 #include "xfs_dir_sf.h"
 
  36 #include "xfs_dir2_sf.h"
 
  37 #include "xfs_attr_sf.h"
 
  38 #include "xfs_dinode.h"
 
  39 #include "xfs_inode.h"
 
  40 #include "xfs_inode_item.h"
 
  41 #include "xfs_alloc.h"
 
  42 #include "xfs_btree.h"
 
  45 #include "xfs_attr_leaf.h"
 
  46 #include "xfs_dir_leaf.h"
 
  47 #include "xfs_dir2_data.h"
 
  48 #include "xfs_dir2_leaf.h"
 
  49 #include "xfs_dir2_block.h"
 
  50 #include "xfs_dir2_node.h"
 
  51 #include "xfs_error.h"
 
  56  * Routines to implement directories as Btrees of hashed names.
 
  59 /*========================================================================
 
  60  * Function prototypes for the kernel.
 
  61  *========================================================================*/
 
  64  * Routines used for growing the Btree.
 
  66 STATIC int xfs_da_root_split(xfs_da_state_t *state,
 
  67                                             xfs_da_state_blk_t *existing_root,
 
  68                                             xfs_da_state_blk_t *new_child);
 
  69 STATIC int xfs_da_node_split(xfs_da_state_t *state,
 
  70                                             xfs_da_state_blk_t *existing_blk,
 
  71                                             xfs_da_state_blk_t *split_blk,
 
  72                                             xfs_da_state_blk_t *blk_to_add,
 
  75 STATIC void xfs_da_node_rebalance(xfs_da_state_t *state,
 
  76                                          xfs_da_state_blk_t *node_blk_1,
 
  77                                          xfs_da_state_blk_t *node_blk_2);
 
  78 STATIC void xfs_da_node_add(xfs_da_state_t *state,
 
  79                                    xfs_da_state_blk_t *old_node_blk,
 
  80                                    xfs_da_state_blk_t *new_node_blk);
 
  83  * Routines used for shrinking the Btree.
 
  85 STATIC int xfs_da_root_join(xfs_da_state_t *state,
 
  86                                            xfs_da_state_blk_t *root_blk);
 
  87 STATIC int xfs_da_node_toosmall(xfs_da_state_t *state, int *retval);
 
  88 STATIC void xfs_da_node_remove(xfs_da_state_t *state,
 
  89                                               xfs_da_state_blk_t *drop_blk);
 
  90 STATIC void xfs_da_node_unbalance(xfs_da_state_t *state,
 
  91                                          xfs_da_state_blk_t *src_node_blk,
 
  92                                          xfs_da_state_blk_t *dst_node_blk);
 
  97 STATIC uint     xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count);
 
  98 STATIC int      xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp);
 
  99 STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra);
 
 100 STATIC int      xfs_da_blk_unlink(xfs_da_state_t *state,
 
 101                                   xfs_da_state_blk_t *drop_blk,
 
 102                                   xfs_da_state_blk_t *save_blk);
 
 103 STATIC void     xfs_da_state_kill_altpath(xfs_da_state_t *state);
 
 105 /*========================================================================
 
 106  * Routines used for growing the Btree.
 
 107  *========================================================================*/
 
 110  * Create the initial contents of an intermediate node.
 
 113 xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
 
 114                                  xfs_dabuf_t **bpp, int whichfork)
 
 116         xfs_da_intnode_t *node;
 
 122         error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork);
 
 127         node->hdr.info.forw = 0;
 
 128         node->hdr.info.back = 0;
 
 129         INT_SET(node->hdr.info.magic, ARCH_CONVERT, XFS_DA_NODE_MAGIC);
 
 130         node->hdr.info.pad = 0;
 
 132         INT_SET(node->hdr.level, ARCH_CONVERT, level);
 
 134         xfs_da_log_buf(tp, bp,
 
 135                 XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
 
 142  * Split a leaf node, rebalance, then possibly split
 
 143  * intermediate nodes, rebalance, etc.
 
 146 xfs_da_split(xfs_da_state_t *state)
 
 148         xfs_da_state_blk_t *oldblk, *newblk, *addblk;
 
 149         xfs_da_intnode_t *node;
 
 151         int max, action, error, i;
 
 154          * Walk back up the tree splitting/inserting/adjusting as necessary.
 
 155          * If we need to insert and there isn't room, split the node, then
 
 156          * decide which fragment to insert the new block from below into.
 
 157          * Note that we may split the root this way, but we need more fixup.
 
 159         max = state->path.active - 1;
 
 160         ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
 
 161         ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
 
 162                state->path.blk[max].magic == XFS_DIRX_LEAF_MAGIC(state->mp));
 
 164         addblk = &state->path.blk[max];         /* initial dummy value */
 
 165         for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
 
 166                 oldblk = &state->path.blk[i];
 
 167                 newblk = &state->altpath.blk[i];
 
 170                  * If a leaf node then
 
 171                  *     Allocate a new leaf node, then rebalance across them.
 
 172                  * else if an intermediate node then
 
 173                  *     We split on the last layer, must we split the node?
 
 175                 switch (oldblk->magic) {
 
 176                 case XFS_ATTR_LEAF_MAGIC:
 
 177                         error = xfs_attr_leaf_split(state, oldblk, newblk);
 
 178                         if ((error != 0) && (error != ENOSPC)) {
 
 179                                 return(error);  /* GROT: attr is inconsistent */
 
 186                          * Entry wouldn't fit, split the leaf again.
 
 188                         state->extravalid = 1;
 
 190                                 state->extraafter = 0;  /* before newblk */
 
 191                                 error = xfs_attr_leaf_split(state, oldblk,
 
 194                                 state->extraafter = 1;  /* after newblk */
 
 195                                 error = xfs_attr_leaf_split(state, newblk,
 
 199                                 return(error);  /* GROT: attr inconsistent */
 
 202                 case XFS_DIR_LEAF_MAGIC:
 
 203                         ASSERT(XFS_DIR_IS_V1(state->mp));
 
 204                         error = xfs_dir_leaf_split(state, oldblk, newblk);
 
 205                         if ((error != 0) && (error != ENOSPC)) {
 
 206                                 return(error);  /* GROT: dir is inconsistent */
 
 213                          * Entry wouldn't fit, split the leaf again.
 
 215                         state->extravalid = 1;
 
 217                                 state->extraafter = 0;  /* before newblk */
 
 218                                 error = xfs_dir_leaf_split(state, oldblk,
 
 221                                         return(error);  /* GROT: dir incon. */
 
 224                                 state->extraafter = 1;  /* after newblk */
 
 225                                 error = xfs_dir_leaf_split(state, newblk,
 
 228                                         return(error);  /* GROT: dir incon. */
 
 232                 case XFS_DIR2_LEAFN_MAGIC:
 
 233                         ASSERT(XFS_DIR_IS_V2(state->mp));
 
 234                         error = xfs_dir2_leafn_split(state, oldblk, newblk);
 
 239                 case XFS_DA_NODE_MAGIC:
 
 240                         error = xfs_da_node_split(state, oldblk, newblk, addblk,
 
 242                         xfs_da_buf_done(addblk->bp);
 
 245                                 return(error);  /* GROT: dir is inconsistent */
 
 247                          * Record the newly split block for the next time thru?
 
 257                  * Update the btree to show the new hashval for this child.
 
 259                 xfs_da_fixhashpath(state, &state->path);
 
 261                  * If we won't need this block again, it's getting dropped
 
 262                  * from the active path by the loop control, so we need
 
 263                  * to mark it done now.
 
 265                 if (i > 0 || !addblk)
 
 266                         xfs_da_buf_done(oldblk->bp);
 
 272          * Split the root node.
 
 274         ASSERT(state->path.active == 0);
 
 275         oldblk = &state->path.blk[0];
 
 276         error = xfs_da_root_split(state, oldblk, addblk);
 
 278                 xfs_da_buf_done(oldblk->bp);
 
 279                 xfs_da_buf_done(addblk->bp);
 
 281                 return(error);  /* GROT: dir is inconsistent */
 
 285          * Update pointers to the node which used to be block 0 and
 
 286          * just got bumped because of the addition of a new root node.
 
 287          * There might be three blocks involved if a double split occurred,
 
 288          * and the original block 0 could be at any position in the list.
 
 291         node = oldblk->bp->data;
 
 292         if (node->hdr.info.forw) {
 
 293                 if (INT_GET(node->hdr.info.forw, ARCH_CONVERT) == addblk->blkno) {
 
 296                         ASSERT(state->extravalid);
 
 297                         bp = state->extrablk.bp;
 
 300                 INT_SET(node->hdr.info.back, ARCH_CONVERT, oldblk->blkno);
 
 301                 xfs_da_log_buf(state->args->trans, bp,
 
 302                     XFS_DA_LOGRANGE(node, &node->hdr.info,
 
 303                     sizeof(node->hdr.info)));
 
 305         node = oldblk->bp->data;
 
 306         if (INT_GET(node->hdr.info.back, ARCH_CONVERT)) {
 
 307                 if (INT_GET(node->hdr.info.back, ARCH_CONVERT) == addblk->blkno) {
 
 310                         ASSERT(state->extravalid);
 
 311                         bp = state->extrablk.bp;
 
 314                 INT_SET(node->hdr.info.forw, ARCH_CONVERT, oldblk->blkno);
 
 315                 xfs_da_log_buf(state->args->trans, bp,
 
 316                     XFS_DA_LOGRANGE(node, &node->hdr.info,
 
 317                     sizeof(node->hdr.info)));
 
 319         xfs_da_buf_done(oldblk->bp);
 
 320         xfs_da_buf_done(addblk->bp);
 
 326  * Split the root.  We have to create a new root and point to the two
 
 327  * parts (the split old root) that we just created.  Copy block zero to
 
 328  * the EOF, extending the inode in process.
 
 330 STATIC int                                              /* error */
 
 331 xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 
 332                                  xfs_da_state_blk_t *blk2)
 
 334         xfs_da_intnode_t *node, *oldroot;
 
 342         xfs_dir2_leaf_t *leaf;
 
 345          * Copy the existing (incorrect) block from the root node position
 
 346          * to a free space somewhere.
 
 349         ASSERT(args != NULL);
 
 350         error = xfs_da_grow_inode(args, &blkno);
 
 356         error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork);
 
 361         oldroot = blk1->bp->data;
 
 362         if (INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
 
 363                 size = (int)((char *)&oldroot->btree[INT_GET(oldroot->hdr.count, ARCH_CONVERT)] -
 
 366                 ASSERT(XFS_DIR_IS_V2(mp));
 
 367                 ASSERT(INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
 
 368                 leaf = (xfs_dir2_leaf_t *)oldroot;
 
 369                 size = (int)((char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] -
 
 372         memcpy(node, oldroot, size);
 
 373         xfs_da_log_buf(tp, bp, 0, size - 1);
 
 374         xfs_da_buf_done(blk1->bp);
 
 379          * Set up the new root node.
 
 381         error = xfs_da_node_create(args,
 
 382                 args->whichfork == XFS_DATA_FORK &&
 
 383                 XFS_DIR_IS_V2(mp) ? mp->m_dirleafblk : 0,
 
 384                 INT_GET(node->hdr.level, ARCH_CONVERT) + 1, &bp, args->whichfork);
 
 388         INT_SET(node->btree[0].hashval, ARCH_CONVERT, blk1->hashval);
 
 389         INT_SET(node->btree[0].before, ARCH_CONVERT, blk1->blkno);
 
 390         INT_SET(node->btree[1].hashval, ARCH_CONVERT, blk2->hashval);
 
 391         INT_SET(node->btree[1].before, ARCH_CONVERT, blk2->blkno);
 
 392         INT_SET(node->hdr.count, ARCH_CONVERT, 2);
 
 395         if (INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) {
 
 396                 ASSERT(blk1->blkno >= mp->m_dirleafblk &&
 
 397                        blk1->blkno < mp->m_dirfreeblk);
 
 398                 ASSERT(blk2->blkno >= mp->m_dirleafblk &&
 
 399                        blk2->blkno < mp->m_dirfreeblk);
 
 403         /* Header is already logged by xfs_da_node_create */
 
 404         xfs_da_log_buf(tp, bp,
 
 405                 XFS_DA_LOGRANGE(node, node->btree,
 
 406                         sizeof(xfs_da_node_entry_t) * 2));
 
 413  * Split the node, rebalance, then add the new entry.
 
 415 STATIC int                                              /* error */
 
 416 xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
 
 417                                  xfs_da_state_blk_t *newblk,
 
 418                                  xfs_da_state_blk_t *addblk,
 
 419                                  int treelevel, int *result)
 
 421         xfs_da_intnode_t *node;
 
 426         node = oldblk->bp->data;
 
 427         ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
 
 430          * With V2 the extra block is data or freespace.
 
 432         useextra = state->extravalid && (XFS_DIR_IS_V1(state->mp) ||
 
 433                         state->args->whichfork == XFS_ATTR_FORK);
 
 434         newcount = 1 + useextra;
 
 436          * Do we have to split the node?
 
 438         if ((INT_GET(node->hdr.count, ARCH_CONVERT) + newcount) > state->node_ents) {
 
 440                  * Allocate a new node, add to the doubly linked chain of
 
 441                  * nodes, then move some of our excess entries into it.
 
 443                 error = xfs_da_grow_inode(state->args, &blkno);
 
 445                         return(error);  /* GROT: dir is inconsistent */
 
 447                 error = xfs_da_node_create(state->args, blkno, treelevel,
 
 448                                            &newblk->bp, state->args->whichfork);
 
 450                         return(error);  /* GROT: dir is inconsistent */
 
 451                 newblk->blkno = blkno;
 
 452                 newblk->magic = XFS_DA_NODE_MAGIC;
 
 453                 xfs_da_node_rebalance(state, oldblk, newblk);
 
 454                 error = xfs_da_blk_link(state, oldblk, newblk);
 
 463          * Insert the new entry(s) into the correct block
 
 464          * (updating last hashval in the process).
 
 466          * xfs_da_node_add() inserts BEFORE the given index,
 
 467          * and as a result of using node_lookup_int() we always
 
 468          * point to a valid entry (not after one), but a split
 
 469          * operation always results in a new block whose hashvals
 
 470          * FOLLOW the current block.
 
 472          * If we had double-split op below us, then add the extra block too.
 
 474         node = oldblk->bp->data;
 
 475         if (oldblk->index <= INT_GET(node->hdr.count, ARCH_CONVERT)) {
 
 477                 xfs_da_node_add(state, oldblk, addblk);
 
 479                         if (state->extraafter)
 
 481                         xfs_da_node_add(state, oldblk, &state->extrablk);
 
 482                         state->extravalid = 0;
 
 486                 xfs_da_node_add(state, newblk, addblk);
 
 488                         if (state->extraafter)
 
 490                         xfs_da_node_add(state, newblk, &state->extrablk);
 
 491                         state->extravalid = 0;
 
 499  * Balance the btree elements between two intermediate nodes,
 
 500  * usually one full and one empty.
 
 502  * NOTE: if blk2 is empty, then it will get the upper half of blk1.
 
 505 xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
 
 506                                      xfs_da_state_blk_t *blk2)
 
 508         xfs_da_intnode_t *node1, *node2, *tmpnode;
 
 509         xfs_da_node_entry_t *btree_s, *btree_d;
 
 513         node1 = blk1->bp->data;
 
 514         node2 = blk2->bp->data;
 
 516          * Figure out how many entries need to move, and in which direction.
 
 517          * Swap the nodes around if that makes it simpler.
 
 519         if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) &&
 
 520             ((INT_GET(node2->btree[ 0 ].hashval, ARCH_CONVERT) < INT_GET(node1->btree[ 0 ].hashval, ARCH_CONVERT)) ||
 
 521              (INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
 
 522               INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) {
 
 527         ASSERT(INT_GET(node1->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
 
 528         ASSERT(INT_GET(node2->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
 
 529         count = (INT_GET(node1->hdr.count, ARCH_CONVERT) - INT_GET(node2->hdr.count, ARCH_CONVERT)) / 2;
 
 532         tp = state->args->trans;
 
 534          * Two cases: high-to-low and low-to-high.
 
 538                  * Move elements in node2 up to make a hole.
 
 540                 if ((tmp = INT_GET(node2->hdr.count, ARCH_CONVERT)) > 0) {
 
 541                         tmp *= (uint)sizeof(xfs_da_node_entry_t);
 
 542                         btree_s = &node2->btree[0];
 
 543                         btree_d = &node2->btree[count];
 
 544                         memmove(btree_d, btree_s, tmp);
 
 548                  * Move the req'd B-tree elements from high in node1 to
 
 551                 INT_MOD(node2->hdr.count, ARCH_CONVERT, count);
 
 552                 tmp = count * (uint)sizeof(xfs_da_node_entry_t);
 
 553                 btree_s = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT) - count];
 
 554                 btree_d = &node2->btree[0];
 
 555                 memcpy(btree_d, btree_s, tmp);
 
 556                 INT_MOD(node1->hdr.count, ARCH_CONVERT, -(count));
 
 560                  * Move the req'd B-tree elements from low in node2 to
 
 564                 tmp = count * (uint)sizeof(xfs_da_node_entry_t);
 
 565                 btree_s = &node2->btree[0];
 
 566                 btree_d = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT)];
 
 567                 memcpy(btree_d, btree_s, tmp);
 
 568                 INT_MOD(node1->hdr.count, ARCH_CONVERT, count);
 
 569                 xfs_da_log_buf(tp, blk1->bp,
 
 570                         XFS_DA_LOGRANGE(node1, btree_d, tmp));
 
 573                  * Move elements in node2 down to fill the hole.
 
 575                 tmp  = INT_GET(node2->hdr.count, ARCH_CONVERT) - count;
 
 576                 tmp *= (uint)sizeof(xfs_da_node_entry_t);
 
 577                 btree_s = &node2->btree[count];
 
 578                 btree_d = &node2->btree[0];
 
 579                 memmove(btree_d, btree_s, tmp);
 
 580                 INT_MOD(node2->hdr.count, ARCH_CONVERT, -(count));
 
 584          * Log header of node 1 and all current bits of node 2.
 
 586         xfs_da_log_buf(tp, blk1->bp,
 
 587                 XFS_DA_LOGRANGE(node1, &node1->hdr, sizeof(node1->hdr)));
 
 588         xfs_da_log_buf(tp, blk2->bp,
 
 589                 XFS_DA_LOGRANGE(node2, &node2->hdr,
 
 591                         sizeof(node2->btree[0]) * INT_GET(node2->hdr.count, ARCH_CONVERT)));
 
 594          * Record the last hashval from each block for upward propagation.
 
 595          * (note: don't use the swapped node pointers)
 
 597         node1 = blk1->bp->data;
 
 598         node2 = blk2->bp->data;
 
 599         blk1->hashval = INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
 
 600         blk2->hashval = INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
 
 603          * Adjust the expected index for insertion.
 
 605         if (blk1->index >= INT_GET(node1->hdr.count, ARCH_CONVERT)) {
 
 606                 blk2->index = blk1->index - INT_GET(node1->hdr.count, ARCH_CONVERT);
 
 607                 blk1->index = INT_GET(node1->hdr.count, ARCH_CONVERT) + 1;      /* make it invalid */
 
 612  * Add a new entry to an intermediate node.
 
 615 xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
 
 616                                xfs_da_state_blk_t *newblk)
 
 618         xfs_da_intnode_t *node;
 
 619         xfs_da_node_entry_t *btree;
 
 623         node = oldblk->bp->data;
 
 625         ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
 
 626         ASSERT((oldblk->index >= 0) && (oldblk->index <= INT_GET(node->hdr.count, ARCH_CONVERT)));
 
 627         ASSERT(newblk->blkno != 0);
 
 628         if (state->args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
 
 629                 ASSERT(newblk->blkno >= mp->m_dirleafblk &&
 
 630                        newblk->blkno < mp->m_dirfreeblk);
 
 633          * We may need to make some room before we insert the new node.
 
 636         btree = &node->btree[ oldblk->index ];
 
 637         if (oldblk->index < INT_GET(node->hdr.count, ARCH_CONVERT)) {
 
 638                 tmp = (INT_GET(node->hdr.count, ARCH_CONVERT) - oldblk->index) * (uint)sizeof(*btree);
 
 639                 memmove(btree + 1, btree, tmp);
 
 641         INT_SET(btree->hashval, ARCH_CONVERT, newblk->hashval);
 
 642         INT_SET(btree->before, ARCH_CONVERT, newblk->blkno);
 
 643         xfs_da_log_buf(state->args->trans, oldblk->bp,
 
 644                 XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree)));
 
 645         INT_MOD(node->hdr.count, ARCH_CONVERT, +1);
 
 646         xfs_da_log_buf(state->args->trans, oldblk->bp,
 
 647                 XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
 
 650          * Copy the last hash value from the oldblk to propagate upwards.
 
 652         oldblk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
 
 655 /*========================================================================
 
 656  * Routines used for shrinking the Btree.
 
 657  *========================================================================*/
 
 660  * Deallocate an empty leaf node, remove it from its parent,
 
 661  * possibly deallocating that block, etc...
 
 664 xfs_da_join(xfs_da_state_t *state)
 
 666         xfs_da_state_blk_t *drop_blk, *save_blk;
 
 670         drop_blk = &state->path.blk[ state->path.active-1 ];
 
 671         save_blk = &state->altpath.blk[ state->path.active-1 ];
 
 672         ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
 
 673         ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
 
 674                drop_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp));
 
 677          * Walk back up the tree joining/deallocating as necessary.
 
 678          * When we stop dropping blocks, break out.
 
 680         for (  ; state->path.active >= 2; drop_blk--, save_blk--,
 
 681                  state->path.active--) {
 
 683                  * See if we can combine the block with a neighbor.
 
 684                  *   (action == 0) => no options, just leave
 
 685                  *   (action == 1) => coalesce, then unlink
 
 686                  *   (action == 2) => block empty, unlink it
 
 688                 switch (drop_blk->magic) {
 
 689                 case XFS_ATTR_LEAF_MAGIC:
 
 690                         error = xfs_attr_leaf_toosmall(state, &action);
 
 695                         xfs_attr_leaf_unbalance(state, drop_blk, save_blk);
 
 697                 case XFS_DIR_LEAF_MAGIC:
 
 698                         ASSERT(XFS_DIR_IS_V1(state->mp));
 
 699                         error = xfs_dir_leaf_toosmall(state, &action);
 
 704                         xfs_dir_leaf_unbalance(state, drop_blk, save_blk);
 
 706                 case XFS_DIR2_LEAFN_MAGIC:
 
 707                         ASSERT(XFS_DIR_IS_V2(state->mp));
 
 708                         error = xfs_dir2_leafn_toosmall(state, &action);
 
 713                         xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
 
 715                 case XFS_DA_NODE_MAGIC:
 
 717                          * Remove the offending node, fixup hashvals,
 
 718                          * check for a toosmall neighbor.
 
 720                         xfs_da_node_remove(state, drop_blk);
 
 721                         xfs_da_fixhashpath(state, &state->path);
 
 722                         error = xfs_da_node_toosmall(state, &action);
 
 727                         xfs_da_node_unbalance(state, drop_blk, save_blk);
 
 730                 xfs_da_fixhashpath(state, &state->altpath);
 
 731                 error = xfs_da_blk_unlink(state, drop_blk, save_blk);
 
 732                 xfs_da_state_kill_altpath(state);
 
 735                 error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
 
 742          * We joined all the way to the top.  If it turns out that
 
 743          * we only have one entry in the root, make the child block
 
 746         xfs_da_node_remove(state, drop_blk);
 
 747         xfs_da_fixhashpath(state, &state->path);
 
 748         error = xfs_da_root_join(state, &state->path.blk[0]);
 
 753  * We have only one entry in the root.  Copy the only remaining child of
 
 754  * the old root to block 0 as the new root node.
 
 757 xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
 
 759         xfs_da_intnode_t *oldroot;
 
 761         xfs_da_blkinfo_t *blkinfo;
 
 768         ASSERT(args != NULL);
 
 769         ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
 
 770         oldroot = root_blk->bp->data;
 
 771         ASSERT(INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
 
 772         ASSERT(!oldroot->hdr.info.forw);
 
 773         ASSERT(!oldroot->hdr.info.back);
 
 776          * If the root has more than one child, then don't do anything.
 
 778         if (INT_GET(oldroot->hdr.count, ARCH_CONVERT) > 1)
 
 782          * Read in the (only) child block, then copy those bytes into
 
 783          * the root block's buffer and free the original child block.
 
 785         child = INT_GET(oldroot->btree[ 0 ].before, ARCH_CONVERT);
 
 787         error = xfs_da_read_buf(args->trans, args->dp, child, -1, &bp,
 
 793         if (INT_GET(oldroot->hdr.level, ARCH_CONVERT) == 1) {
 
 794                 ASSERT(INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
 
 795                        INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
 
 797                 ASSERT(INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
 
 799         ASSERT(!blkinfo->forw);
 
 800         ASSERT(!blkinfo->back);
 
 801         memcpy(root_blk->bp->data, bp->data, state->blocksize);
 
 802         xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
 
 803         error = xfs_da_shrink_inode(args, child, bp);
 
 808  * Check a node block and its neighbors to see if the block should be
 
 809  * collapsed into one or the other neighbor.  Always keep the block
 
 810  * with the smaller block number.
 
 811  * If the current block is over 50% full, don't try to join it, return 0.
 
 812  * If the block is empty, fill in the state structure and return 2.
 
 813  * If it can be collapsed, fill in the state structure and return 1.
 
 814  * If nothing can be done, return 0.
 
 817 xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
 
 819         xfs_da_intnode_t *node;
 
 820         xfs_da_state_blk_t *blk;
 
 821         xfs_da_blkinfo_t *info;
 
 822         int count, forward, error, retval, i;
 
 827          * Check for the degenerate case of the block being over 50% full.
 
 828          * If so, it's not worth even looking to see if we might be able
 
 829          * to coalesce with a sibling.
 
 831         blk = &state->path.blk[ state->path.active-1 ];
 
 832         info = blk->bp->data;
 
 833         ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
 
 834         node = (xfs_da_intnode_t *)info;
 
 835         count = INT_GET(node->hdr.count, ARCH_CONVERT);
 
 836         if (count > (state->node_ents >> 1)) {
 
 837                 *action = 0;    /* blk over 50%, don't try to join */
 
 838                 return(0);      /* blk over 50%, don't try to join */
 
 842          * Check for the degenerate case of the block being empty.
 
 843          * If the block is empty, we'll simply delete it, no need to
 
 844          * coalesce it with a sibling block.  We choose (aribtrarily)
 
 845          * to merge with the forward block unless it is NULL.
 
 849                  * Make altpath point to the block we want to keep and
 
 850                  * path point to the block we want to drop (this one).
 
 852                 forward = info->forw;
 
 853                 memcpy(&state->altpath, &state->path, sizeof(state->path));
 
 854                 error = xfs_da_path_shift(state, &state->altpath, forward,
 
 867          * Examine each sibling block to see if we can coalesce with
 
 868          * at least 25% free space to spare.  We need to figure out
 
 869          * whether to merge with the forward or the backward block.
 
 870          * We prefer coalescing with the lower numbered sibling so as
 
 871          * to shrink a directory over time.
 
 873         /* start with smaller blk num */
 
 874         forward = (INT_GET(info->forw, ARCH_CONVERT)
 
 875                                 < INT_GET(info->back, ARCH_CONVERT));
 
 876         for (i = 0; i < 2; forward = !forward, i++) {
 
 878                         blkno = INT_GET(info->forw, ARCH_CONVERT);
 
 880                         blkno = INT_GET(info->back, ARCH_CONVERT);
 
 883                 error = xfs_da_read_buf(state->args->trans, state->args->dp,
 
 884                                         blkno, -1, &bp, state->args->whichfork);
 
 889                 node = (xfs_da_intnode_t *)info;
 
 890                 count  = state->node_ents;
 
 891                 count -= state->node_ents >> 2;
 
 892                 count -= INT_GET(node->hdr.count, ARCH_CONVERT);
 
 894                 ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
 
 895                 count -= INT_GET(node->hdr.count, ARCH_CONVERT);
 
 896                 xfs_da_brelse(state->args->trans, bp);
 
 898                         break;  /* fits with at least 25% to spare */
 
 906          * Make altpath point to the block we want to keep (the lower
 
 907          * numbered block) and path point to the block we want to drop.
 
 909         memcpy(&state->altpath, &state->path, sizeof(state->path));
 
 910         if (blkno < blk->blkno) {
 
 911                 error = xfs_da_path_shift(state, &state->altpath, forward,
 
 921                 error = xfs_da_path_shift(state, &state->path, forward,
 
 936  * Walk back up the tree adjusting hash values as necessary,
 
 937  * when we stop making changes, return.
 
 940 xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
 
 942         xfs_da_state_blk_t *blk;
 
 943         xfs_da_intnode_t *node;
 
 944         xfs_da_node_entry_t *btree;
 
 945         xfs_dahash_t lasthash=0;
 
 948         level = path->active-1;
 
 949         blk = &path->blk[ level ];
 
 950         switch (blk->magic) {
 
 951         case XFS_ATTR_LEAF_MAGIC:
 
 952                 lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
 
 956         case XFS_DIR_LEAF_MAGIC:
 
 957                 ASSERT(XFS_DIR_IS_V1(state->mp));
 
 958                 lasthash = xfs_dir_leaf_lasthash(blk->bp, &count);
 
 962         case XFS_DIR2_LEAFN_MAGIC:
 
 963                 ASSERT(XFS_DIR_IS_V2(state->mp));
 
 964                 lasthash = xfs_dir2_leafn_lasthash(blk->bp, &count);
 
 968         case XFS_DA_NODE_MAGIC:
 
 969                 lasthash = xfs_da_node_lasthash(blk->bp, &count);
 
 974         for (blk--, level--; level >= 0; blk--, level--) {
 
 975                 node = blk->bp->data;
 
 976                 ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
 
 977                 btree = &node->btree[ blk->index ];
 
 978                 if (INT_GET(btree->hashval, ARCH_CONVERT) == lasthash)
 
 980                 blk->hashval = lasthash;
 
 981                 INT_SET(btree->hashval, ARCH_CONVERT, lasthash);
 
 982                 xfs_da_log_buf(state->args->trans, blk->bp,
 
 983                                   XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
 
 985                 lasthash = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
 
 990  * Remove an entry from an intermediate node.
 
 993 xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
 
 995         xfs_da_intnode_t *node;
 
 996         xfs_da_node_entry_t *btree;
 
 999         node = drop_blk->bp->data;
 
1000         ASSERT(drop_blk->index < INT_GET(node->hdr.count, ARCH_CONVERT));
 
1001         ASSERT(drop_blk->index >= 0);
 
1004          * Copy over the offending entry, or just zero it out.
 
1006         btree = &node->btree[drop_blk->index];
 
1007         if (drop_blk->index < (INT_GET(node->hdr.count, ARCH_CONVERT)-1)) {
 
1008                 tmp  = INT_GET(node->hdr.count, ARCH_CONVERT) - drop_blk->index - 1;
 
1009                 tmp *= (uint)sizeof(xfs_da_node_entry_t);
 
1010                 memmove(btree, btree + 1, tmp);
 
1011                 xfs_da_log_buf(state->args->trans, drop_blk->bp,
 
1012                     XFS_DA_LOGRANGE(node, btree, tmp));
 
1013                 btree = &node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ];
 
1015         memset((char *)btree, 0, sizeof(xfs_da_node_entry_t));
 
1016         xfs_da_log_buf(state->args->trans, drop_blk->bp,
 
1017             XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
 
1018         INT_MOD(node->hdr.count, ARCH_CONVERT, -1);
 
1019         xfs_da_log_buf(state->args->trans, drop_blk->bp,
 
1020             XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
 
1023          * Copy the last hash value from the block to propagate upwards.
 
1026         drop_blk->hashval = INT_GET(btree->hashval, ARCH_CONVERT);
 
1030  * Unbalance the btree elements between two intermediate nodes,
 
1031  * move all Btree elements from one node into another.
 
1034 xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 
1035                                      xfs_da_state_blk_t *save_blk)
 
1037         xfs_da_intnode_t *drop_node, *save_node;
 
1038         xfs_da_node_entry_t *btree;
 
1042         drop_node = drop_blk->bp->data;
 
1043         save_node = save_blk->bp->data;
 
1044         ASSERT(INT_GET(drop_node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
 
1045         ASSERT(INT_GET(save_node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
 
1046         tp = state->args->trans;
 
1049          * If the dying block has lower hashvals, then move all the
 
1050          * elements in the remaining block up to make a hole.
 
1052         if ((INT_GET(drop_node->btree[ 0 ].hashval, ARCH_CONVERT) < INT_GET(save_node->btree[ 0 ].hashval, ARCH_CONVERT)) ||
 
1053             (INT_GET(drop_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
 
1054              INT_GET(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))
 
1056                 btree = &save_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT) ];
 
1057                 tmp = INT_GET(save_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t);
 
1058                 memmove(btree, &save_node->btree[0], tmp);
 
1059                 btree = &save_node->btree[0];
 
1060                 xfs_da_log_buf(tp, save_blk->bp,
 
1061                         XFS_DA_LOGRANGE(save_node, btree,
 
1062                                 (INT_GET(save_node->hdr.count, ARCH_CONVERT) + INT_GET(drop_node->hdr.count, ARCH_CONVERT)) *
 
1063                                 sizeof(xfs_da_node_entry_t)));
 
1065                 btree = &save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT) ];
 
1066                 xfs_da_log_buf(tp, save_blk->bp,
 
1067                         XFS_DA_LOGRANGE(save_node, btree,
 
1068                                 INT_GET(drop_node->hdr.count, ARCH_CONVERT) *
 
1069                                 sizeof(xfs_da_node_entry_t)));
 
1073          * Move all the B-tree elements from drop_blk to save_blk.
 
1075         tmp = INT_GET(drop_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t);
 
1076         memcpy(btree, &drop_node->btree[0], tmp);
 
1077         INT_MOD(save_node->hdr.count, ARCH_CONVERT, INT_GET(drop_node->hdr.count, ARCH_CONVERT));
 
1079         xfs_da_log_buf(tp, save_blk->bp,
 
1080                 XFS_DA_LOGRANGE(save_node, &save_node->hdr,
 
1081                         sizeof(save_node->hdr)));
 
1084          * Save the last hashval in the remaining block for upward propagation.
 
1086         save_blk->hashval = INT_GET(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
 
1089 /*========================================================================
 
1090  * Routines used for finding things in the Btree.
 
1091  *========================================================================*/
 
1094  * Walk down the Btree looking for a particular filename, filling
 
1095  * in the state structure as we go.
 
1097  * We will set the state structure to point to each of the elements
 
1098  * in each of the nodes where either the hashval is or should be.
 
1100  * We support duplicate hashval's so for each entry in the current
 
1101  * node that could contain the desired hashval, descend.  This is a
 
1102  * pruned depth-first tree search.
 
1105 xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
 
1107         xfs_da_state_blk_t *blk;
 
1108         xfs_da_blkinfo_t *curr;
 
1109         xfs_da_intnode_t *node;
 
1110         xfs_da_node_entry_t *btree;
 
1112         int probe, span, max, error, retval;
 
1113         xfs_dahash_t hashval;
 
1114         xfs_da_args_t *args;
 
1119          * Descend thru the B-tree searching each level for the right
 
1120          * node to use, until the right hashval is found.
 
1122         if (args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(state->mp))
 
1123                 blkno = state->mp->m_dirleafblk;
 
1126         for (blk = &state->path.blk[0], state->path.active = 1;
 
1127                          state->path.active <= XFS_DA_NODE_MAXDEPTH;
 
1128                          blk++, state->path.active++) {
 
1130                  * Read the next node down in the tree.
 
1133                 error = xfs_da_read_buf(args->trans, args->dp, blkno,
 
1134                                         -1, &blk->bp, args->whichfork);
 
1137                         state->path.active--;
 
1140                 curr = blk->bp->data;
 
1141                 ASSERT(INT_GET(curr->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC ||
 
1142                        INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
 
1143                        INT_GET(curr->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
 
1146                  * Search an intermediate node for a match.
 
1148                 blk->magic = INT_GET(curr->magic, ARCH_CONVERT);
 
1149                 if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
 
1150                         node = blk->bp->data;
 
1151                         blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
 
1154                          * Binary search.  (note: small blocks will skip loop)
 
1156                         max = INT_GET(node->hdr.count, ARCH_CONVERT);
 
1157                         probe = span = max / 2;
 
1158                         hashval = args->hashval;
 
1159                         for (btree = &node->btree[probe]; span > 4;
 
1160                                    btree = &node->btree[probe]) {
 
1162                                 if (INT_GET(btree->hashval, ARCH_CONVERT) < hashval)
 
1164                                 else if (INT_GET(btree->hashval, ARCH_CONVERT) > hashval)
 
1169                         ASSERT((probe >= 0) && (probe < max));
 
1170                         ASSERT((span <= 4) || (INT_GET(btree->hashval, ARCH_CONVERT) == hashval));
 
1173                          * Since we may have duplicate hashval's, find the first
 
1174                          * matching hashval in the node.
 
1176                         while ((probe > 0) && (INT_GET(btree->hashval, ARCH_CONVERT) >= hashval)) {
 
1180                         while ((probe < max) && (INT_GET(btree->hashval, ARCH_CONVERT) < hashval)) {
 
1186                          * Pick the right block to descend on.
 
1190                                 blkno = INT_GET(node->btree[ max-1 ].before, ARCH_CONVERT);
 
1193                                 blkno = INT_GET(btree->before, ARCH_CONVERT);
 
1196                 else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC) {
 
1197                         blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
 
1200                 else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) {
 
1201                         blk->hashval = xfs_dir_leaf_lasthash(blk->bp, NULL);
 
1204                 else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) {
 
1205                         blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL);
 
1211          * A leaf block that ends in the hashval that we are interested in
 
1212          * (final hashval == search hashval) means that the next block may
 
1213          * contain more entries with the same hashval, shift upward to the
 
1214          * next leaf and keep searching.
 
1217                 if (blk->magic == XFS_DIR_LEAF_MAGIC) {
 
1218                         ASSERT(XFS_DIR_IS_V1(state->mp));
 
1219                         retval = xfs_dir_leaf_lookup_int(blk->bp, args,
 
1221                 } else if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
 
1222                         ASSERT(XFS_DIR_IS_V2(state->mp));
 
1223                         retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
 
1224                                                         &blk->index, state);
 
1226                 else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
 
1227                         retval = xfs_attr_leaf_lookup_int(blk->bp, args);
 
1228                         blk->index = args->index;
 
1229                         args->blkno = blk->blkno;
 
1231                 if (((retval == ENOENT) || (retval == ENOATTR)) &&
 
1232                     (blk->hashval == args->hashval)) {
 
1233                         error = xfs_da_path_shift(state, &state->path, 1, 1,
 
1240                         else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
 
1241                                 /* path_shift() gives ENOENT */
 
1242                                 retval = XFS_ERROR(ENOATTR);
 
1251 /*========================================================================
 
1253  *========================================================================*/
 
1256  * Link a new block into a doubly linked list of blocks (of whatever type).
 
1259 xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
 
1260                                xfs_da_state_blk_t *new_blk)
 
1262         xfs_da_blkinfo_t *old_info, *new_info, *tmp_info;
 
1263         xfs_da_args_t *args;
 
1264         int before=0, error;
 
1268          * Set up environment.
 
1271         ASSERT(args != NULL);
 
1272         old_info = old_blk->bp->data;
 
1273         new_info = new_blk->bp->data;
 
1274         ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
 
1275                old_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) ||
 
1276                old_blk->magic == XFS_ATTR_LEAF_MAGIC);
 
1277         ASSERT(old_blk->magic == INT_GET(old_info->magic, ARCH_CONVERT));
 
1278         ASSERT(new_blk->magic == INT_GET(new_info->magic, ARCH_CONVERT));
 
1279         ASSERT(old_blk->magic == new_blk->magic);
 
1281         switch (old_blk->magic) {
 
1282         case XFS_ATTR_LEAF_MAGIC:
 
1283                 before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
 
1285         case XFS_DIR_LEAF_MAGIC:
 
1286                 ASSERT(XFS_DIR_IS_V1(state->mp));
 
1287                 before = xfs_dir_leaf_order(old_blk->bp, new_blk->bp);
 
1289         case XFS_DIR2_LEAFN_MAGIC:
 
1290                 ASSERT(XFS_DIR_IS_V2(state->mp));
 
1291                 before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp);
 
1293         case XFS_DA_NODE_MAGIC:
 
1294                 before = xfs_da_node_order(old_blk->bp, new_blk->bp);
 
1299          * Link blocks in appropriate order.
 
1303                  * Link new block in before existing block.
 
1305                 INT_SET(new_info->forw, ARCH_CONVERT, old_blk->blkno);
 
1306                 new_info->back = old_info->back; /* INT_: direct copy */
 
1307                 if (INT_GET(old_info->back, ARCH_CONVERT)) {
 
1308                         error = xfs_da_read_buf(args->trans, args->dp,
 
1309                                                 INT_GET(old_info->back,
 
1310                                                         ARCH_CONVERT), -1, &bp,
 
1315                         tmp_info = bp->data;
 
1316                         ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) == INT_GET(old_info->magic, ARCH_CONVERT));
 
1317                         ASSERT(INT_GET(tmp_info->forw, ARCH_CONVERT) == old_blk->blkno);
 
1318                         INT_SET(tmp_info->forw, ARCH_CONVERT, new_blk->blkno);
 
1319                         xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
 
1320                         xfs_da_buf_done(bp);
 
1322                 INT_SET(old_info->back, ARCH_CONVERT, new_blk->blkno);
 
1325                  * Link new block in after existing block.
 
1327                 new_info->forw = old_info->forw; /* INT_: direct copy */
 
1328                 INT_SET(new_info->back, ARCH_CONVERT, old_blk->blkno);
 
1329                 if (INT_GET(old_info->forw, ARCH_CONVERT)) {
 
1330                         error = xfs_da_read_buf(args->trans, args->dp,
 
1331                                                 INT_GET(old_info->forw, ARCH_CONVERT), -1, &bp,
 
1336                         tmp_info = bp->data;
 
1337                         ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT)
 
1338                                     == INT_GET(old_info->magic, ARCH_CONVERT));
 
1339                         ASSERT(INT_GET(tmp_info->back, ARCH_CONVERT)
 
1341                         INT_SET(tmp_info->back, ARCH_CONVERT, new_blk->blkno);
 
1342                         xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
 
1343                         xfs_da_buf_done(bp);
 
1345                 INT_SET(old_info->forw, ARCH_CONVERT, new_blk->blkno);
 
1348         xfs_da_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
 
1349         xfs_da_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
 
1354  * Compare two intermediate nodes for "order".
 
1357 xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
 
1359         xfs_da_intnode_t *node1, *node2;
 
1361         node1 = node1_bp->data;
 
1362         node2 = node2_bp->data;
 
1363         ASSERT((INT_GET(node1->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) &&
 
1364                (INT_GET(node2->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC));
 
1365         if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) &&
 
1366             ((INT_GET(node2->btree[ 0 ].hashval, ARCH_CONVERT) <
 
1367               INT_GET(node1->btree[ 0 ].hashval, ARCH_CONVERT)) ||
 
1368              (INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
 
1369               INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) {
 
1376  * Pick up the last hashvalue from an intermediate node.
 
1379 xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
 
1381         xfs_da_intnode_t *node;
 
1384         ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
 
1386                 *count = INT_GET(node->hdr.count, ARCH_CONVERT);
 
1387         if (!node->hdr.count)
 
1389         return(INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT));
 
1393  * Unlink a block from a doubly linked list of blocks.
 
1395 STATIC int                                              /* error */
 
1396 xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
 
1397                                  xfs_da_state_blk_t *save_blk)
 
1399         xfs_da_blkinfo_t *drop_info, *save_info, *tmp_info;
 
1400         xfs_da_args_t *args;
 
1405          * Set up environment.
 
1408         ASSERT(args != NULL);
 
1409         save_info = save_blk->bp->data;
 
1410         drop_info = drop_blk->bp->data;
 
1411         ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
 
1412                save_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) ||
 
1413                save_blk->magic == XFS_ATTR_LEAF_MAGIC);
 
1414         ASSERT(save_blk->magic == INT_GET(save_info->magic, ARCH_CONVERT));
 
1415         ASSERT(drop_blk->magic == INT_GET(drop_info->magic, ARCH_CONVERT));
 
1416         ASSERT(save_blk->magic == drop_blk->magic);
 
1417         ASSERT((INT_GET(save_info->forw, ARCH_CONVERT) == drop_blk->blkno) ||
 
1418                (INT_GET(save_info->back, ARCH_CONVERT) == drop_blk->blkno));
 
1419         ASSERT((INT_GET(drop_info->forw, ARCH_CONVERT) == save_blk->blkno) ||
 
1420                (INT_GET(drop_info->back, ARCH_CONVERT) == save_blk->blkno));
 
1423          * Unlink the leaf block from the doubly linked chain of leaves.
 
1425         if (INT_GET(save_info->back, ARCH_CONVERT) == drop_blk->blkno) {
 
1426                 save_info->back = drop_info->back; /* INT_: direct copy */
 
1427                 if (INT_GET(drop_info->back, ARCH_CONVERT)) {
 
1428                         error = xfs_da_read_buf(args->trans, args->dp,
 
1429                                                 INT_GET(drop_info->back,
 
1430                                                         ARCH_CONVERT), -1, &bp,
 
1435                         tmp_info = bp->data;
 
1436                         ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) == INT_GET(save_info->magic, ARCH_CONVERT));
 
1437                         ASSERT(INT_GET(tmp_info->forw, ARCH_CONVERT) == drop_blk->blkno);
 
1438                         INT_SET(tmp_info->forw, ARCH_CONVERT, save_blk->blkno);
 
1439                         xfs_da_log_buf(args->trans, bp, 0,
 
1440                                                     sizeof(*tmp_info) - 1);
 
1441                         xfs_da_buf_done(bp);
 
1444                 save_info->forw = drop_info->forw; /* INT_: direct copy */
 
1445                 if (INT_GET(drop_info->forw, ARCH_CONVERT)) {
 
1446                         error = xfs_da_read_buf(args->trans, args->dp,
 
1447                                                 INT_GET(drop_info->forw, ARCH_CONVERT), -1, &bp,
 
1452                         tmp_info = bp->data;
 
1453                         ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT)
 
1454                                     == INT_GET(save_info->magic, ARCH_CONVERT));
 
1455                         ASSERT(INT_GET(tmp_info->back, ARCH_CONVERT)
 
1456                                     == drop_blk->blkno);
 
1457                         INT_SET(tmp_info->back, ARCH_CONVERT, save_blk->blkno);
 
1458                         xfs_da_log_buf(args->trans, bp, 0,
 
1459                                                     sizeof(*tmp_info) - 1);
 
1460                         xfs_da_buf_done(bp);
 
1464         xfs_da_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
 
1469  * Move a path "forward" or "!forward" one block at the current level.
 
1471  * This routine will adjust a "path" to point to the next block
 
1472  * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
 
1473  * Btree, including updating pointers to the intermediate nodes between
 
1474  * the new bottom and the root.
 
1477 xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
 
1478                                  int forward, int release, int *result)
 
1480         xfs_da_state_blk_t *blk;
 
1481         xfs_da_blkinfo_t *info;
 
1482         xfs_da_intnode_t *node;
 
1483         xfs_da_args_t *args;
 
1484         xfs_dablk_t blkno=0;
 
1488          * Roll up the Btree looking for the first block where our
 
1489          * current index is not at the edge of the block.  Note that
 
1490          * we skip the bottom layer because we want the sibling block.
 
1493         ASSERT(args != NULL);
 
1494         ASSERT(path != NULL);
 
1495         ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
 
1496         level = (path->active-1) - 1;   /* skip bottom layer in path */
 
1497         for (blk = &path->blk[level]; level >= 0; blk--, level--) {
 
1498                 ASSERT(blk->bp != NULL);
 
1499                 node = blk->bp->data;
 
1500                 ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
 
1501                 if (forward && (blk->index < INT_GET(node->hdr.count, ARCH_CONVERT)-1)) {
 
1503                         blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
 
1505                 } else if (!forward && (blk->index > 0)) {
 
1507                         blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
 
1512                 *result = XFS_ERROR(ENOENT);    /* we're out of our tree */
 
1513                 ASSERT(args->oknoent);
 
1518          * Roll down the edge of the subtree until we reach the
 
1519          * same depth we were at originally.
 
1521         for (blk++, level++; level < path->active; blk++, level++) {
 
1523                  * Release the old block.
 
1524                  * (if it's dirty, trans won't actually let go)
 
1527                         xfs_da_brelse(args->trans, blk->bp);
 
1530                  * Read the next child block.
 
1533                 error = xfs_da_read_buf(args->trans, args->dp, blkno, -1,
 
1534                                                      &blk->bp, args->whichfork);
 
1537                 ASSERT(blk->bp != NULL);
 
1538                 info = blk->bp->data;
 
1539                 ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC ||
 
1540                        INT_GET(info->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
 
1541                        INT_GET(info->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
 
1542                 blk->magic = INT_GET(info->magic, ARCH_CONVERT);
 
1543                 if (INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
 
1544                         node = (xfs_da_intnode_t *)info;
 
1545                         blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
 
1549                                 blk->index = INT_GET(node->hdr.count, ARCH_CONVERT)-1;
 
1550                         blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
 
1552                         ASSERT(level == path->active-1);
 
1554                         switch(blk->magic) {
 
1555                         case XFS_ATTR_LEAF_MAGIC:
 
1556                                 blk->hashval = xfs_attr_leaf_lasthash(blk->bp,
 
1559                         case XFS_DIR_LEAF_MAGIC:
 
1560                                 ASSERT(XFS_DIR_IS_V1(state->mp));
 
1561                                 blk->hashval = xfs_dir_leaf_lasthash(blk->bp,
 
1564                         case XFS_DIR2_LEAFN_MAGIC:
 
1565                                 ASSERT(XFS_DIR_IS_V2(state->mp));
 
1566                                 blk->hashval = xfs_dir2_leafn_lasthash(blk->bp,
 
1570                                 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC ||
 
1572                                        XFS_DIRX_LEAF_MAGIC(state->mp));
 
1582 /*========================================================================
 
1584  *========================================================================*/
 
1587  * Implement a simple hash on a character string.
 
1588  * Rotate the hash value by 7 bits, then XOR each character in.
 
1589  * This is implemented with some source-level loop unrolling.
 
1592 xfs_da_hashname(const uchar_t *name, int namelen)
 
1597          * Do four characters at a time as long as we can.
 
1599         for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
 
1600                 hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
 
1601                        (name[3] << 0) ^ rol32(hash, 7 * 4);
 
1604          * Now do the rest of the characters.
 
1608                 return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
 
1611                 return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
 
1613                 return (name[0] << 0) ^ rol32(hash, 7 * 1);
 
1614         default: /* case 0: */
 
1620  * Add a block to the btree ahead of the file.
 
1621  * Return the new block number to the caller.
 
1624 xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
 
1626         xfs_fileoff_t bno, b;
 
1627         xfs_bmbt_irec_t map;
 
1628         xfs_bmbt_irec_t *mapp;
 
1630         int nmap, error, w, count, c, got, i, mapi;
 
1637         w = args->whichfork;
 
1640          * For new directories adjust the file offset and block count.
 
1642         if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) {
 
1643                 bno = mp->m_dirleafblk;
 
1644                 count = mp->m_dirblkfsbs;
 
1650          * Find a spot in the file space to put the new block.
 
1652         if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w))) {
 
1655         if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
 
1656                 ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk);
 
1658          * Try mapping it in one filesystem block.
 
1661         ASSERT(args->firstblock != NULL);
 
1662         if ((error = xfs_bmapi(tp, dp, bno, count,
 
1663                         XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|
 
1665                         args->firstblock, args->total, &map, &nmap,
 
1675          * If we didn't get it and the block might work if fragmented,
 
1676          * try without the CONTIG flag.  Loop until we get it all.
 
1678         else if (nmap == 0 && count > 1) {
 
1679                 mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
 
1680                 for (b = bno, mapi = 0; b < bno + count; ) {
 
1681                         nmap = MIN(XFS_BMAP_MAX_NMAP, count);
 
1682                         c = (int)(bno + count - b);
 
1683                         if ((error = xfs_bmapi(tp, dp, b, c,
 
1684                                         XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE|
 
1686                                         args->firstblock, args->total,
 
1687                                         &mapp[mapi], &nmap, args->flist))) {
 
1688                                 kmem_free(mapp, sizeof(*mapp) * count);
 
1694                         b = mapp[mapi - 1].br_startoff +
 
1695                             mapp[mapi - 1].br_blockcount;
 
1702          * Count the blocks we got, make sure it matches the total.
 
1704         for (i = 0, got = 0; i < mapi; i++)
 
1705                 got += mapp[i].br_blockcount;
 
1706         if (got != count || mapp[0].br_startoff != bno ||
 
1707             mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
 
1710                         kmem_free(mapp, sizeof(*mapp) * count);
 
1711                 return XFS_ERROR(ENOSPC);
 
1714                 kmem_free(mapp, sizeof(*mapp) * count);
 
1715         *new_blkno = (xfs_dablk_t)bno;
 
1717          * For version 1 directories, adjust the file size if it changed.
 
1719         if (w == XFS_DATA_FORK && XFS_DIR_IS_V1(mp)) {
 
1721                 if ((error = xfs_bmap_last_offset(tp, dp, &bno, w)))
 
1723                 size = XFS_FSB_TO_B(mp, bno);
 
1724                 if (size != dp->i_d.di_size) {
 
1725                         dp->i_d.di_size = size;
 
1726                         xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
 
1733  * Ick.  We need to always be able to remove a btree block, even
 
1734  * if there's no space reservation because the filesystem is full.
 
1735  * This is called if xfs_bunmapi on a btree block fails due to ENOSPC.
 
1736  * It swaps the target block with the last block in the file.  The
 
1737  * last block in the file can always be removed since it can't cause
 
1738  * a bmap btree split to do that.
 
1741 xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
 
1742                       xfs_dabuf_t **dead_bufp)
 
1744         xfs_dablk_t dead_blkno, last_blkno, sib_blkno, par_blkno;
 
1745         xfs_dabuf_t *dead_buf, *last_buf, *sib_buf, *par_buf;
 
1746         xfs_fileoff_t lastoff;
 
1750         int error, w, entno, level, dead_level;
 
1751         xfs_da_blkinfo_t *dead_info, *sib_info;
 
1752         xfs_da_intnode_t *par_node, *dead_node;
 
1753         xfs_dir_leafblock_t *dead_leaf;
 
1754         xfs_dir2_leaf_t *dead_leaf2;
 
1755         xfs_dahash_t dead_hash;
 
1757         dead_buf = *dead_bufp;
 
1758         dead_blkno = *dead_blknop;
 
1761         w = args->whichfork;
 
1762         ASSERT(w == XFS_DATA_FORK);
 
1764         if (XFS_DIR_IS_V2(mp)) {
 
1765                 lastoff = mp->m_dirfreeblk;
 
1766                 error = xfs_bmap_last_before(tp, ip, &lastoff, w);
 
1768                 error = xfs_bmap_last_offset(tp, ip, &lastoff, w);
 
1771         if (unlikely(lastoff == 0)) {
 
1772                 XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW,
 
1774                 return XFS_ERROR(EFSCORRUPTED);
 
1777          * Read the last block in the btree space.
 
1779         last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs;
 
1780         if ((error = xfs_da_read_buf(tp, ip, last_blkno, -1, &last_buf, w)))
 
1783          * Copy the last block into the dead buffer and log it.
 
1785         memcpy(dead_buf->data, last_buf->data, mp->m_dirblksize);
 
1786         xfs_da_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1);
 
1787         dead_info = dead_buf->data;
 
1789          * Get values from the moved block.
 
1791         if (INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) {
 
1792                 ASSERT(XFS_DIR_IS_V1(mp));
 
1793                 dead_leaf = (xfs_dir_leafblock_t *)dead_info;
 
1796                         INT_GET(dead_leaf->entries[INT_GET(dead_leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
 
1797         } else if (INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) {
 
1798                 ASSERT(XFS_DIR_IS_V2(mp));
 
1799                 dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
 
1801                 dead_hash = INT_GET(dead_leaf2->ents[INT_GET(dead_leaf2->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
 
1803                 ASSERT(INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
 
1804                 dead_node = (xfs_da_intnode_t *)dead_info;
 
1805                 dead_level = INT_GET(dead_node->hdr.level, ARCH_CONVERT);
 
1806                 dead_hash = INT_GET(dead_node->btree[INT_GET(dead_node->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
 
1808         sib_buf = par_buf = NULL;
 
1810          * If the moved block has a left sibling, fix up the pointers.
 
1812         if ((sib_blkno = INT_GET(dead_info->back, ARCH_CONVERT))) {
 
1813                 if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
 
1815                 sib_info = sib_buf->data;
 
1817                     INT_GET(sib_info->forw, ARCH_CONVERT) != last_blkno ||
 
1818                     INT_GET(sib_info->magic, ARCH_CONVERT) != INT_GET(dead_info->magic, ARCH_CONVERT))) {
 
1819                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)",
 
1820                                          XFS_ERRLEVEL_LOW, mp);
 
1821                         error = XFS_ERROR(EFSCORRUPTED);
 
1824                 INT_SET(sib_info->forw, ARCH_CONVERT, dead_blkno);
 
1825                 xfs_da_log_buf(tp, sib_buf,
 
1826                         XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
 
1827                                         sizeof(sib_info->forw)));
 
1828                 xfs_da_buf_done(sib_buf);
 
1832          * If the moved block has a right sibling, fix up the pointers.
 
1834         if ((sib_blkno = INT_GET(dead_info->forw, ARCH_CONVERT))) {
 
1835                 if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
 
1837                 sib_info = sib_buf->data;
 
1839                        INT_GET(sib_info->back, ARCH_CONVERT) != last_blkno
 
1840                     || INT_GET(sib_info->magic, ARCH_CONVERT)
 
1841                                 != INT_GET(dead_info->magic, ARCH_CONVERT))) {
 
1842                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)",
 
1843                                          XFS_ERRLEVEL_LOW, mp);
 
1844                         error = XFS_ERROR(EFSCORRUPTED);
 
1847                 INT_SET(sib_info->back, ARCH_CONVERT, dead_blkno);
 
1848                 xfs_da_log_buf(tp, sib_buf,
 
1849                         XFS_DA_LOGRANGE(sib_info, &sib_info->back,
 
1850                                         sizeof(sib_info->back)));
 
1851                 xfs_da_buf_done(sib_buf);
 
1854         par_blkno = XFS_DIR_IS_V1(mp) ? 0 : mp->m_dirleafblk;
 
1857          * Walk down the tree looking for the parent of the moved block.
 
1860                 if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
 
1862                 par_node = par_buf->data;
 
1864                     INT_GET(par_node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC ||
 
1865                     (level >= 0 && level != INT_GET(par_node->hdr.level, ARCH_CONVERT) + 1))) {
 
1866                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
 
1867                                          XFS_ERRLEVEL_LOW, mp);
 
1868                         error = XFS_ERROR(EFSCORRUPTED);
 
1871                 level = INT_GET(par_node->hdr.level, ARCH_CONVERT);
 
1873                      entno < INT_GET(par_node->hdr.count, ARCH_CONVERT) &&
 
1874                      INT_GET(par_node->btree[entno].hashval, ARCH_CONVERT) < dead_hash;
 
1877                 if (unlikely(entno == INT_GET(par_node->hdr.count, ARCH_CONVERT))) {
 
1878                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)",
 
1879                                          XFS_ERRLEVEL_LOW, mp);
 
1880                         error = XFS_ERROR(EFSCORRUPTED);
 
1883                 par_blkno = INT_GET(par_node->btree[entno].before, ARCH_CONVERT);
 
1884                 if (level == dead_level + 1)
 
1886                 xfs_da_brelse(tp, par_buf);
 
1890          * We're in the right parent block.
 
1891          * Look for the right entry.
 
1895                      entno < INT_GET(par_node->hdr.count, ARCH_CONVERT) &&
 
1896                      INT_GET(par_node->btree[entno].before, ARCH_CONVERT) != last_blkno;
 
1899                 if (entno < INT_GET(par_node->hdr.count, ARCH_CONVERT))
 
1901                 par_blkno = INT_GET(par_node->hdr.info.forw, ARCH_CONVERT);
 
1902                 xfs_da_brelse(tp, par_buf);
 
1904                 if (unlikely(par_blkno == 0)) {
 
1905                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)",
 
1906                                          XFS_ERRLEVEL_LOW, mp);
 
1907                         error = XFS_ERROR(EFSCORRUPTED);
 
1910                 if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
 
1912                 par_node = par_buf->data;
 
1914                     INT_GET(par_node->hdr.level, ARCH_CONVERT) != level ||
 
1915                     INT_GET(par_node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC)) {
 
1916                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
 
1917                                          XFS_ERRLEVEL_LOW, mp);
 
1918                         error = XFS_ERROR(EFSCORRUPTED);
 
1924          * Update the parent entry pointing to the moved block.
 
1926         INT_SET(par_node->btree[entno].before, ARCH_CONVERT, dead_blkno);
 
1927         xfs_da_log_buf(tp, par_buf,
 
1928                 XFS_DA_LOGRANGE(par_node, &par_node->btree[entno].before,
 
1929                                 sizeof(par_node->btree[entno].before)));
 
1930         xfs_da_buf_done(par_buf);
 
1931         xfs_da_buf_done(dead_buf);
 
1932         *dead_blknop = last_blkno;
 
1933         *dead_bufp = last_buf;
 
1937                 xfs_da_brelse(tp, par_buf);
 
1939                 xfs_da_brelse(tp, sib_buf);
 
1940         xfs_da_brelse(tp, last_buf);
 
1945  * Remove a btree block from a directory or attribute.
 
1948 xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
 
1949                     xfs_dabuf_t *dead_buf)
 
1952         int done, error, w, count;
 
1959         w = args->whichfork;
 
1962         if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
 
1963                 count = mp->m_dirblkfsbs;
 
1968                  * Remove extents.  If we get ENOSPC for a dir we have to move
 
1969                  * the last block to the place we want to kill.
 
1971                 if ((error = xfs_bunmapi(tp, dp, dead_blkno, count,
 
1972                                 XFS_BMAPI_AFLAG(w)|XFS_BMAPI_METADATA,
 
1973                                 0, args->firstblock, args->flist,
 
1974                                 &done)) == ENOSPC) {
 
1975                         if (w != XFS_DATA_FORK)
 
1977                         if ((error = xfs_da_swap_lastblock(args, &dead_blkno,
 
1986         xfs_da_binval(tp, dead_buf);
 
1988          * Adjust the directory size for version 1.
 
1990         if (w == XFS_DATA_FORK && XFS_DIR_IS_V1(mp)) {
 
1991                 if ((error = xfs_bmap_last_offset(tp, dp, &bno, w)))
 
1993                 size = XFS_FSB_TO_B(dp->i_mount, bno);
 
1994                 if (size != dp->i_d.di_size) {
 
1995                         dp->i_d.di_size = size;
 
1996                         xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
 
2001         xfs_da_binval(tp, dead_buf);
 
2006  * See if the mapping(s) for this btree block are valid, i.e.
 
2007  * don't contain holes, are logically contiguous, and cover the whole range.
 
2010 xfs_da_map_covers_blocks(
 
2012         xfs_bmbt_irec_t *mapp,
 
2019         for (i = 0, off = bno; i < nmap; i++) {
 
2020                 if (mapp[i].br_startblock == HOLESTARTBLOCK ||
 
2021                     mapp[i].br_startblock == DELAYSTARTBLOCK) {
 
2024                 if (off != mapp[i].br_startoff) {
 
2027                 off += mapp[i].br_blockcount;
 
2029         return off == bno + count;
 
2034  * Used for get_buf, read_buf, read_bufr, and reada_buf.
 
2041         xfs_daddr_t     *mappedbnop,
 
2047         xfs_buf_t       *bp = NULL;
 
2051         xfs_bmbt_irec_t map;
 
2052         xfs_bmbt_irec_t *mapp;
 
2053         xfs_daddr_t     mappedbno;
 
2061         if (whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
 
2062                 nfsb = mp->m_dirblkfsbs;
 
2065         mappedbno = *mappedbnop;
 
2067          * Caller doesn't have a mapping.  -2 means don't complain
 
2068          * if we land in a hole.
 
2070         if (mappedbno == -1 || mappedbno == -2) {
 
2072                  * Optimize the one-block case.
 
2078                             xfs_bmapi_single(trans, dp, whichfork, &fsb,
 
2079                                     (xfs_fileoff_t)bno))) {
 
2083                         if (fsb == NULLFSBLOCK) {
 
2086                                 map.br_startblock = fsb;
 
2087                                 map.br_startoff = (xfs_fileoff_t)bno;
 
2088                                 map.br_blockcount = 1;
 
2092                         mapp = kmem_alloc(sizeof(*mapp) * nfsb, KM_SLEEP);
 
2094                         if ((error = xfs_bmapi(trans, dp, (xfs_fileoff_t)bno,
 
2096                                         XFS_BMAPI_METADATA |
 
2097                                                 XFS_BMAPI_AFLAG(whichfork),
 
2098                                         NULL, 0, mapp, &nmap, NULL)))
 
2102                 map.br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
 
2103                 map.br_startoff = (xfs_fileoff_t)bno;
 
2104                 map.br_blockcount = nfsb;
 
2108         if (!xfs_da_map_covers_blocks(nmap, mapp, bno, nfsb)) {
 
2109                 error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED);
 
2110                 if (unlikely(error == EFSCORRUPTED)) {
 
2111                         if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
 
2113                                 cmn_err(CE_ALERT, "xfs_da_do_buf: bno %lld\n",
 
2115                                 cmn_err(CE_ALERT, "dir: inode %lld\n",
 
2116                                         (long long)dp->i_ino);
 
2117                                 for (i = 0; i < nmap; i++) {
 
2119                                                 "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d\n",
 
2121                                                 (long long)mapp[i].br_startoff,
 
2122                                                 (long long)mapp[i].br_startblock,
 
2123                                                 (long long)mapp[i].br_blockcount,
 
2127                         XFS_ERROR_REPORT("xfs_da_do_buf(1)",
 
2128                                          XFS_ERRLEVEL_LOW, mp);
 
2132         if (caller != 3 && nmap > 1) {
 
2133                 bplist = kmem_alloc(sizeof(*bplist) * nmap, KM_SLEEP);
 
2138          * Turn the mapping(s) into buffer(s).
 
2140         for (i = 0; i < nmap; i++) {
 
2143                 mappedbno = XFS_FSB_TO_DADDR(mp, mapp[i].br_startblock);
 
2145                         *mappedbnop = mappedbno;
 
2146                 nmapped = (int)XFS_FSB_TO_BB(mp, mapp[i].br_blockcount);
 
2149                         bp = xfs_trans_get_buf(trans, mp->m_ddev_targp,
 
2150                                 mappedbno, nmapped, 0);
 
2151                         error = bp ? XFS_BUF_GETERROR(bp) : XFS_ERROR(EIO);
 
2156                         error = xfs_trans_read_buf(mp, trans, mp->m_ddev_targp,
 
2157                                 mappedbno, nmapped, 0, &bp);
 
2160                         xfs_baread(mp->m_ddev_targp, mappedbno, nmapped);
 
2167                                 xfs_trans_brelse(trans, bp);
 
2173                         if (whichfork == XFS_ATTR_FORK) {
 
2174                                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_ATTR_BTREE,
 
2175                                                 XFS_ATTR_BTREE_REF);
 
2177                                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_DIR_BTREE,
 
2182                         bplist[nbplist++] = bp;
 
2186          * Build a dabuf structure.
 
2189                 rbp = xfs_da_buf_make(nbplist, bplist, ra);
 
2191                 rbp = xfs_da_buf_make(1, &bp, ra);
 
2195          * For read_buf, check the magic number.
 
2198                 xfs_dir2_data_t         *data;
 
2199                 xfs_dir2_free_t         *free;
 
2200                 xfs_da_blkinfo_t        *info;
 
2206                 magic = INT_GET(info->magic, ARCH_CONVERT);
 
2207                 magic1 = INT_GET(data->hdr.magic, ARCH_CONVERT);
 
2209                     XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
 
2210                                    (magic != XFS_DIR_LEAF_MAGIC) &&
 
2211                                    (magic != XFS_ATTR_LEAF_MAGIC) &&
 
2212                                    (magic != XFS_DIR2_LEAF1_MAGIC) &&
 
2213                                    (magic != XFS_DIR2_LEAFN_MAGIC) &&
 
2214                                    (magic1 != XFS_DIR2_BLOCK_MAGIC) &&
 
2215                                    (magic1 != XFS_DIR2_DATA_MAGIC) &&
 
2216                                    (INT_GET(free->hdr.magic, ARCH_CONVERT) != XFS_DIR2_FREE_MAGIC),
 
2217                                 mp, XFS_ERRTAG_DA_READ_BUF,
 
2218                                 XFS_RANDOM_DA_READ_BUF))) {
 
2219                         xfs_buftrace("DA READ ERROR", rbp->bps[0]);
 
2220                         XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)",
 
2221                                              XFS_ERRLEVEL_LOW, mp, info);
 
2222                         error = XFS_ERROR(EFSCORRUPTED);
 
2223                         xfs_da_brelse(trans, rbp);
 
2229                 kmem_free(bplist, sizeof(*bplist) * nmap);
 
2232                 kmem_free(mapp, sizeof(*mapp) * nfsb);
 
2239                 for (i = 0; i < nbplist; i++)
 
2240                         xfs_trans_brelse(trans, bplist[i]);
 
2241                 kmem_free(bplist, sizeof(*bplist) * nmap);
 
2245                 kmem_free(mapp, sizeof(*mapp) * nfsb);
 
2252  * Get a buffer for the dir/attr block.
 
2259         xfs_daddr_t             mappedbno,
 
2263         return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0,
 
2264                                                  (inst_t *)__return_address);
 
2268  * Get a buffer for the dir/attr block, fill in the contents.
 
2275         xfs_daddr_t             mappedbno,
 
2279         return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1,
 
2280                 (inst_t *)__return_address);
 
2284  * Readahead the dir/attr block.
 
2296         if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3,
 
2297                         (inst_t *)__return_address))
 
2304  * Calculate the number of bits needed to hold i different values.
 
2307 xfs_da_log2_roundup(uint i)
 
2311         for (rval = 0; rval < NBBY * sizeof(i); rval++) {
 
2312                 if ((1 << rval) >= i)
 
2318 kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */
 
2319 kmem_zone_t *xfs_dabuf_zone;            /* dabuf zone */
 
2322  * Allocate a dir-state structure.
 
2323  * We don't put them on the stack since they're large.
 
2326 xfs_da_state_alloc(void)
 
2328         return kmem_zone_zalloc(xfs_da_state_zone, KM_SLEEP);
 
2332  * Kill the altpath contents of a da-state structure.
 
2335 xfs_da_state_kill_altpath(xfs_da_state_t *state)
 
2339         for (i = 0; i < state->altpath.active; i++) {
 
2340                 if (state->altpath.blk[i].bp) {
 
2341                         if (state->altpath.blk[i].bp != state->path.blk[i].bp)
 
2342                                 xfs_da_buf_done(state->altpath.blk[i].bp);
 
2343                         state->altpath.blk[i].bp = NULL;
 
2346         state->altpath.active = 0;
 
2350  * Free a da-state structure.
 
2353 xfs_da_state_free(xfs_da_state_t *state)
 
2357         xfs_da_state_kill_altpath(state);
 
2358         for (i = 0; i < state->path.active; i++) {
 
2359                 if (state->path.blk[i].bp)
 
2360                         xfs_da_buf_done(state->path.blk[i].bp);
 
2362         if (state->extravalid && state->extrablk.bp)
 
2363                 xfs_da_buf_done(state->extrablk.bp);
 
2365         memset((char *)state, 0, sizeof(*state));
 
2367         kmem_zone_free(xfs_da_state_zone, state);
 
2370 #ifdef XFS_DABUF_DEBUG
 
2371 xfs_dabuf_t     *xfs_dabuf_global_list;
 
2372 lock_t          xfs_dabuf_global_lock;
 
2379 STATIC xfs_dabuf_t *
 
2380 xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
 
2388                 dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_SLEEP);
 
2390                 dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_SLEEP);
 
2392 #ifdef XFS_DABUF_DEBUG
 
2394         dabuf->target = XFS_BUF_TARGET(bps[0]);
 
2395         dabuf->blkno = XFS_BUF_ADDR(bps[0]);
 
2400                 dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp));
 
2401                 dabuf->data = XFS_BUF_PTR(bp);
 
2405                 for (i = 0, dabuf->bbcount = 0; i < nbuf; i++) {
 
2406                         dabuf->bps[i] = bp = bps[i];
 
2407                         dabuf->bbcount += BTOBB(XFS_BUF_COUNT(bp));
 
2409                 dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP);
 
2410                 for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) {
 
2412                         memcpy((char *)dabuf->data + off, XFS_BUF_PTR(bp),
 
2416 #ifdef XFS_DABUF_DEBUG
 
2421                 s = mutex_spinlock(&xfs_dabuf_global_lock);
 
2422                 for (p = xfs_dabuf_global_list; p; p = p->next) {
 
2423                         ASSERT(p->blkno != dabuf->blkno ||
 
2424                                p->target != dabuf->target);
 
2427                 if (xfs_dabuf_global_list)
 
2428                         xfs_dabuf_global_list->prev = dabuf;
 
2429                 dabuf->next = xfs_dabuf_global_list;
 
2430                 xfs_dabuf_global_list = dabuf;
 
2431                 mutex_spinunlock(&xfs_dabuf_global_lock, s);
 
2441 xfs_da_buf_clean(xfs_dabuf_t *dabuf)
 
2448                 ASSERT(dabuf->nbuf > 1);
 
2450                 for (i = off = 0; i < dabuf->nbuf;
 
2451                                 i++, off += XFS_BUF_COUNT(bp)) {
 
2453                         memcpy(XFS_BUF_PTR(bp), (char *)dabuf->data + off,
 
2463 xfs_da_buf_done(xfs_dabuf_t *dabuf)
 
2466         ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
 
2468                 xfs_da_buf_clean(dabuf);
 
2469         if (dabuf->nbuf > 1)
 
2470                 kmem_free(dabuf->data, BBTOB(dabuf->bbcount));
 
2471 #ifdef XFS_DABUF_DEBUG
 
2475                 s = mutex_spinlock(&xfs_dabuf_global_lock);
 
2477                         dabuf->prev->next = dabuf->next;
 
2479                         xfs_dabuf_global_list = dabuf->next;
 
2481                         dabuf->next->prev = dabuf->prev;
 
2482                 mutex_spinunlock(&xfs_dabuf_global_lock, s);
 
2484         memset(dabuf, 0, XFS_DA_BUF_SIZE(dabuf->nbuf));
 
2486         if (dabuf->nbuf == 1)
 
2487                 kmem_zone_free(xfs_dabuf_zone, dabuf);
 
2489                 kmem_free(dabuf, XFS_DA_BUF_SIZE(dabuf->nbuf));
 
2493  * Log transaction from a dabuf.
 
2496 xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last)
 
2504         ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
 
2505         if (dabuf->nbuf == 1) {
 
2506                 ASSERT(dabuf->data == (void *)XFS_BUF_PTR(dabuf->bps[0]));
 
2507                 xfs_trans_log_buf(tp, dabuf->bps[0], first, last);
 
2511         ASSERT(first <= last);
 
2512         for (i = off = 0; i < dabuf->nbuf; i++, off += XFS_BUF_COUNT(bp)) {
 
2515                 l = f + XFS_BUF_COUNT(bp) - 1;
 
2521                         xfs_trans_log_buf(tp, bp, f - off, l - off);
 
2523                  * B_DONE is set by xfs_trans_log buf.
 
2524                  * If we don't set it on a new buffer (get not read)
 
2525                  * then if we don't put anything in the buffer it won't
 
2526                  * be set, and at commit it it released into the cache,
 
2527                  * and then a read will fail.
 
2529                 else if (!(XFS_BUF_ISDONE(bp)))
 
2536  * Release dabuf from a transaction.
 
2537  * Have to free up the dabuf before the buffers are released,
 
2538  * since the synchronization on the dabuf is really the lock on the buffer.
 
2541 xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
 
2548         ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
 
2549         if ((nbuf = dabuf->nbuf) == 1) {
 
2553                 bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
 
2554                 memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
 
2556         xfs_da_buf_done(dabuf);
 
2557         for (i = 0; i < nbuf; i++)
 
2558                 xfs_trans_brelse(tp, bplist[i]);
 
2560                 kmem_free(bplist, nbuf * sizeof(*bplist));
 
2564  * Invalidate dabuf from a transaction.
 
2567 xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
 
2574         ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
 
2575         if ((nbuf = dabuf->nbuf) == 1) {
 
2579                 bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
 
2580                 memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
 
2582         xfs_da_buf_done(dabuf);
 
2583         for (i = 0; i < nbuf; i++)
 
2584                 xfs_trans_binval(tp, bplist[i]);
 
2586                 kmem_free(bplist, nbuf * sizeof(*bplist));
 
2590  * Get the first daddr from a dabuf.
 
2593 xfs_da_blkno(xfs_dabuf_t *dabuf)
 
2595         ASSERT(dabuf->nbuf);
 
2596         ASSERT(dabuf->data);
 
2597         return XFS_BUF_ADDR(dabuf->bps[0]);