[XFS] Remove xfs_macros.c, xfs_macros.h, rework headers a whole lot.
[linux-2.6] / fs / xfs / xfs_da_btree.c
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms of version 2 of the GNU General Public License as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it would be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11  *
12  * Further, this software is distributed without any warranty that it is
13  * free of the rightful claim of any third person regarding infringement
14  * or the like.  Any license provided herein, whether implied or
15  * otherwise, applies only to this software file.  Patent licenses, if
16  * any, provided herein do not apply to combinations of this program with
17  * other software, or any other product whatsoever.
18  *
19  * You should have received a copy of the GNU General Public License along
20  * with this program; if not, write the Free Software Foundation, Inc., 59
21  * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22  *
23  * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24  * Mountain View, CA  94043, or:
25  *
26  * http://www.sgi.com
27  *
28  * For further information regarding this notice, see:
29  *
30  * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31  */
32 #include "xfs.h"
33 #include "xfs_fs.h"
34 #include "xfs_types.h"
35 #include "xfs_bit.h"
36 #include "xfs_log.h"
37 #include "xfs_inum.h"
38 #include "xfs_trans.h"
39 #include "xfs_sb.h"
40 #include "xfs_ag.h"
41 #include "xfs_dir.h"
42 #include "xfs_dir2.h"
43 #include "xfs_dmapi.h"
44 #include "xfs_mount.h"
45 #include "xfs_da_btree.h"
46 #include "xfs_bmap_btree.h"
47 #include "xfs_alloc_btree.h"
48 #include "xfs_ialloc_btree.h"
49 #include "xfs_dir_sf.h"
50 #include "xfs_dir2_sf.h"
51 #include "xfs_attr_sf.h"
52 #include "xfs_dinode.h"
53 #include "xfs_inode.h"
54 #include "xfs_inode_item.h"
55 #include "xfs_alloc.h"
56 #include "xfs_btree.h"
57 #include "xfs_bmap.h"
58 #include "xfs_attr.h"
59 #include "xfs_attr_leaf.h"
60 #include "xfs_dir_leaf.h"
61 #include "xfs_dir2_data.h"
62 #include "xfs_dir2_leaf.h"
63 #include "xfs_dir2_block.h"
64 #include "xfs_dir2_node.h"
65 #include "xfs_error.h"
66
67 /*
68  * xfs_da_btree.c
69  *
70  * Routines to implement directories as Btrees of hashed names.
71  */
72
73 /*========================================================================
74  * Function prototypes for the kernel.
75  *========================================================================*/
76
77 /*
78  * Routines used for growing the Btree.
79  */
80 STATIC int xfs_da_root_split(xfs_da_state_t *state,
81                                             xfs_da_state_blk_t *existing_root,
82                                             xfs_da_state_blk_t *new_child);
83 STATIC int xfs_da_node_split(xfs_da_state_t *state,
84                                             xfs_da_state_blk_t *existing_blk,
85                                             xfs_da_state_blk_t *split_blk,
86                                             xfs_da_state_blk_t *blk_to_add,
87                                             int treelevel,
88                                             int *result);
89 STATIC void xfs_da_node_rebalance(xfs_da_state_t *state,
90                                          xfs_da_state_blk_t *node_blk_1,
91                                          xfs_da_state_blk_t *node_blk_2);
92 STATIC void xfs_da_node_add(xfs_da_state_t *state,
93                                    xfs_da_state_blk_t *old_node_blk,
94                                    xfs_da_state_blk_t *new_node_blk);
95
96 /*
97  * Routines used for shrinking the Btree.
98  */
99 STATIC int xfs_da_root_join(xfs_da_state_t *state,
100                                            xfs_da_state_blk_t *root_blk);
101 STATIC int xfs_da_node_toosmall(xfs_da_state_t *state, int *retval);
102 STATIC void xfs_da_node_remove(xfs_da_state_t *state,
103                                               xfs_da_state_blk_t *drop_blk);
104 STATIC void xfs_da_node_unbalance(xfs_da_state_t *state,
105                                          xfs_da_state_blk_t *src_node_blk,
106                                          xfs_da_state_blk_t *dst_node_blk);
107
108 /*
109  * Utility routines.
110  */
111 STATIC uint     xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count);
112 STATIC int      xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp);
113 STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra);
114 STATIC int      xfs_da_blk_unlink(xfs_da_state_t *state,
115                                   xfs_da_state_blk_t *drop_blk,
116                                   xfs_da_state_blk_t *save_blk);
117 STATIC void     xfs_da_state_kill_altpath(xfs_da_state_t *state);
118
119 /*========================================================================
120  * Routines used for growing the Btree.
121  *========================================================================*/
122
123 /*
124  * Create the initial contents of an intermediate node.
125  */
126 int
127 xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
128                                  xfs_dabuf_t **bpp, int whichfork)
129 {
130         xfs_da_intnode_t *node;
131         xfs_dabuf_t *bp;
132         int error;
133         xfs_trans_t *tp;
134
135         tp = args->trans;
136         error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork);
137         if (error)
138                 return(error);
139         ASSERT(bp != NULL);
140         node = bp->data;
141         node->hdr.info.forw = 0;
142         node->hdr.info.back = 0;
143         INT_SET(node->hdr.info.magic, ARCH_CONVERT, XFS_DA_NODE_MAGIC);
144         node->hdr.info.pad = 0;
145         node->hdr.count = 0;
146         INT_SET(node->hdr.level, ARCH_CONVERT, level);
147
148         xfs_da_log_buf(tp, bp,
149                 XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
150
151         *bpp = bp;
152         return(0);
153 }
154
155 /*
156  * Split a leaf node, rebalance, then possibly split
157  * intermediate nodes, rebalance, etc.
158  */
159 int                                                     /* error */
160 xfs_da_split(xfs_da_state_t *state)
161 {
162         xfs_da_state_blk_t *oldblk, *newblk, *addblk;
163         xfs_da_intnode_t *node;
164         xfs_dabuf_t *bp;
165         int max, action, error, i;
166
167         /*
168          * Walk back up the tree splitting/inserting/adjusting as necessary.
169          * If we need to insert and there isn't room, split the node, then
170          * decide which fragment to insert the new block from below into.
171          * Note that we may split the root this way, but we need more fixup.
172          */
173         max = state->path.active - 1;
174         ASSERT((max >= 0) && (max < XFS_DA_NODE_MAXDEPTH));
175         ASSERT(state->path.blk[max].magic == XFS_ATTR_LEAF_MAGIC ||
176                state->path.blk[max].magic == XFS_DIRX_LEAF_MAGIC(state->mp));
177
178         addblk = &state->path.blk[max];         /* initial dummy value */
179         for (i = max; (i >= 0) && addblk; state->path.active--, i--) {
180                 oldblk = &state->path.blk[i];
181                 newblk = &state->altpath.blk[i];
182
183                 /*
184                  * If a leaf node then
185                  *     Allocate a new leaf node, then rebalance across them.
186                  * else if an intermediate node then
187                  *     We split on the last layer, must we split the node?
188                  */
189                 switch (oldblk->magic) {
190                 case XFS_ATTR_LEAF_MAGIC:
191                         error = xfs_attr_leaf_split(state, oldblk, newblk);
192                         if ((error != 0) && (error != ENOSPC)) {
193                                 return(error);  /* GROT: attr is inconsistent */
194                         }
195                         if (!error) {
196                                 addblk = newblk;
197                                 break;
198                         }
199                         /*
200                          * Entry wouldn't fit, split the leaf again.
201                          */
202                         state->extravalid = 1;
203                         if (state->inleaf) {
204                                 state->extraafter = 0;  /* before newblk */
205                                 error = xfs_attr_leaf_split(state, oldblk,
206                                                             &state->extrablk);
207                         } else {
208                                 state->extraafter = 1;  /* after newblk */
209                                 error = xfs_attr_leaf_split(state, newblk,
210                                                             &state->extrablk);
211                         }
212                         if (error)
213                                 return(error);  /* GROT: attr inconsistent */
214                         addblk = newblk;
215                         break;
216                 case XFS_DIR_LEAF_MAGIC:
217                         ASSERT(XFS_DIR_IS_V1(state->mp));
218                         error = xfs_dir_leaf_split(state, oldblk, newblk);
219                         if ((error != 0) && (error != ENOSPC)) {
220                                 return(error);  /* GROT: dir is inconsistent */
221                         }
222                         if (!error) {
223                                 addblk = newblk;
224                                 break;
225                         }
226                         /*
227                          * Entry wouldn't fit, split the leaf again.
228                          */
229                         state->extravalid = 1;
230                         if (state->inleaf) {
231                                 state->extraafter = 0;  /* before newblk */
232                                 error = xfs_dir_leaf_split(state, oldblk,
233                                                            &state->extrablk);
234                                 if (error)
235                                         return(error);  /* GROT: dir incon. */
236                                 addblk = newblk;
237                         } else {
238                                 state->extraafter = 1;  /* after newblk */
239                                 error = xfs_dir_leaf_split(state, newblk,
240                                                            &state->extrablk);
241                                 if (error)
242                                         return(error);  /* GROT: dir incon. */
243                                 addblk = newblk;
244                         }
245                         break;
246                 case XFS_DIR2_LEAFN_MAGIC:
247                         ASSERT(XFS_DIR_IS_V2(state->mp));
248                         error = xfs_dir2_leafn_split(state, oldblk, newblk);
249                         if (error)
250                                 return error;
251                         addblk = newblk;
252                         break;
253                 case XFS_DA_NODE_MAGIC:
254                         error = xfs_da_node_split(state, oldblk, newblk, addblk,
255                                                          max - i, &action);
256                         xfs_da_buf_done(addblk->bp);
257                         addblk->bp = NULL;
258                         if (error)
259                                 return(error);  /* GROT: dir is inconsistent */
260                         /*
261                          * Record the newly split block for the next time thru?
262                          */
263                         if (action)
264                                 addblk = newblk;
265                         else
266                                 addblk = NULL;
267                         break;
268                 }
269
270                 /*
271                  * Update the btree to show the new hashval for this child.
272                  */
273                 xfs_da_fixhashpath(state, &state->path);
274                 /*
275                  * If we won't need this block again, it's getting dropped
276                  * from the active path by the loop control, so we need
277                  * to mark it done now.
278                  */
279                 if (i > 0 || !addblk)
280                         xfs_da_buf_done(oldblk->bp);
281         }
282         if (!addblk)
283                 return(0);
284
285         /*
286          * Split the root node.
287          */
288         ASSERT(state->path.active == 0);
289         oldblk = &state->path.blk[0];
290         error = xfs_da_root_split(state, oldblk, addblk);
291         if (error) {
292                 xfs_da_buf_done(oldblk->bp);
293                 xfs_da_buf_done(addblk->bp);
294                 addblk->bp = NULL;
295                 return(error);  /* GROT: dir is inconsistent */
296         }
297
298         /*
299          * Update pointers to the node which used to be block 0 and
300          * just got bumped because of the addition of a new root node.
301          * There might be three blocks involved if a double split occurred,
302          * and the original block 0 could be at any position in the list.
303          */
304
305         node = oldblk->bp->data;
306         if (node->hdr.info.forw) {
307                 if (INT_GET(node->hdr.info.forw, ARCH_CONVERT) == addblk->blkno) {
308                         bp = addblk->bp;
309                 } else {
310                         ASSERT(state->extravalid);
311                         bp = state->extrablk.bp;
312                 }
313                 node = bp->data;
314                 INT_SET(node->hdr.info.back, ARCH_CONVERT, oldblk->blkno);
315                 xfs_da_log_buf(state->args->trans, bp,
316                     XFS_DA_LOGRANGE(node, &node->hdr.info,
317                     sizeof(node->hdr.info)));
318         }
319         node = oldblk->bp->data;
320         if (INT_GET(node->hdr.info.back, ARCH_CONVERT)) {
321                 if (INT_GET(node->hdr.info.back, ARCH_CONVERT) == addblk->blkno) {
322                         bp = addblk->bp;
323                 } else {
324                         ASSERT(state->extravalid);
325                         bp = state->extrablk.bp;
326                 }
327                 node = bp->data;
328                 INT_SET(node->hdr.info.forw, ARCH_CONVERT, oldblk->blkno);
329                 xfs_da_log_buf(state->args->trans, bp,
330                     XFS_DA_LOGRANGE(node, &node->hdr.info,
331                     sizeof(node->hdr.info)));
332         }
333         xfs_da_buf_done(oldblk->bp);
334         xfs_da_buf_done(addblk->bp);
335         addblk->bp = NULL;
336         return(0);
337 }
338
339 /*
340  * Split the root.  We have to create a new root and point to the two
341  * parts (the split old root) that we just created.  Copy block zero to
342  * the EOF, extending the inode in process.
343  */
344 STATIC int                                              /* error */
345 xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
346                                  xfs_da_state_blk_t *blk2)
347 {
348         xfs_da_intnode_t *node, *oldroot;
349         xfs_da_args_t *args;
350         xfs_dablk_t blkno;
351         xfs_dabuf_t *bp;
352         int error, size;
353         xfs_inode_t *dp;
354         xfs_trans_t *tp;
355         xfs_mount_t *mp;
356         xfs_dir2_leaf_t *leaf;
357
358         /*
359          * Copy the existing (incorrect) block from the root node position
360          * to a free space somewhere.
361          */
362         args = state->args;
363         ASSERT(args != NULL);
364         error = xfs_da_grow_inode(args, &blkno);
365         if (error)
366                 return(error);
367         dp = args->dp;
368         tp = args->trans;
369         mp = state->mp;
370         error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork);
371         if (error)
372                 return(error);
373         ASSERT(bp != NULL);
374         node = bp->data;
375         oldroot = blk1->bp->data;
376         if (INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
377                 size = (int)((char *)&oldroot->btree[INT_GET(oldroot->hdr.count, ARCH_CONVERT)] -
378                              (char *)oldroot);
379         } else {
380                 ASSERT(XFS_DIR_IS_V2(mp));
381                 ASSERT(INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC);
382                 leaf = (xfs_dir2_leaf_t *)oldroot;
383                 size = (int)((char *)&leaf->ents[INT_GET(leaf->hdr.count, ARCH_CONVERT)] -
384                              (char *)leaf);
385         }
386         memcpy(node, oldroot, size);
387         xfs_da_log_buf(tp, bp, 0, size - 1);
388         xfs_da_buf_done(blk1->bp);
389         blk1->bp = bp;
390         blk1->blkno = blkno;
391
392         /*
393          * Set up the new root node.
394          */
395         error = xfs_da_node_create(args,
396                 args->whichfork == XFS_DATA_FORK &&
397                 XFS_DIR_IS_V2(mp) ? mp->m_dirleafblk : 0,
398                 INT_GET(node->hdr.level, ARCH_CONVERT) + 1, &bp, args->whichfork);
399         if (error)
400                 return(error);
401         node = bp->data;
402         INT_SET(node->btree[0].hashval, ARCH_CONVERT, blk1->hashval);
403         INT_SET(node->btree[0].before, ARCH_CONVERT, blk1->blkno);
404         INT_SET(node->btree[1].hashval, ARCH_CONVERT, blk2->hashval);
405         INT_SET(node->btree[1].before, ARCH_CONVERT, blk2->blkno);
406         INT_SET(node->hdr.count, ARCH_CONVERT, 2);
407
408 #ifdef DEBUG
409         if (INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) {
410                 ASSERT(blk1->blkno >= mp->m_dirleafblk &&
411                        blk1->blkno < mp->m_dirfreeblk);
412                 ASSERT(blk2->blkno >= mp->m_dirleafblk &&
413                        blk2->blkno < mp->m_dirfreeblk);
414         }
415 #endif
416
417         /* Header is already logged by xfs_da_node_create */
418         xfs_da_log_buf(tp, bp,
419                 XFS_DA_LOGRANGE(node, node->btree,
420                         sizeof(xfs_da_node_entry_t) * 2));
421         xfs_da_buf_done(bp);
422
423         return(0);
424 }
425
426 /*
427  * Split the node, rebalance, then add the new entry.
428  */
429 STATIC int                                              /* error */
430 xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
431                                  xfs_da_state_blk_t *newblk,
432                                  xfs_da_state_blk_t *addblk,
433                                  int treelevel, int *result)
434 {
435         xfs_da_intnode_t *node;
436         xfs_dablk_t blkno;
437         int newcount, error;
438         int useextra;
439
440         node = oldblk->bp->data;
441         ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
442
443         /*
444          * With V2 the extra block is data or freespace.
445          */
446         useextra = state->extravalid && XFS_DIR_IS_V1(state->mp);
447         newcount = 1 + useextra;
448         /*
449          * Do we have to split the node?
450          */
451         if ((INT_GET(node->hdr.count, ARCH_CONVERT) + newcount) > state->node_ents) {
452                 /*
453                  * Allocate a new node, add to the doubly linked chain of
454                  * nodes, then move some of our excess entries into it.
455                  */
456                 error = xfs_da_grow_inode(state->args, &blkno);
457                 if (error)
458                         return(error);  /* GROT: dir is inconsistent */
459
460                 error = xfs_da_node_create(state->args, blkno, treelevel,
461                                            &newblk->bp, state->args->whichfork);
462                 if (error)
463                         return(error);  /* GROT: dir is inconsistent */
464                 newblk->blkno = blkno;
465                 newblk->magic = XFS_DA_NODE_MAGIC;
466                 xfs_da_node_rebalance(state, oldblk, newblk);
467                 error = xfs_da_blk_link(state, oldblk, newblk);
468                 if (error)
469                         return(error);
470                 *result = 1;
471         } else {
472                 *result = 0;
473         }
474
475         /*
476          * Insert the new entry(s) into the correct block
477          * (updating last hashval in the process).
478          *
479          * xfs_da_node_add() inserts BEFORE the given index,
480          * and as a result of using node_lookup_int() we always
481          * point to a valid entry (not after one), but a split
482          * operation always results in a new block whose hashvals
483          * FOLLOW the current block.
484          *
485          * If we had double-split op below us, then add the extra block too.
486          */
487         node = oldblk->bp->data;
488         if (oldblk->index <= INT_GET(node->hdr.count, ARCH_CONVERT)) {
489                 oldblk->index++;
490                 xfs_da_node_add(state, oldblk, addblk);
491                 if (useextra) {
492                         if (state->extraafter)
493                                 oldblk->index++;
494                         xfs_da_node_add(state, oldblk, &state->extrablk);
495                         state->extravalid = 0;
496                 }
497         } else {
498                 newblk->index++;
499                 xfs_da_node_add(state, newblk, addblk);
500                 if (useextra) {
501                         if (state->extraafter)
502                                 newblk->index++;
503                         xfs_da_node_add(state, newblk, &state->extrablk);
504                         state->extravalid = 0;
505                 }
506         }
507
508         return(0);
509 }
510
511 /*
512  * Balance the btree elements between two intermediate nodes,
513  * usually one full and one empty.
514  *
515  * NOTE: if blk2 is empty, then it will get the upper half of blk1.
516  */
517 STATIC void
518 xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
519                                      xfs_da_state_blk_t *blk2)
520 {
521         xfs_da_intnode_t *node1, *node2, *tmpnode;
522         xfs_da_node_entry_t *btree_s, *btree_d;
523         int count, tmp;
524         xfs_trans_t *tp;
525
526         node1 = blk1->bp->data;
527         node2 = blk2->bp->data;
528         /*
529          * Figure out how many entries need to move, and in which direction.
530          * Swap the nodes around if that makes it simpler.
531          */
532         if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) &&
533             ((INT_GET(node2->btree[ 0 ].hashval, ARCH_CONVERT) < INT_GET(node1->btree[ 0 ].hashval, ARCH_CONVERT)) ||
534              (INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
535               INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) {
536                 tmpnode = node1;
537                 node1 = node2;
538                 node2 = tmpnode;
539         }
540         ASSERT(INT_GET(node1->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
541         ASSERT(INT_GET(node2->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
542         count = (INT_GET(node1->hdr.count, ARCH_CONVERT) - INT_GET(node2->hdr.count, ARCH_CONVERT)) / 2;
543         if (count == 0)
544                 return;
545         tp = state->args->trans;
546         /*
547          * Two cases: high-to-low and low-to-high.
548          */
549         if (count > 0) {
550                 /*
551                  * Move elements in node2 up to make a hole.
552                  */
553                 if ((tmp = INT_GET(node2->hdr.count, ARCH_CONVERT)) > 0) {
554                         tmp *= (uint)sizeof(xfs_da_node_entry_t);
555                         btree_s = &node2->btree[0];
556                         btree_d = &node2->btree[count];
557                         memmove(btree_d, btree_s, tmp);
558                 }
559
560                 /*
561                  * Move the req'd B-tree elements from high in node1 to
562                  * low in node2.
563                  */
564                 INT_MOD(node2->hdr.count, ARCH_CONVERT, count);
565                 tmp = count * (uint)sizeof(xfs_da_node_entry_t);
566                 btree_s = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT) - count];
567                 btree_d = &node2->btree[0];
568                 memcpy(btree_d, btree_s, tmp);
569                 INT_MOD(node1->hdr.count, ARCH_CONVERT, -(count));
570
571         } else {
572                 /*
573                  * Move the req'd B-tree elements from low in node2 to
574                  * high in node1.
575                  */
576                 count = -count;
577                 tmp = count * (uint)sizeof(xfs_da_node_entry_t);
578                 btree_s = &node2->btree[0];
579                 btree_d = &node1->btree[INT_GET(node1->hdr.count, ARCH_CONVERT)];
580                 memcpy(btree_d, btree_s, tmp);
581                 INT_MOD(node1->hdr.count, ARCH_CONVERT, count);
582                 xfs_da_log_buf(tp, blk1->bp,
583                         XFS_DA_LOGRANGE(node1, btree_d, tmp));
584
585                 /*
586                  * Move elements in node2 down to fill the hole.
587                  */
588                 tmp  = INT_GET(node2->hdr.count, ARCH_CONVERT) - count;
589                 tmp *= (uint)sizeof(xfs_da_node_entry_t);
590                 btree_s = &node2->btree[count];
591                 btree_d = &node2->btree[0];
592                 memmove(btree_d, btree_s, tmp);
593                 INT_MOD(node2->hdr.count, ARCH_CONVERT, -(count));
594         }
595
596         /*
597          * Log header of node 1 and all current bits of node 2.
598          */
599         xfs_da_log_buf(tp, blk1->bp,
600                 XFS_DA_LOGRANGE(node1, &node1->hdr, sizeof(node1->hdr)));
601         xfs_da_log_buf(tp, blk2->bp,
602                 XFS_DA_LOGRANGE(node2, &node2->hdr,
603                         sizeof(node2->hdr) +
604                         sizeof(node2->btree[0]) * INT_GET(node2->hdr.count, ARCH_CONVERT)));
605
606         /*
607          * Record the last hashval from each block for upward propagation.
608          * (note: don't use the swapped node pointers)
609          */
610         node1 = blk1->bp->data;
611         node2 = blk2->bp->data;
612         blk1->hashval = INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
613         blk2->hashval = INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
614
615         /*
616          * Adjust the expected index for insertion.
617          */
618         if (blk1->index >= INT_GET(node1->hdr.count, ARCH_CONVERT)) {
619                 blk2->index = blk1->index - INT_GET(node1->hdr.count, ARCH_CONVERT);
620                 blk1->index = INT_GET(node1->hdr.count, ARCH_CONVERT) + 1;      /* make it invalid */
621         }
622 }
623
624 /*
625  * Add a new entry to an intermediate node.
626  */
627 STATIC void
628 xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
629                                xfs_da_state_blk_t *newblk)
630 {
631         xfs_da_intnode_t *node;
632         xfs_da_node_entry_t *btree;
633         int tmp;
634         xfs_mount_t *mp;
635
636         node = oldblk->bp->data;
637         mp = state->mp;
638         ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
639         ASSERT((oldblk->index >= 0) && (oldblk->index <= INT_GET(node->hdr.count, ARCH_CONVERT)));
640         ASSERT(newblk->blkno != 0);
641         if (state->args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
642                 ASSERT(newblk->blkno >= mp->m_dirleafblk &&
643                        newblk->blkno < mp->m_dirfreeblk);
644
645         /*
646          * We may need to make some room before we insert the new node.
647          */
648         tmp = 0;
649         btree = &node->btree[ oldblk->index ];
650         if (oldblk->index < INT_GET(node->hdr.count, ARCH_CONVERT)) {
651                 tmp = (INT_GET(node->hdr.count, ARCH_CONVERT) - oldblk->index) * (uint)sizeof(*btree);
652                 memmove(btree + 1, btree, tmp);
653         }
654         INT_SET(btree->hashval, ARCH_CONVERT, newblk->hashval);
655         INT_SET(btree->before, ARCH_CONVERT, newblk->blkno);
656         xfs_da_log_buf(state->args->trans, oldblk->bp,
657                 XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree)));
658         INT_MOD(node->hdr.count, ARCH_CONVERT, +1);
659         xfs_da_log_buf(state->args->trans, oldblk->bp,
660                 XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
661
662         /*
663          * Copy the last hash value from the oldblk to propagate upwards.
664          */
665         oldblk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
666 }
667
668 /*========================================================================
669  * Routines used for shrinking the Btree.
670  *========================================================================*/
671
672 /*
673  * Deallocate an empty leaf node, remove it from its parent,
674  * possibly deallocating that block, etc...
675  */
676 int
677 xfs_da_join(xfs_da_state_t *state)
678 {
679         xfs_da_state_blk_t *drop_blk, *save_blk;
680         int action, error;
681
682         action = 0;
683         drop_blk = &state->path.blk[ state->path.active-1 ];
684         save_blk = &state->altpath.blk[ state->path.active-1 ];
685         ASSERT(state->path.blk[0].magic == XFS_DA_NODE_MAGIC);
686         ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC ||
687                drop_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp));
688
689         /*
690          * Walk back up the tree joining/deallocating as necessary.
691          * When we stop dropping blocks, break out.
692          */
693         for (  ; state->path.active >= 2; drop_blk--, save_blk--,
694                  state->path.active--) {
695                 /*
696                  * See if we can combine the block with a neighbor.
697                  *   (action == 0) => no options, just leave
698                  *   (action == 1) => coalesce, then unlink
699                  *   (action == 2) => block empty, unlink it
700                  */
701                 switch (drop_blk->magic) {
702                 case XFS_ATTR_LEAF_MAGIC:
703                         error = xfs_attr_leaf_toosmall(state, &action);
704                         if (error)
705                                 return(error);
706                         if (action == 0)
707                                 return(0);
708                         xfs_attr_leaf_unbalance(state, drop_blk, save_blk);
709                         break;
710                 case XFS_DIR_LEAF_MAGIC:
711                         ASSERT(XFS_DIR_IS_V1(state->mp));
712                         error = xfs_dir_leaf_toosmall(state, &action);
713                         if (error)
714                                 return(error);
715                         if (action == 0)
716                                 return(0);
717                         xfs_dir_leaf_unbalance(state, drop_blk, save_blk);
718                         break;
719                 case XFS_DIR2_LEAFN_MAGIC:
720                         ASSERT(XFS_DIR_IS_V2(state->mp));
721                         error = xfs_dir2_leafn_toosmall(state, &action);
722                         if (error)
723                                 return error;
724                         if (action == 0)
725                                 return 0;
726                         xfs_dir2_leafn_unbalance(state, drop_blk, save_blk);
727                         break;
728                 case XFS_DA_NODE_MAGIC:
729                         /*
730                          * Remove the offending node, fixup hashvals,
731                          * check for a toosmall neighbor.
732                          */
733                         xfs_da_node_remove(state, drop_blk);
734                         xfs_da_fixhashpath(state, &state->path);
735                         error = xfs_da_node_toosmall(state, &action);
736                         if (error)
737                                 return(error);
738                         if (action == 0)
739                                 return 0;
740                         xfs_da_node_unbalance(state, drop_blk, save_blk);
741                         break;
742                 }
743                 xfs_da_fixhashpath(state, &state->altpath);
744                 error = xfs_da_blk_unlink(state, drop_blk, save_blk);
745                 xfs_da_state_kill_altpath(state);
746                 if (error)
747                         return(error);
748                 error = xfs_da_shrink_inode(state->args, drop_blk->blkno,
749                                                          drop_blk->bp);
750                 drop_blk->bp = NULL;
751                 if (error)
752                         return(error);
753         }
754         /*
755          * We joined all the way to the top.  If it turns out that
756          * we only have one entry in the root, make the child block
757          * the new root.
758          */
759         xfs_da_node_remove(state, drop_blk);
760         xfs_da_fixhashpath(state, &state->path);
761         error = xfs_da_root_join(state, &state->path.blk[0]);
762         return(error);
763 }
764
765 /*
766  * We have only one entry in the root.  Copy the only remaining child of
767  * the old root to block 0 as the new root node.
768  */
769 STATIC int
770 xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
771 {
772         xfs_da_intnode_t *oldroot;
773         /* REFERENCED */
774         xfs_da_blkinfo_t *blkinfo;
775         xfs_da_args_t *args;
776         xfs_dablk_t child;
777         xfs_dabuf_t *bp;
778         int error;
779
780         args = state->args;
781         ASSERT(args != NULL);
782         ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
783         oldroot = root_blk->bp->data;
784         ASSERT(INT_GET(oldroot->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
785         ASSERT(!oldroot->hdr.info.forw);
786         ASSERT(!oldroot->hdr.info.back);
787
788         /*
789          * If the root has more than one child, then don't do anything.
790          */
791         if (INT_GET(oldroot->hdr.count, ARCH_CONVERT) > 1)
792                 return(0);
793
794         /*
795          * Read in the (only) child block, then copy those bytes into
796          * the root block's buffer and free the original child block.
797          */
798         child = INT_GET(oldroot->btree[ 0 ].before, ARCH_CONVERT);
799         ASSERT(child != 0);
800         error = xfs_da_read_buf(args->trans, args->dp, child, -1, &bp,
801                                              args->whichfork);
802         if (error)
803                 return(error);
804         ASSERT(bp != NULL);
805         blkinfo = bp->data;
806         if (INT_GET(oldroot->hdr.level, ARCH_CONVERT) == 1) {
807                 ASSERT(INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
808                        INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
809         } else {
810                 ASSERT(INT_GET(blkinfo->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
811         }
812         ASSERT(!blkinfo->forw);
813         ASSERT(!blkinfo->back);
814         memcpy(root_blk->bp->data, bp->data, state->blocksize);
815         xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
816         error = xfs_da_shrink_inode(args, child, bp);
817         return(error);
818 }
819
820 /*
821  * Check a node block and its neighbors to see if the block should be
822  * collapsed into one or the other neighbor.  Always keep the block
823  * with the smaller block number.
824  * If the current block is over 50% full, don't try to join it, return 0.
825  * If the block is empty, fill in the state structure and return 2.
826  * If it can be collapsed, fill in the state structure and return 1.
827  * If nothing can be done, return 0.
828  */
829 STATIC int
830 xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
831 {
832         xfs_da_intnode_t *node;
833         xfs_da_state_blk_t *blk;
834         xfs_da_blkinfo_t *info;
835         int count, forward, error, retval, i;
836         xfs_dablk_t blkno;
837         xfs_dabuf_t *bp;
838
839         /*
840          * Check for the degenerate case of the block being over 50% full.
841          * If so, it's not worth even looking to see if we might be able
842          * to coalesce with a sibling.
843          */
844         blk = &state->path.blk[ state->path.active-1 ];
845         info = blk->bp->data;
846         ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
847         node = (xfs_da_intnode_t *)info;
848         count = INT_GET(node->hdr.count, ARCH_CONVERT);
849         if (count > (state->node_ents >> 1)) {
850                 *action = 0;    /* blk over 50%, don't try to join */
851                 return(0);      /* blk over 50%, don't try to join */
852         }
853
854         /*
855          * Check for the degenerate case of the block being empty.
856          * If the block is empty, we'll simply delete it, no need to
857          * coalesce it with a sibling block.  We choose (aribtrarily)
858          * to merge with the forward block unless it is NULL.
859          */
860         if (count == 0) {
861                 /*
862                  * Make altpath point to the block we want to keep and
863                  * path point to the block we want to drop (this one).
864                  */
865                 forward = info->forw;
866                 memcpy(&state->altpath, &state->path, sizeof(state->path));
867                 error = xfs_da_path_shift(state, &state->altpath, forward,
868                                                  0, &retval);
869                 if (error)
870                         return(error);
871                 if (retval) {
872                         *action = 0;
873                 } else {
874                         *action = 2;
875                 }
876                 return(0);
877         }
878
879         /*
880          * Examine each sibling block to see if we can coalesce with
881          * at least 25% free space to spare.  We need to figure out
882          * whether to merge with the forward or the backward block.
883          * We prefer coalescing with the lower numbered sibling so as
884          * to shrink a directory over time.
885          */
886         /* start with smaller blk num */
887         forward = (INT_GET(info->forw, ARCH_CONVERT)
888                                 < INT_GET(info->back, ARCH_CONVERT));
889         for (i = 0; i < 2; forward = !forward, i++) {
890                 if (forward)
891                         blkno = INT_GET(info->forw, ARCH_CONVERT);
892                 else
893                         blkno = INT_GET(info->back, ARCH_CONVERT);
894                 if (blkno == 0)
895                         continue;
896                 error = xfs_da_read_buf(state->args->trans, state->args->dp,
897                                         blkno, -1, &bp, state->args->whichfork);
898                 if (error)
899                         return(error);
900                 ASSERT(bp != NULL);
901
902                 node = (xfs_da_intnode_t *)info;
903                 count  = state->node_ents;
904                 count -= state->node_ents >> 2;
905                 count -= INT_GET(node->hdr.count, ARCH_CONVERT);
906                 node = bp->data;
907                 ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
908                 count -= INT_GET(node->hdr.count, ARCH_CONVERT);
909                 xfs_da_brelse(state->args->trans, bp);
910                 if (count >= 0)
911                         break;  /* fits with at least 25% to spare */
912         }
913         if (i >= 2) {
914                 *action = 0;
915                 return(0);
916         }
917
918         /*
919          * Make altpath point to the block we want to keep (the lower
920          * numbered block) and path point to the block we want to drop.
921          */
922         memcpy(&state->altpath, &state->path, sizeof(state->path));
923         if (blkno < blk->blkno) {
924                 error = xfs_da_path_shift(state, &state->altpath, forward,
925                                                  0, &retval);
926                 if (error) {
927                         return(error);
928                 }
929                 if (retval) {
930                         *action = 0;
931                         return(0);
932                 }
933         } else {
934                 error = xfs_da_path_shift(state, &state->path, forward,
935                                                  0, &retval);
936                 if (error) {
937                         return(error);
938                 }
939                 if (retval) {
940                         *action = 0;
941                         return(0);
942                 }
943         }
944         *action = 1;
945         return(0);
946 }
947
948 /*
949  * Walk back up the tree adjusting hash values as necessary,
950  * when we stop making changes, return.
951  */
952 void
953 xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
954 {
955         xfs_da_state_blk_t *blk;
956         xfs_da_intnode_t *node;
957         xfs_da_node_entry_t *btree;
958         xfs_dahash_t lasthash=0;
959         int level, count;
960
961         level = path->active-1;
962         blk = &path->blk[ level ];
963         switch (blk->magic) {
964         case XFS_ATTR_LEAF_MAGIC:
965                 lasthash = xfs_attr_leaf_lasthash(blk->bp, &count);
966                 if (count == 0)
967                         return;
968                 break;
969         case XFS_DIR_LEAF_MAGIC:
970                 ASSERT(XFS_DIR_IS_V1(state->mp));
971                 lasthash = xfs_dir_leaf_lasthash(blk->bp, &count);
972                 if (count == 0)
973                         return;
974                 break;
975         case XFS_DIR2_LEAFN_MAGIC:
976                 ASSERT(XFS_DIR_IS_V2(state->mp));
977                 lasthash = xfs_dir2_leafn_lasthash(blk->bp, &count);
978                 if (count == 0)
979                         return;
980                 break;
981         case XFS_DA_NODE_MAGIC:
982                 lasthash = xfs_da_node_lasthash(blk->bp, &count);
983                 if (count == 0)
984                         return;
985                 break;
986         }
987         for (blk--, level--; level >= 0; blk--, level--) {
988                 node = blk->bp->data;
989                 ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
990                 btree = &node->btree[ blk->index ];
991                 if (INT_GET(btree->hashval, ARCH_CONVERT) == lasthash)
992                         break;
993                 blk->hashval = lasthash;
994                 INT_SET(btree->hashval, ARCH_CONVERT, lasthash);
995                 xfs_da_log_buf(state->args->trans, blk->bp,
996                                   XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
997
998                 lasthash = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
999         }
1000 }
1001
1002 /*
1003  * Remove an entry from an intermediate node.
1004  */
1005 STATIC void
1006 xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
1007 {
1008         xfs_da_intnode_t *node;
1009         xfs_da_node_entry_t *btree;
1010         int tmp;
1011
1012         node = drop_blk->bp->data;
1013         ASSERT(drop_blk->index < INT_GET(node->hdr.count, ARCH_CONVERT));
1014         ASSERT(drop_blk->index >= 0);
1015
1016         /*
1017          * Copy over the offending entry, or just zero it out.
1018          */
1019         btree = &node->btree[drop_blk->index];
1020         if (drop_blk->index < (INT_GET(node->hdr.count, ARCH_CONVERT)-1)) {
1021                 tmp  = INT_GET(node->hdr.count, ARCH_CONVERT) - drop_blk->index - 1;
1022                 tmp *= (uint)sizeof(xfs_da_node_entry_t);
1023                 memmove(btree, btree + 1, tmp);
1024                 xfs_da_log_buf(state->args->trans, drop_blk->bp,
1025                     XFS_DA_LOGRANGE(node, btree, tmp));
1026                 btree = &node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ];
1027         }
1028         memset((char *)btree, 0, sizeof(xfs_da_node_entry_t));
1029         xfs_da_log_buf(state->args->trans, drop_blk->bp,
1030             XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
1031         INT_MOD(node->hdr.count, ARCH_CONVERT, -1);
1032         xfs_da_log_buf(state->args->trans, drop_blk->bp,
1033             XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
1034
1035         /*
1036          * Copy the last hash value from the block to propagate upwards.
1037          */
1038         btree--;
1039         drop_blk->hashval = INT_GET(btree->hashval, ARCH_CONVERT);
1040 }
1041
1042 /*
1043  * Unbalance the btree elements between two intermediate nodes,
1044  * move all Btree elements from one node into another.
1045  */
1046 STATIC void
1047 xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1048                                      xfs_da_state_blk_t *save_blk)
1049 {
1050         xfs_da_intnode_t *drop_node, *save_node;
1051         xfs_da_node_entry_t *btree;
1052         int tmp;
1053         xfs_trans_t *tp;
1054
1055         drop_node = drop_blk->bp->data;
1056         save_node = save_blk->bp->data;
1057         ASSERT(INT_GET(drop_node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
1058         ASSERT(INT_GET(save_node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
1059         tp = state->args->trans;
1060
1061         /*
1062          * If the dying block has lower hashvals, then move all the
1063          * elements in the remaining block up to make a hole.
1064          */
1065         if ((INT_GET(drop_node->btree[ 0 ].hashval, ARCH_CONVERT) < INT_GET(save_node->btree[ 0 ].hashval, ARCH_CONVERT)) ||
1066             (INT_GET(drop_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
1067              INT_GET(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))
1068         {
1069                 btree = &save_node->btree[ INT_GET(drop_node->hdr.count, ARCH_CONVERT) ];
1070                 tmp = INT_GET(save_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t);
1071                 memmove(btree, &save_node->btree[0], tmp);
1072                 btree = &save_node->btree[0];
1073                 xfs_da_log_buf(tp, save_blk->bp,
1074                         XFS_DA_LOGRANGE(save_node, btree,
1075                                 (INT_GET(save_node->hdr.count, ARCH_CONVERT) + INT_GET(drop_node->hdr.count, ARCH_CONVERT)) *
1076                                 sizeof(xfs_da_node_entry_t)));
1077         } else {
1078                 btree = &save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT) ];
1079                 xfs_da_log_buf(tp, save_blk->bp,
1080                         XFS_DA_LOGRANGE(save_node, btree,
1081                                 INT_GET(drop_node->hdr.count, ARCH_CONVERT) *
1082                                 sizeof(xfs_da_node_entry_t)));
1083         }
1084
1085         /*
1086          * Move all the B-tree elements from drop_blk to save_blk.
1087          */
1088         tmp = INT_GET(drop_node->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_da_node_entry_t);
1089         memcpy(btree, &drop_node->btree[0], tmp);
1090         INT_MOD(save_node->hdr.count, ARCH_CONVERT, INT_GET(drop_node->hdr.count, ARCH_CONVERT));
1091
1092         xfs_da_log_buf(tp, save_blk->bp,
1093                 XFS_DA_LOGRANGE(save_node, &save_node->hdr,
1094                         sizeof(save_node->hdr)));
1095
1096         /*
1097          * Save the last hashval in the remaining block for upward propagation.
1098          */
1099         save_blk->hashval = INT_GET(save_node->btree[ INT_GET(save_node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
1100 }
1101
1102 /*========================================================================
1103  * Routines used for finding things in the Btree.
1104  *========================================================================*/
1105
1106 /*
1107  * Walk down the Btree looking for a particular filename, filling
1108  * in the state structure as we go.
1109  *
1110  * We will set the state structure to point to each of the elements
1111  * in each of the nodes where either the hashval is or should be.
1112  *
1113  * We support duplicate hashval's so for each entry in the current
1114  * node that could contain the desired hashval, descend.  This is a
1115  * pruned depth-first tree search.
1116  */
1117 int                                                     /* error */
1118 xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
1119 {
1120         xfs_da_state_blk_t *blk;
1121         xfs_da_blkinfo_t *curr;
1122         xfs_da_intnode_t *node;
1123         xfs_da_node_entry_t *btree;
1124         xfs_dablk_t blkno;
1125         int probe, span, max, error, retval;
1126         xfs_dahash_t hashval;
1127         xfs_da_args_t *args;
1128
1129         args = state->args;
1130
1131         /*
1132          * Descend thru the B-tree searching each level for the right
1133          * node to use, until the right hashval is found.
1134          */
1135         if (args->whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(state->mp))
1136                 blkno = state->mp->m_dirleafblk;
1137         else
1138                 blkno = 0;
1139         for (blk = &state->path.blk[0], state->path.active = 1;
1140                          state->path.active <= XFS_DA_NODE_MAXDEPTH;
1141                          blk++, state->path.active++) {
1142                 /*
1143                  * Read the next node down in the tree.
1144                  */
1145                 blk->blkno = blkno;
1146                 error = xfs_da_read_buf(args->trans, args->dp, blkno,
1147                                         -1, &blk->bp, args->whichfork);
1148                 if (error) {
1149                         blk->blkno = 0;
1150                         state->path.active--;
1151                         return(error);
1152                 }
1153                 curr = blk->bp->data;
1154                 ASSERT(INT_GET(curr->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC ||
1155                        INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
1156                        INT_GET(curr->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
1157
1158                 /*
1159                  * Search an intermediate node for a match.
1160                  */
1161                 blk->magic = INT_GET(curr->magic, ARCH_CONVERT);
1162                 if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
1163                         node = blk->bp->data;
1164                         blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
1165
1166                         /*
1167                          * Binary search.  (note: small blocks will skip loop)
1168                          */
1169                         max = INT_GET(node->hdr.count, ARCH_CONVERT);
1170                         probe = span = max / 2;
1171                         hashval = args->hashval;
1172                         for (btree = &node->btree[probe]; span > 4;
1173                                    btree = &node->btree[probe]) {
1174                                 span /= 2;
1175                                 if (INT_GET(btree->hashval, ARCH_CONVERT) < hashval)
1176                                         probe += span;
1177                                 else if (INT_GET(btree->hashval, ARCH_CONVERT) > hashval)
1178                                         probe -= span;
1179                                 else
1180                                         break;
1181                         }
1182                         ASSERT((probe >= 0) && (probe < max));
1183                         ASSERT((span <= 4) || (INT_GET(btree->hashval, ARCH_CONVERT) == hashval));
1184
1185                         /*
1186                          * Since we may have duplicate hashval's, find the first
1187                          * matching hashval in the node.
1188                          */
1189                         while ((probe > 0) && (INT_GET(btree->hashval, ARCH_CONVERT) >= hashval)) {
1190                                 btree--;
1191                                 probe--;
1192                         }
1193                         while ((probe < max) && (INT_GET(btree->hashval, ARCH_CONVERT) < hashval)) {
1194                                 btree++;
1195                                 probe++;
1196                         }
1197
1198                         /*
1199                          * Pick the right block to descend on.
1200                          */
1201                         if (probe == max) {
1202                                 blk->index = max-1;
1203                                 blkno = INT_GET(node->btree[ max-1 ].before, ARCH_CONVERT);
1204                         } else {
1205                                 blk->index = probe;
1206                                 blkno = INT_GET(btree->before, ARCH_CONVERT);
1207                         }
1208                 }
1209                 else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC) {
1210                         blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);
1211                         break;
1212                 }
1213                 else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) {
1214                         blk->hashval = xfs_dir_leaf_lasthash(blk->bp, NULL);
1215                         break;
1216                 }
1217                 else if (INT_GET(curr->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) {
1218                         blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL);
1219                         break;
1220                 }
1221         }
1222
1223         /*
1224          * A leaf block that ends in the hashval that we are interested in
1225          * (final hashval == search hashval) means that the next block may
1226          * contain more entries with the same hashval, shift upward to the
1227          * next leaf and keep searching.
1228          */
1229         for (;;) {
1230                 if (blk->magic == XFS_DIR_LEAF_MAGIC) {
1231                         ASSERT(XFS_DIR_IS_V1(state->mp));
1232                         retval = xfs_dir_leaf_lookup_int(blk->bp, args,
1233                                                                   &blk->index);
1234                 } else if (blk->magic == XFS_DIR2_LEAFN_MAGIC) {
1235                         ASSERT(XFS_DIR_IS_V2(state->mp));
1236                         retval = xfs_dir2_leafn_lookup_int(blk->bp, args,
1237                                                         &blk->index, state);
1238                 }
1239                 else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1240                         retval = xfs_attr_leaf_lookup_int(blk->bp, args);
1241                         blk->index = args->index;
1242                         args->blkno = blk->blkno;
1243                 }
1244                 if (((retval == ENOENT) || (retval == ENOATTR)) &&
1245                     (blk->hashval == args->hashval)) {
1246                         error = xfs_da_path_shift(state, &state->path, 1, 1,
1247                                                          &retval);
1248                         if (error)
1249                                 return(error);
1250                         if (retval == 0) {
1251                                 continue;
1252                         }
1253                         else if (blk->magic == XFS_ATTR_LEAF_MAGIC) {
1254                                 /* path_shift() gives ENOENT */
1255                                 retval = XFS_ERROR(ENOATTR);
1256                         }
1257                 }
1258                 break;
1259         }
1260         *result = retval;
1261         return(0);
1262 }
1263
1264 /*========================================================================
1265  * Utility routines.
1266  *========================================================================*/
1267
1268 /*
1269  * Link a new block into a doubly linked list of blocks (of whatever type).
1270  */
1271 int                                                     /* error */
1272 xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
1273                                xfs_da_state_blk_t *new_blk)
1274 {
1275         xfs_da_blkinfo_t *old_info, *new_info, *tmp_info;
1276         xfs_da_args_t *args;
1277         int before=0, error;
1278         xfs_dabuf_t *bp;
1279
1280         /*
1281          * Set up environment.
1282          */
1283         args = state->args;
1284         ASSERT(args != NULL);
1285         old_info = old_blk->bp->data;
1286         new_info = new_blk->bp->data;
1287         ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
1288                old_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) ||
1289                old_blk->magic == XFS_ATTR_LEAF_MAGIC);
1290         ASSERT(old_blk->magic == INT_GET(old_info->magic, ARCH_CONVERT));
1291         ASSERT(new_blk->magic == INT_GET(new_info->magic, ARCH_CONVERT));
1292         ASSERT(old_blk->magic == new_blk->magic);
1293
1294         switch (old_blk->magic) {
1295         case XFS_ATTR_LEAF_MAGIC:
1296                 before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);
1297                 break;
1298         case XFS_DIR_LEAF_MAGIC:
1299                 ASSERT(XFS_DIR_IS_V1(state->mp));
1300                 before = xfs_dir_leaf_order(old_blk->bp, new_blk->bp);
1301                 break;
1302         case XFS_DIR2_LEAFN_MAGIC:
1303                 ASSERT(XFS_DIR_IS_V2(state->mp));
1304                 before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp);
1305                 break;
1306         case XFS_DA_NODE_MAGIC:
1307                 before = xfs_da_node_order(old_blk->bp, new_blk->bp);
1308                 break;
1309         }
1310
1311         /*
1312          * Link blocks in appropriate order.
1313          */
1314         if (before) {
1315                 /*
1316                  * Link new block in before existing block.
1317                  */
1318                 INT_SET(new_info->forw, ARCH_CONVERT, old_blk->blkno);
1319                 new_info->back = old_info->back; /* INT_: direct copy */
1320                 if (INT_GET(old_info->back, ARCH_CONVERT)) {
1321                         error = xfs_da_read_buf(args->trans, args->dp,
1322                                                 INT_GET(old_info->back,
1323                                                         ARCH_CONVERT), -1, &bp,
1324                                                 args->whichfork);
1325                         if (error)
1326                                 return(error);
1327                         ASSERT(bp != NULL);
1328                         tmp_info = bp->data;
1329                         ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) == INT_GET(old_info->magic, ARCH_CONVERT));
1330                         ASSERT(INT_GET(tmp_info->forw, ARCH_CONVERT) == old_blk->blkno);
1331                         INT_SET(tmp_info->forw, ARCH_CONVERT, new_blk->blkno);
1332                         xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1333                         xfs_da_buf_done(bp);
1334                 }
1335                 INT_SET(old_info->back, ARCH_CONVERT, new_blk->blkno);
1336         } else {
1337                 /*
1338                  * Link new block in after existing block.
1339                  */
1340                 new_info->forw = old_info->forw; /* INT_: direct copy */
1341                 INT_SET(new_info->back, ARCH_CONVERT, old_blk->blkno);
1342                 if (INT_GET(old_info->forw, ARCH_CONVERT)) {
1343                         error = xfs_da_read_buf(args->trans, args->dp,
1344                                                 INT_GET(old_info->forw, ARCH_CONVERT), -1, &bp,
1345                                                 args->whichfork);
1346                         if (error)
1347                                 return(error);
1348                         ASSERT(bp != NULL);
1349                         tmp_info = bp->data;
1350                         ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT)
1351                                     == INT_GET(old_info->magic, ARCH_CONVERT));
1352                         ASSERT(INT_GET(tmp_info->back, ARCH_CONVERT)
1353                                     == old_blk->blkno);
1354                         INT_SET(tmp_info->back, ARCH_CONVERT, new_blk->blkno);
1355                         xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
1356                         xfs_da_buf_done(bp);
1357                 }
1358                 INT_SET(old_info->forw, ARCH_CONVERT, new_blk->blkno);
1359         }
1360
1361         xfs_da_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
1362         xfs_da_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
1363         return(0);
1364 }
1365
1366 /*
1367  * Compare two intermediate nodes for "order".
1368  */
1369 STATIC int
1370 xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
1371 {
1372         xfs_da_intnode_t *node1, *node2;
1373
1374         node1 = node1_bp->data;
1375         node2 = node2_bp->data;
1376         ASSERT((INT_GET(node1->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) &&
1377                (INT_GET(node2->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC));
1378         if ((INT_GET(node1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(node2->hdr.count, ARCH_CONVERT) > 0) &&
1379             ((INT_GET(node2->btree[ 0 ].hashval, ARCH_CONVERT) <
1380               INT_GET(node1->btree[ 0 ].hashval, ARCH_CONVERT)) ||
1381              (INT_GET(node2->btree[ INT_GET(node2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
1382               INT_GET(node1->btree[ INT_GET(node1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) {
1383                 return(1);
1384         }
1385         return(0);
1386 }
1387
1388 /*
1389  * Pick up the last hashvalue from an intermediate node.
1390  */
1391 STATIC uint
1392 xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
1393 {
1394         xfs_da_intnode_t *node;
1395
1396         node = bp->data;
1397         ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
1398         if (count)
1399                 *count = INT_GET(node->hdr.count, ARCH_CONVERT);
1400         if (!node->hdr.count)
1401                 return(0);
1402         return(INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT));
1403 }
1404
1405 /*
1406  * Unlink a block from a doubly linked list of blocks.
1407  */
1408 STATIC int                                              /* error */
1409 xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1410                                  xfs_da_state_blk_t *save_blk)
1411 {
1412         xfs_da_blkinfo_t *drop_info, *save_info, *tmp_info;
1413         xfs_da_args_t *args;
1414         xfs_dabuf_t *bp;
1415         int error;
1416
1417         /*
1418          * Set up environment.
1419          */
1420         args = state->args;
1421         ASSERT(args != NULL);
1422         save_info = save_blk->bp->data;
1423         drop_info = drop_blk->bp->data;
1424         ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
1425                save_blk->magic == XFS_DIRX_LEAF_MAGIC(state->mp) ||
1426                save_blk->magic == XFS_ATTR_LEAF_MAGIC);
1427         ASSERT(save_blk->magic == INT_GET(save_info->magic, ARCH_CONVERT));
1428         ASSERT(drop_blk->magic == INT_GET(drop_info->magic, ARCH_CONVERT));
1429         ASSERT(save_blk->magic == drop_blk->magic);
1430         ASSERT((INT_GET(save_info->forw, ARCH_CONVERT) == drop_blk->blkno) ||
1431                (INT_GET(save_info->back, ARCH_CONVERT) == drop_blk->blkno));
1432         ASSERT((INT_GET(drop_info->forw, ARCH_CONVERT) == save_blk->blkno) ||
1433                (INT_GET(drop_info->back, ARCH_CONVERT) == save_blk->blkno));
1434
1435         /*
1436          * Unlink the leaf block from the doubly linked chain of leaves.
1437          */
1438         if (INT_GET(save_info->back, ARCH_CONVERT) == drop_blk->blkno) {
1439                 save_info->back = drop_info->back; /* INT_: direct copy */
1440                 if (INT_GET(drop_info->back, ARCH_CONVERT)) {
1441                         error = xfs_da_read_buf(args->trans, args->dp,
1442                                                 INT_GET(drop_info->back,
1443                                                         ARCH_CONVERT), -1, &bp,
1444                                                 args->whichfork);
1445                         if (error)
1446                                 return(error);
1447                         ASSERT(bp != NULL);
1448                         tmp_info = bp->data;
1449                         ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT) == INT_GET(save_info->magic, ARCH_CONVERT));
1450                         ASSERT(INT_GET(tmp_info->forw, ARCH_CONVERT) == drop_blk->blkno);
1451                         INT_SET(tmp_info->forw, ARCH_CONVERT, save_blk->blkno);
1452                         xfs_da_log_buf(args->trans, bp, 0,
1453                                                     sizeof(*tmp_info) - 1);
1454                         xfs_da_buf_done(bp);
1455                 }
1456         } else {
1457                 save_info->forw = drop_info->forw; /* INT_: direct copy */
1458                 if (INT_GET(drop_info->forw, ARCH_CONVERT)) {
1459                         error = xfs_da_read_buf(args->trans, args->dp,
1460                                                 INT_GET(drop_info->forw, ARCH_CONVERT), -1, &bp,
1461                                                 args->whichfork);
1462                         if (error)
1463                                 return(error);
1464                         ASSERT(bp != NULL);
1465                         tmp_info = bp->data;
1466                         ASSERT(INT_GET(tmp_info->magic, ARCH_CONVERT)
1467                                     == INT_GET(save_info->magic, ARCH_CONVERT));
1468                         ASSERT(INT_GET(tmp_info->back, ARCH_CONVERT)
1469                                     == drop_blk->blkno);
1470                         INT_SET(tmp_info->back, ARCH_CONVERT, save_blk->blkno);
1471                         xfs_da_log_buf(args->trans, bp, 0,
1472                                                     sizeof(*tmp_info) - 1);
1473                         xfs_da_buf_done(bp);
1474                 }
1475         }
1476
1477         xfs_da_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
1478         return(0);
1479 }
1480
1481 /*
1482  * Move a path "forward" or "!forward" one block at the current level.
1483  *
1484  * This routine will adjust a "path" to point to the next block
1485  * "forward" (higher hashvalues) or "!forward" (lower hashvals) in the
1486  * Btree, including updating pointers to the intermediate nodes between
1487  * the new bottom and the root.
1488  */
1489 int                                                     /* error */
1490 xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
1491                                  int forward, int release, int *result)
1492 {
1493         xfs_da_state_blk_t *blk;
1494         xfs_da_blkinfo_t *info;
1495         xfs_da_intnode_t *node;
1496         xfs_da_args_t *args;
1497         xfs_dablk_t blkno=0;
1498         int level, error;
1499
1500         /*
1501          * Roll up the Btree looking for the first block where our
1502          * current index is not at the edge of the block.  Note that
1503          * we skip the bottom layer because we want the sibling block.
1504          */
1505         args = state->args;
1506         ASSERT(args != NULL);
1507         ASSERT(path != NULL);
1508         ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1509         level = (path->active-1) - 1;   /* skip bottom layer in path */
1510         for (blk = &path->blk[level]; level >= 0; blk--, level--) {
1511                 ASSERT(blk->bp != NULL);
1512                 node = blk->bp->data;
1513                 ASSERT(INT_GET(node->hdr.info.magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
1514                 if (forward && (blk->index < INT_GET(node->hdr.count, ARCH_CONVERT)-1)) {
1515                         blk->index++;
1516                         blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
1517                         break;
1518                 } else if (!forward && (blk->index > 0)) {
1519                         blk->index--;
1520                         blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
1521                         break;
1522                 }
1523         }
1524         if (level < 0) {
1525                 *result = XFS_ERROR(ENOENT);    /* we're out of our tree */
1526                 ASSERT(args->oknoent);
1527                 return(0);
1528         }
1529
1530         /*
1531          * Roll down the edge of the subtree until we reach the
1532          * same depth we were at originally.
1533          */
1534         for (blk++, level++; level < path->active; blk++, level++) {
1535                 /*
1536                  * Release the old block.
1537                  * (if it's dirty, trans won't actually let go)
1538                  */
1539                 if (release)
1540                         xfs_da_brelse(args->trans, blk->bp);
1541
1542                 /*
1543                  * Read the next child block.
1544                  */
1545                 blk->blkno = blkno;
1546                 error = xfs_da_read_buf(args->trans, args->dp, blkno, -1,
1547                                                      &blk->bp, args->whichfork);
1548                 if (error)
1549                         return(error);
1550                 ASSERT(blk->bp != NULL);
1551                 info = blk->bp->data;
1552                 ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC ||
1553                        INT_GET(info->magic, ARCH_CONVERT) == XFS_DIRX_LEAF_MAGIC(state->mp) ||
1554                        INT_GET(info->magic, ARCH_CONVERT) == XFS_ATTR_LEAF_MAGIC);
1555                 blk->magic = INT_GET(info->magic, ARCH_CONVERT);
1556                 if (INT_GET(info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC) {
1557                         node = (xfs_da_intnode_t *)info;
1558                         blk->hashval = INT_GET(node->btree[ INT_GET(node->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
1559                         if (forward)
1560                                 blk->index = 0;
1561                         else
1562                                 blk->index = INT_GET(node->hdr.count, ARCH_CONVERT)-1;
1563                         blkno = INT_GET(node->btree[ blk->index ].before, ARCH_CONVERT);
1564                 } else {
1565                         ASSERT(level == path->active-1);
1566                         blk->index = 0;
1567                         switch(blk->magic) {
1568                         case XFS_ATTR_LEAF_MAGIC:
1569                                 blk->hashval = xfs_attr_leaf_lasthash(blk->bp,
1570                                                                       NULL);
1571                                 break;
1572                         case XFS_DIR_LEAF_MAGIC:
1573                                 ASSERT(XFS_DIR_IS_V1(state->mp));
1574                                 blk->hashval = xfs_dir_leaf_lasthash(blk->bp,
1575                                                                      NULL);
1576                                 break;
1577                         case XFS_DIR2_LEAFN_MAGIC:
1578                                 ASSERT(XFS_DIR_IS_V2(state->mp));
1579                                 blk->hashval = xfs_dir2_leafn_lasthash(blk->bp,
1580                                                                        NULL);
1581                                 break;
1582                         default:
1583                                 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC ||
1584                                        blk->magic ==
1585                                        XFS_DIRX_LEAF_MAGIC(state->mp));
1586                                 break;
1587                         }
1588                 }
1589         }
1590         *result = 0;
1591         return(0);
1592 }
1593
1594
1595 /*========================================================================
1596  * Utility routines.
1597  *========================================================================*/
1598
1599 /*
1600  * Implement a simple hash on a character string.
1601  * Rotate the hash value by 7 bits, then XOR each character in.
1602  * This is implemented with some source-level loop unrolling.
1603  */
1604 xfs_dahash_t
1605 xfs_da_hashname(const uchar_t *name, int namelen)
1606 {
1607         xfs_dahash_t hash;
1608
1609 #ifdef SLOWVERSION
1610         /*
1611          * This is the old one-byte-at-a-time version.
1612          */
1613         for (hash = 0; namelen > 0; namelen--)
1614                 hash = *name++ ^ rol32(hash, 7);
1615
1616         return(hash);
1617 #else
1618         /*
1619          * Do four characters at a time as long as we can.
1620          */
1621         for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
1622                 hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
1623                        (name[3] << 0) ^ rol32(hash, 7 * 4);
1624
1625         /*
1626          * Now do the rest of the characters.
1627          */
1628         switch (namelen) {
1629         case 3:
1630                 return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
1631                        rol32(hash, 7 * 3);
1632         case 2:
1633                 return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
1634         case 1:
1635                 return (name[0] << 0) ^ rol32(hash, 7 * 1);
1636         case 0:
1637                 return hash;
1638         }
1639         /* NOTREACHED */
1640 #endif
1641         return 0; /* keep gcc happy */
1642 }
1643
1644 /*
1645  * Add a block to the btree ahead of the file.
1646  * Return the new block number to the caller.
1647  */
1648 int
1649 xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
1650 {
1651         xfs_fileoff_t bno, b;
1652         xfs_bmbt_irec_t map;
1653         xfs_bmbt_irec_t *mapp;
1654         xfs_inode_t *dp;
1655         int nmap, error, w, count, c, got, i, mapi;
1656         xfs_fsize_t size;
1657         xfs_trans_t *tp;
1658         xfs_mount_t *mp;
1659
1660         dp = args->dp;
1661         mp = dp->i_mount;
1662         w = args->whichfork;
1663         tp = args->trans;
1664         /*
1665          * For new directories adjust the file offset and block count.
1666          */
1667         if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp)) {
1668                 bno = mp->m_dirleafblk;
1669                 count = mp->m_dirblkfsbs;
1670         } else {
1671                 bno = 0;
1672                 count = 1;
1673         }
1674         /*
1675          * Find a spot in the file space to put the new block.
1676          */
1677         if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w))) {
1678                 return error;
1679         }
1680         if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
1681                 ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk);
1682         /*
1683          * Try mapping it in one filesystem block.
1684          */
1685         nmap = 1;
1686         ASSERT(args->firstblock != NULL);
1687         if ((error = xfs_bmapi(tp, dp, bno, count,
1688                         XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|
1689                         XFS_BMAPI_CONTIG,
1690                         args->firstblock, args->total, &map, &nmap,
1691                         args->flist))) {
1692                 return error;
1693         }
1694         ASSERT(nmap <= 1);
1695         if (nmap == 1) {
1696                 mapp = &map;
1697                 mapi = 1;
1698         }
1699         /*
1700          * If we didn't get it and the block might work if fragmented,
1701          * try without the CONTIG flag.  Loop until we get it all.
1702          */
1703         else if (nmap == 0 && count > 1) {
1704                 mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
1705                 for (b = bno, mapi = 0; b < bno + count; ) {
1706                         nmap = MIN(XFS_BMAP_MAX_NMAP, count);
1707                         c = (int)(bno + count - b);
1708                         if ((error = xfs_bmapi(tp, dp, b, c,
1709                                         XFS_BMAPI_AFLAG(w)|XFS_BMAPI_WRITE|
1710                                         XFS_BMAPI_METADATA,
1711                                         args->firstblock, args->total,
1712                                         &mapp[mapi], &nmap, args->flist))) {
1713                                 kmem_free(mapp, sizeof(*mapp) * count);
1714                                 return error;
1715                         }
1716                         if (nmap < 1)
1717                                 break;
1718                         mapi += nmap;
1719                         b = mapp[mapi - 1].br_startoff +
1720                             mapp[mapi - 1].br_blockcount;
1721                 }
1722         } else {
1723                 mapi = 0;
1724                 mapp = NULL;
1725         }
1726         /*
1727          * Count the blocks we got, make sure it matches the total.
1728          */
1729         for (i = 0, got = 0; i < mapi; i++)
1730                 got += mapp[i].br_blockcount;
1731         if (got != count || mapp[0].br_startoff != bno ||
1732             mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
1733             bno + count) {
1734                 if (mapp != &map)
1735                         kmem_free(mapp, sizeof(*mapp) * count);
1736                 return XFS_ERROR(ENOSPC);
1737         }
1738         if (mapp != &map)
1739                 kmem_free(mapp, sizeof(*mapp) * count);
1740         *new_blkno = (xfs_dablk_t)bno;
1741         /*
1742          * For version 1 directories, adjust the file size if it changed.
1743          */
1744         if (w == XFS_DATA_FORK && XFS_DIR_IS_V1(mp)) {
1745                 ASSERT(mapi == 1);
1746                 if ((error = xfs_bmap_last_offset(tp, dp, &bno, w)))
1747                         return error;
1748                 size = XFS_FSB_TO_B(mp, bno);
1749                 if (size != dp->i_d.di_size) {
1750                         dp->i_d.di_size = size;
1751                         xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
1752                 }
1753         }
1754         return 0;
1755 }
1756
1757 /*
1758  * Ick.  We need to always be able to remove a btree block, even
1759  * if there's no space reservation because the filesystem is full.
1760  * This is called if xfs_bunmapi on a btree block fails due to ENOSPC.
1761  * It swaps the target block with the last block in the file.  The
1762  * last block in the file can always be removed since it can't cause
1763  * a bmap btree split to do that.
1764  */
1765 STATIC int
1766 xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
1767                       xfs_dabuf_t **dead_bufp)
1768 {
1769         xfs_dablk_t dead_blkno, last_blkno, sib_blkno, par_blkno;
1770         xfs_dabuf_t *dead_buf, *last_buf, *sib_buf, *par_buf;
1771         xfs_fileoff_t lastoff;
1772         xfs_inode_t *ip;
1773         xfs_trans_t *tp;
1774         xfs_mount_t *mp;
1775         int error, w, entno, level, dead_level;
1776         xfs_da_blkinfo_t *dead_info, *sib_info;
1777         xfs_da_intnode_t *par_node, *dead_node;
1778         xfs_dir_leafblock_t *dead_leaf;
1779         xfs_dir2_leaf_t *dead_leaf2;
1780         xfs_dahash_t dead_hash;
1781
1782         dead_buf = *dead_bufp;
1783         dead_blkno = *dead_blknop;
1784         tp = args->trans;
1785         ip = args->dp;
1786         w = args->whichfork;
1787         ASSERT(w == XFS_DATA_FORK);
1788         mp = ip->i_mount;
1789         if (XFS_DIR_IS_V2(mp)) {
1790                 lastoff = mp->m_dirfreeblk;
1791                 error = xfs_bmap_last_before(tp, ip, &lastoff, w);
1792         } else
1793                 error = xfs_bmap_last_offset(tp, ip, &lastoff, w);
1794         if (error)
1795                 return error;
1796         if (unlikely(lastoff == 0)) {
1797                 XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW,
1798                                  mp);
1799                 return XFS_ERROR(EFSCORRUPTED);
1800         }
1801         /*
1802          * Read the last block in the btree space.
1803          */
1804         last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs;
1805         if ((error = xfs_da_read_buf(tp, ip, last_blkno, -1, &last_buf, w)))
1806                 return error;
1807         /*
1808          * Copy the last block into the dead buffer and log it.
1809          */
1810         memcpy(dead_buf->data, last_buf->data, mp->m_dirblksize);
1811         xfs_da_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1);
1812         dead_info = dead_buf->data;
1813         /*
1814          * Get values from the moved block.
1815          */
1816         if (INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) {
1817                 ASSERT(XFS_DIR_IS_V1(mp));
1818                 dead_leaf = (xfs_dir_leafblock_t *)dead_info;
1819                 dead_level = 0;
1820                 dead_hash =
1821                         INT_GET(dead_leaf->entries[INT_GET(dead_leaf->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
1822         } else if (INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DIR2_LEAFN_MAGIC) {
1823                 ASSERT(XFS_DIR_IS_V2(mp));
1824                 dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
1825                 dead_level = 0;
1826                 dead_hash = INT_GET(dead_leaf2->ents[INT_GET(dead_leaf2->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
1827         } else {
1828                 ASSERT(INT_GET(dead_info->magic, ARCH_CONVERT) == XFS_DA_NODE_MAGIC);
1829                 dead_node = (xfs_da_intnode_t *)dead_info;
1830                 dead_level = INT_GET(dead_node->hdr.level, ARCH_CONVERT);
1831                 dead_hash = INT_GET(dead_node->btree[INT_GET(dead_node->hdr.count, ARCH_CONVERT) - 1].hashval, ARCH_CONVERT);
1832         }
1833         sib_buf = par_buf = NULL;
1834         /*
1835          * If the moved block has a left sibling, fix up the pointers.
1836          */
1837         if ((sib_blkno = INT_GET(dead_info->back, ARCH_CONVERT))) {
1838                 if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
1839                         goto done;
1840                 sib_info = sib_buf->data;
1841                 if (unlikely(
1842                     INT_GET(sib_info->forw, ARCH_CONVERT) != last_blkno ||
1843                     INT_GET(sib_info->magic, ARCH_CONVERT) != INT_GET(dead_info->magic, ARCH_CONVERT))) {
1844                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)",
1845                                          XFS_ERRLEVEL_LOW, mp);
1846                         error = XFS_ERROR(EFSCORRUPTED);
1847                         goto done;
1848                 }
1849                 INT_SET(sib_info->forw, ARCH_CONVERT, dead_blkno);
1850                 xfs_da_log_buf(tp, sib_buf,
1851                         XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
1852                                         sizeof(sib_info->forw)));
1853                 xfs_da_buf_done(sib_buf);
1854                 sib_buf = NULL;
1855         }
1856         /*
1857          * If the moved block has a right sibling, fix up the pointers.
1858          */
1859         if ((sib_blkno = INT_GET(dead_info->forw, ARCH_CONVERT))) {
1860                 if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
1861                         goto done;
1862                 sib_info = sib_buf->data;
1863                 if (unlikely(
1864                        INT_GET(sib_info->back, ARCH_CONVERT) != last_blkno
1865                     || INT_GET(sib_info->magic, ARCH_CONVERT)
1866                                 != INT_GET(dead_info->magic, ARCH_CONVERT))) {
1867                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)",
1868                                          XFS_ERRLEVEL_LOW, mp);
1869                         error = XFS_ERROR(EFSCORRUPTED);
1870                         goto done;
1871                 }
1872                 INT_SET(sib_info->back, ARCH_CONVERT, dead_blkno);
1873                 xfs_da_log_buf(tp, sib_buf,
1874                         XFS_DA_LOGRANGE(sib_info, &sib_info->back,
1875                                         sizeof(sib_info->back)));
1876                 xfs_da_buf_done(sib_buf);
1877                 sib_buf = NULL;
1878         }
1879         par_blkno = XFS_DIR_IS_V1(mp) ? 0 : mp->m_dirleafblk;
1880         level = -1;
1881         /*
1882          * Walk down the tree looking for the parent of the moved block.
1883          */
1884         for (;;) {
1885                 if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
1886                         goto done;
1887                 par_node = par_buf->data;
1888                 if (unlikely(
1889                     INT_GET(par_node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC ||
1890                     (level >= 0 && level != INT_GET(par_node->hdr.level, ARCH_CONVERT) + 1))) {
1891                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
1892                                          XFS_ERRLEVEL_LOW, mp);
1893                         error = XFS_ERROR(EFSCORRUPTED);
1894                         goto done;
1895                 }
1896                 level = INT_GET(par_node->hdr.level, ARCH_CONVERT);
1897                 for (entno = 0;
1898                      entno < INT_GET(par_node->hdr.count, ARCH_CONVERT) &&
1899                      INT_GET(par_node->btree[entno].hashval, ARCH_CONVERT) < dead_hash;
1900                      entno++)
1901                         continue;
1902                 if (unlikely(entno == INT_GET(par_node->hdr.count, ARCH_CONVERT))) {
1903                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)",
1904                                          XFS_ERRLEVEL_LOW, mp);
1905                         error = XFS_ERROR(EFSCORRUPTED);
1906                         goto done;
1907                 }
1908                 par_blkno = INT_GET(par_node->btree[entno].before, ARCH_CONVERT);
1909                 if (level == dead_level + 1)
1910                         break;
1911                 xfs_da_brelse(tp, par_buf);
1912                 par_buf = NULL;
1913         }
1914         /*
1915          * We're in the right parent block.
1916          * Look for the right entry.
1917          */
1918         for (;;) {
1919                 for (;
1920                      entno < INT_GET(par_node->hdr.count, ARCH_CONVERT) &&
1921                      INT_GET(par_node->btree[entno].before, ARCH_CONVERT) != last_blkno;
1922                      entno++)
1923                         continue;
1924                 if (entno < INT_GET(par_node->hdr.count, ARCH_CONVERT))
1925                         break;
1926                 par_blkno = INT_GET(par_node->hdr.info.forw, ARCH_CONVERT);
1927                 xfs_da_brelse(tp, par_buf);
1928                 par_buf = NULL;
1929                 if (unlikely(par_blkno == 0)) {
1930                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)",
1931                                          XFS_ERRLEVEL_LOW, mp);
1932                         error = XFS_ERROR(EFSCORRUPTED);
1933                         goto done;
1934                 }
1935                 if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
1936                         goto done;
1937                 par_node = par_buf->data;
1938                 if (unlikely(
1939                     INT_GET(par_node->hdr.level, ARCH_CONVERT) != level ||
1940                     INT_GET(par_node->hdr.info.magic, ARCH_CONVERT) != XFS_DA_NODE_MAGIC)) {
1941                         XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
1942                                          XFS_ERRLEVEL_LOW, mp);
1943                         error = XFS_ERROR(EFSCORRUPTED);
1944                         goto done;
1945                 }
1946                 entno = 0;
1947         }
1948         /*
1949          * Update the parent entry pointing to the moved block.
1950          */
1951         INT_SET(par_node->btree[entno].before, ARCH_CONVERT, dead_blkno);
1952         xfs_da_log_buf(tp, par_buf,
1953                 XFS_DA_LOGRANGE(par_node, &par_node->btree[entno].before,
1954                                 sizeof(par_node->btree[entno].before)));
1955         xfs_da_buf_done(par_buf);
1956         xfs_da_buf_done(dead_buf);
1957         *dead_blknop = last_blkno;
1958         *dead_bufp = last_buf;
1959         return 0;
1960 done:
1961         if (par_buf)
1962                 xfs_da_brelse(tp, par_buf);
1963         if (sib_buf)
1964                 xfs_da_brelse(tp, sib_buf);
1965         xfs_da_brelse(tp, last_buf);
1966         return error;
1967 }
1968
1969 /*
1970  * Remove a btree block from a directory or attribute.
1971  */
1972 int
1973 xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
1974                     xfs_dabuf_t *dead_buf)
1975 {
1976         xfs_inode_t *dp;
1977         int done, error, w, count;
1978         xfs_fileoff_t bno;
1979         xfs_fsize_t size;
1980         xfs_trans_t *tp;
1981         xfs_mount_t *mp;
1982
1983         dp = args->dp;
1984         w = args->whichfork;
1985         tp = args->trans;
1986         mp = dp->i_mount;
1987         if (w == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
1988                 count = mp->m_dirblkfsbs;
1989         else
1990                 count = 1;
1991         for (;;) {
1992                 /*
1993                  * Remove extents.  If we get ENOSPC for a dir we have to move
1994                  * the last block to the place we want to kill.
1995                  */
1996                 if ((error = xfs_bunmapi(tp, dp, dead_blkno, count,
1997                                 XFS_BMAPI_AFLAG(w)|XFS_BMAPI_METADATA,
1998                                 0, args->firstblock, args->flist,
1999                                 &done)) == ENOSPC) {
2000                         if (w != XFS_DATA_FORK)
2001                                 goto done;
2002                         if ((error = xfs_da_swap_lastblock(args, &dead_blkno,
2003                                         &dead_buf)))
2004                                 goto done;
2005                 } else if (error)
2006                         goto done;
2007                 else
2008                         break;
2009         }
2010         ASSERT(done);
2011         xfs_da_binval(tp, dead_buf);
2012         /*
2013          * Adjust the directory size for version 1.
2014          */
2015         if (w == XFS_DATA_FORK && XFS_DIR_IS_V1(mp)) {
2016                 if ((error = xfs_bmap_last_offset(tp, dp, &bno, w)))
2017                         return error;
2018                 size = XFS_FSB_TO_B(dp->i_mount, bno);
2019                 if (size != dp->i_d.di_size) {
2020                         dp->i_d.di_size = size;
2021                         xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
2022                 }
2023         }
2024         return 0;
2025 done:
2026         xfs_da_binval(tp, dead_buf);
2027         return error;
2028 }
2029
2030 /*
2031  * See if the mapping(s) for this btree block are valid, i.e.
2032  * don't contain holes, are logically contiguous, and cover the whole range.
2033  */
2034 STATIC int
2035 xfs_da_map_covers_blocks(
2036         int             nmap,
2037         xfs_bmbt_irec_t *mapp,
2038         xfs_dablk_t     bno,
2039         int             count)
2040 {
2041         int             i;
2042         xfs_fileoff_t   off;
2043
2044         for (i = 0, off = bno; i < nmap; i++) {
2045                 if (mapp[i].br_startblock == HOLESTARTBLOCK ||
2046                     mapp[i].br_startblock == DELAYSTARTBLOCK) {
2047                         return 0;
2048                 }
2049                 if (off != mapp[i].br_startoff) {
2050                         return 0;
2051                 }
2052                 off += mapp[i].br_blockcount;
2053         }
2054         return off == bno + count;
2055 }
2056
2057 /*
2058  * Make a dabuf.
2059  * Used for get_buf, read_buf, read_bufr, and reada_buf.
2060  */
2061 STATIC int
2062 xfs_da_do_buf(
2063         xfs_trans_t     *trans,
2064         xfs_inode_t     *dp,
2065         xfs_dablk_t     bno,
2066         xfs_daddr_t     *mappedbnop,
2067         xfs_dabuf_t     **bpp,
2068         int             whichfork,
2069         int             caller,
2070         inst_t          *ra)
2071 {
2072         xfs_buf_t       *bp = NULL;
2073         xfs_buf_t       **bplist;
2074         int             error=0;
2075         int             i;
2076         xfs_bmbt_irec_t map;
2077         xfs_bmbt_irec_t *mapp;
2078         xfs_daddr_t     mappedbno;
2079         xfs_mount_t     *mp;
2080         int             nbplist=0;
2081         int             nfsb;
2082         int             nmap;
2083         xfs_dabuf_t     *rbp;
2084
2085         mp = dp->i_mount;
2086         if (whichfork == XFS_DATA_FORK && XFS_DIR_IS_V2(mp))
2087                 nfsb = mp->m_dirblkfsbs;
2088         else
2089                 nfsb = 1;
2090         mappedbno = *mappedbnop;
2091         /*
2092          * Caller doesn't have a mapping.  -2 means don't complain
2093          * if we land in a hole.
2094          */
2095         if (mappedbno == -1 || mappedbno == -2) {
2096                 /*
2097                  * Optimize the one-block case.
2098                  */
2099                 if (nfsb == 1) {
2100                         xfs_fsblock_t   fsb;
2101
2102                         if ((error =
2103                             xfs_bmapi_single(trans, dp, whichfork, &fsb,
2104                                     (xfs_fileoff_t)bno))) {
2105                                 return error;
2106                         }
2107                         mapp = &map;
2108                         if (fsb == NULLFSBLOCK) {
2109                                 nmap = 0;
2110                         } else {
2111                                 map.br_startblock = fsb;
2112                                 map.br_startoff = (xfs_fileoff_t)bno;
2113                                 map.br_blockcount = 1;
2114                                 nmap = 1;
2115                         }
2116                 } else {
2117                         mapp = kmem_alloc(sizeof(*mapp) * nfsb, KM_SLEEP);
2118                         nmap = nfsb;
2119                         if ((error = xfs_bmapi(trans, dp, (xfs_fileoff_t)bno,
2120                                         nfsb,
2121                                         XFS_BMAPI_METADATA |
2122                                                 XFS_BMAPI_AFLAG(whichfork),
2123                                         NULL, 0, mapp, &nmap, NULL)))
2124                                 goto exit0;
2125                 }
2126         } else {
2127                 map.br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
2128                 map.br_startoff = (xfs_fileoff_t)bno;
2129                 map.br_blockcount = nfsb;
2130                 mapp = &map;
2131                 nmap = 1;
2132         }
2133         if (!xfs_da_map_covers_blocks(nmap, mapp, bno, nfsb)) {
2134                 error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED);
2135                 if (unlikely(error == EFSCORRUPTED)) {
2136                         if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
2137                                 int     i;
2138                                 cmn_err(CE_ALERT, "xfs_da_do_buf: bno %lld\n",
2139                                         (long long)bno);
2140                                 cmn_err(CE_ALERT, "dir: inode %lld\n",
2141                                         (long long)dp->i_ino);
2142                                 for (i = 0; i < nmap; i++) {
2143                                         cmn_err(CE_ALERT,
2144                                                 "[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d\n",
2145                                                 i,
2146                                                 (long long)mapp[i].br_startoff,
2147                                                 (long long)mapp[i].br_startblock,
2148                                                 (long long)mapp[i].br_blockcount,
2149                                                 mapp[i].br_state);
2150                                 }
2151                         }
2152                         XFS_ERROR_REPORT("xfs_da_do_buf(1)",
2153                                          XFS_ERRLEVEL_LOW, mp);
2154                 }
2155                 goto exit0;
2156         }
2157         if (caller != 3 && nmap > 1) {
2158                 bplist = kmem_alloc(sizeof(*bplist) * nmap, KM_SLEEP);
2159                 nbplist = 0;
2160         } else
2161                 bplist = NULL;
2162         /*
2163          * Turn the mapping(s) into buffer(s).
2164          */
2165         for (i = 0; i < nmap; i++) {
2166                 int     nmapped;
2167
2168                 mappedbno = XFS_FSB_TO_DADDR(mp, mapp[i].br_startblock);
2169                 if (i == 0)
2170                         *mappedbnop = mappedbno;
2171                 nmapped = (int)XFS_FSB_TO_BB(mp, mapp[i].br_blockcount);
2172                 switch (caller) {
2173                 case 0:
2174                         bp = xfs_trans_get_buf(trans, mp->m_ddev_targp,
2175                                 mappedbno, nmapped, 0);
2176                         error = bp ? XFS_BUF_GETERROR(bp) : XFS_ERROR(EIO);
2177                         break;
2178                 case 1:
2179                 case 2:
2180                         bp = NULL;
2181                         error = xfs_trans_read_buf(mp, trans, mp->m_ddev_targp,
2182                                 mappedbno, nmapped, 0, &bp);
2183                         break;
2184                 case 3:
2185                         xfs_baread(mp->m_ddev_targp, mappedbno, nmapped);
2186                         error = 0;
2187                         bp = NULL;
2188                         break;
2189                 }
2190                 if (error) {
2191                         if (bp)
2192                                 xfs_trans_brelse(trans, bp);
2193                         goto exit1;
2194                 }
2195                 if (!bp)
2196                         continue;
2197                 if (caller == 1) {
2198                         if (whichfork == XFS_ATTR_FORK) {
2199                                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_ATTR_BTREE,
2200                                                 XFS_ATTR_BTREE_REF);
2201                         } else {
2202                                 XFS_BUF_SET_VTYPE_REF(bp, B_FS_DIR_BTREE,
2203                                                 XFS_DIR_BTREE_REF);
2204                         }
2205                 }
2206                 if (bplist) {
2207                         bplist[nbplist++] = bp;
2208                 }
2209         }
2210         /*
2211          * Build a dabuf structure.
2212          */
2213         if (bplist) {
2214                 rbp = xfs_da_buf_make(nbplist, bplist, ra);
2215         } else if (bp)
2216                 rbp = xfs_da_buf_make(1, &bp, ra);
2217         else
2218                 rbp = NULL;
2219         /*
2220          * For read_buf, check the magic number.
2221          */
2222         if (caller == 1) {
2223                 xfs_dir2_data_t         *data;
2224                 xfs_dir2_free_t         *free;
2225                 xfs_da_blkinfo_t        *info;
2226                 uint                    magic, magic1;
2227
2228                 info = rbp->data;
2229                 data = rbp->data;
2230                 free = rbp->data;
2231                 magic = INT_GET(info->magic, ARCH_CONVERT);
2232                 magic1 = INT_GET(data->hdr.magic, ARCH_CONVERT);
2233                 if (unlikely(
2234                     XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
2235                                    (magic != XFS_DIR_LEAF_MAGIC) &&
2236                                    (magic != XFS_ATTR_LEAF_MAGIC) &&
2237                                    (magic != XFS_DIR2_LEAF1_MAGIC) &&
2238                                    (magic != XFS_DIR2_LEAFN_MAGIC) &&
2239                                    (magic1 != XFS_DIR2_BLOCK_MAGIC) &&
2240                                    (magic1 != XFS_DIR2_DATA_MAGIC) &&
2241                                    (INT_GET(free->hdr.magic, ARCH_CONVERT) != XFS_DIR2_FREE_MAGIC),
2242                                 mp, XFS_ERRTAG_DA_READ_BUF,
2243                                 XFS_RANDOM_DA_READ_BUF))) {
2244                         xfs_buftrace("DA READ ERROR", rbp->bps[0]);
2245                         XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)",
2246                                              XFS_ERRLEVEL_LOW, mp, info);
2247                         error = XFS_ERROR(EFSCORRUPTED);
2248                         xfs_da_brelse(trans, rbp);
2249                         nbplist = 0;
2250                         goto exit1;
2251                 }
2252         }
2253         if (bplist) {
2254                 kmem_free(bplist, sizeof(*bplist) * nmap);
2255         }
2256         if (mapp != &map) {
2257                 kmem_free(mapp, sizeof(*mapp) * nfsb);
2258         }
2259         if (bpp)
2260                 *bpp = rbp;
2261         return 0;
2262 exit1:
2263         if (bplist) {
2264                 for (i = 0; i < nbplist; i++)
2265                         xfs_trans_brelse(trans, bplist[i]);
2266                 kmem_free(bplist, sizeof(*bplist) * nmap);
2267         }
2268 exit0:
2269         if (mapp != &map)
2270                 kmem_free(mapp, sizeof(*mapp) * nfsb);
2271         if (bpp)
2272                 *bpp = NULL;
2273         return error;
2274 }
2275
2276 /*
2277  * Get a buffer for the dir/attr block.
2278  */
2279 int
2280 xfs_da_get_buf(
2281         xfs_trans_t     *trans,
2282         xfs_inode_t     *dp,
2283         xfs_dablk_t     bno,
2284         xfs_daddr_t             mappedbno,
2285         xfs_dabuf_t     **bpp,
2286         int             whichfork)
2287 {
2288         return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0,
2289                                                  (inst_t *)__return_address);
2290 }
2291
2292 /*
2293  * Get a buffer for the dir/attr block, fill in the contents.
2294  */
2295 int
2296 xfs_da_read_buf(
2297         xfs_trans_t     *trans,
2298         xfs_inode_t     *dp,
2299         xfs_dablk_t     bno,
2300         xfs_daddr_t             mappedbno,
2301         xfs_dabuf_t     **bpp,
2302         int             whichfork)
2303 {
2304         return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1,
2305                 (inst_t *)__return_address);
2306 }
2307
2308 /*
2309  * Readahead the dir/attr block.
2310  */
2311 xfs_daddr_t
2312 xfs_da_reada_buf(
2313         xfs_trans_t     *trans,
2314         xfs_inode_t     *dp,
2315         xfs_dablk_t     bno,
2316         int             whichfork)
2317 {
2318         xfs_daddr_t             rval;
2319
2320         rval = -1;
2321         if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3,
2322                         (inst_t *)__return_address))
2323                 return -1;
2324         else
2325                 return rval;
2326 }
2327
2328 /*
2329  * Calculate the number of bits needed to hold i different values.
2330  */
2331 uint
2332 xfs_da_log2_roundup(uint i)
2333 {
2334         uint rval;
2335
2336         for (rval = 0; rval < NBBY * sizeof(i); rval++) {
2337                 if ((1 << rval) >= i)
2338                         break;
2339         }
2340         return(rval);
2341 }
2342
2343 kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */
2344 kmem_zone_t *xfs_dabuf_zone;            /* dabuf zone */
2345
2346 /*
2347  * Allocate a dir-state structure.
2348  * We don't put them on the stack since they're large.
2349  */
2350 xfs_da_state_t *
2351 xfs_da_state_alloc(void)
2352 {
2353         return kmem_zone_zalloc(xfs_da_state_zone, KM_SLEEP);
2354 }
2355
2356 /*
2357  * Kill the altpath contents of a da-state structure.
2358  */
2359 STATIC void
2360 xfs_da_state_kill_altpath(xfs_da_state_t *state)
2361 {
2362         int     i;
2363
2364         for (i = 0; i < state->altpath.active; i++) {
2365                 if (state->altpath.blk[i].bp) {
2366                         if (state->altpath.blk[i].bp != state->path.blk[i].bp)
2367                                 xfs_da_buf_done(state->altpath.blk[i].bp);
2368                         state->altpath.blk[i].bp = NULL;
2369                 }
2370         }
2371         state->altpath.active = 0;
2372 }
2373
2374 /*
2375  * Free a da-state structure.
2376  */
2377 void
2378 xfs_da_state_free(xfs_da_state_t *state)
2379 {
2380         int     i;
2381
2382         xfs_da_state_kill_altpath(state);
2383         for (i = 0; i < state->path.active; i++) {
2384                 if (state->path.blk[i].bp)
2385                         xfs_da_buf_done(state->path.blk[i].bp);
2386         }
2387         if (state->extravalid && state->extrablk.bp)
2388                 xfs_da_buf_done(state->extrablk.bp);
2389 #ifdef DEBUG
2390         memset((char *)state, 0, sizeof(*state));
2391 #endif /* DEBUG */
2392         kmem_zone_free(xfs_da_state_zone, state);
2393 }
2394
2395 #ifdef XFS_DABUF_DEBUG
2396 xfs_dabuf_t     *xfs_dabuf_global_list;
2397 lock_t          xfs_dabuf_global_lock;
2398 #endif
2399
2400 /*
2401  * Create a dabuf.
2402  */
2403 /* ARGSUSED */
2404 STATIC xfs_dabuf_t *
2405 xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
2406 {
2407         xfs_buf_t       *bp;
2408         xfs_dabuf_t     *dabuf;
2409         int             i;
2410         int             off;
2411
2412         if (nbuf == 1)
2413                 dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_SLEEP);
2414         else
2415                 dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_SLEEP);
2416         dabuf->dirty = 0;
2417 #ifdef XFS_DABUF_DEBUG
2418         dabuf->ra = ra;
2419         dabuf->target = XFS_BUF_TARGET(bps[0]);
2420         dabuf->blkno = XFS_BUF_ADDR(bps[0]);
2421 #endif
2422         if (nbuf == 1) {
2423                 dabuf->nbuf = 1;
2424                 bp = bps[0];
2425                 dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp));
2426                 dabuf->data = XFS_BUF_PTR(bp);
2427                 dabuf->bps[0] = bp;
2428         } else {
2429                 dabuf->nbuf = nbuf;
2430                 for (i = 0, dabuf->bbcount = 0; i < nbuf; i++) {
2431                         dabuf->bps[i] = bp = bps[i];
2432                         dabuf->bbcount += BTOBB(XFS_BUF_COUNT(bp));
2433                 }
2434                 dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP);
2435                 for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) {
2436                         bp = bps[i];
2437                         memcpy((char *)dabuf->data + off, XFS_BUF_PTR(bp),
2438                                 XFS_BUF_COUNT(bp));
2439                 }
2440         }
2441 #ifdef XFS_DABUF_DEBUG
2442         {
2443                 SPLDECL(s);
2444                 xfs_dabuf_t     *p;
2445
2446                 s = mutex_spinlock(&xfs_dabuf_global_lock);
2447                 for (p = xfs_dabuf_global_list; p; p = p->next) {
2448                         ASSERT(p->blkno != dabuf->blkno ||
2449                                p->target != dabuf->target);
2450                 }
2451                 dabuf->prev = NULL;
2452                 if (xfs_dabuf_global_list)
2453                         xfs_dabuf_global_list->prev = dabuf;
2454                 dabuf->next = xfs_dabuf_global_list;
2455                 xfs_dabuf_global_list = dabuf;
2456                 mutex_spinunlock(&xfs_dabuf_global_lock, s);
2457         }
2458 #endif
2459         return dabuf;
2460 }
2461
2462 /*
2463  * Un-dirty a dabuf.
2464  */
2465 STATIC void
2466 xfs_da_buf_clean(xfs_dabuf_t *dabuf)
2467 {
2468         xfs_buf_t       *bp;
2469         int             i;
2470         int             off;
2471
2472         if (dabuf->dirty) {
2473                 ASSERT(dabuf->nbuf > 1);
2474                 dabuf->dirty = 0;
2475                 for (i = off = 0; i < dabuf->nbuf;
2476                                 i++, off += XFS_BUF_COUNT(bp)) {
2477                         bp = dabuf->bps[i];
2478                         memcpy(XFS_BUF_PTR(bp), (char *)dabuf->data + off,
2479                                 XFS_BUF_COUNT(bp));
2480                 }
2481         }
2482 }
2483
2484 /*
2485  * Release a dabuf.
2486  */
2487 void
2488 xfs_da_buf_done(xfs_dabuf_t *dabuf)
2489 {
2490         ASSERT(dabuf);
2491         ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2492         if (dabuf->dirty)
2493                 xfs_da_buf_clean(dabuf);
2494         if (dabuf->nbuf > 1)
2495                 kmem_free(dabuf->data, BBTOB(dabuf->bbcount));
2496 #ifdef XFS_DABUF_DEBUG
2497         {
2498                 SPLDECL(s);
2499
2500                 s = mutex_spinlock(&xfs_dabuf_global_lock);
2501                 if (dabuf->prev)
2502                         dabuf->prev->next = dabuf->next;
2503                 else
2504                         xfs_dabuf_global_list = dabuf->next;
2505                 if (dabuf->next)
2506                         dabuf->next->prev = dabuf->prev;
2507                 mutex_spinunlock(&xfs_dabuf_global_lock, s);
2508         }
2509         memset(dabuf, 0, XFS_DA_BUF_SIZE(dabuf->nbuf));
2510 #endif
2511         if (dabuf->nbuf == 1)
2512                 kmem_zone_free(xfs_dabuf_zone, dabuf);
2513         else
2514                 kmem_free(dabuf, XFS_DA_BUF_SIZE(dabuf->nbuf));
2515 }
2516
2517 /*
2518  * Log transaction from a dabuf.
2519  */
2520 void
2521 xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last)
2522 {
2523         xfs_buf_t       *bp;
2524         uint            f;
2525         int             i;
2526         uint            l;
2527         int             off;
2528
2529         ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2530         if (dabuf->nbuf == 1) {
2531                 ASSERT(dabuf->data == (void *)XFS_BUF_PTR(dabuf->bps[0]));
2532                 xfs_trans_log_buf(tp, dabuf->bps[0], first, last);
2533                 return;
2534         }
2535         dabuf->dirty = 1;
2536         ASSERT(first <= last);
2537         for (i = off = 0; i < dabuf->nbuf; i++, off += XFS_BUF_COUNT(bp)) {
2538                 bp = dabuf->bps[i];
2539                 f = off;
2540                 l = f + XFS_BUF_COUNT(bp) - 1;
2541                 if (f < first)
2542                         f = first;
2543                 if (l > last)
2544                         l = last;
2545                 if (f <= l)
2546                         xfs_trans_log_buf(tp, bp, f - off, l - off);
2547                 /*
2548                  * B_DONE is set by xfs_trans_log buf.
2549                  * If we don't set it on a new buffer (get not read)
2550                  * then if we don't put anything in the buffer it won't
2551                  * be set, and at commit it it released into the cache,
2552                  * and then a read will fail.
2553                  */
2554                 else if (!(XFS_BUF_ISDONE(bp)))
2555                   XFS_BUF_DONE(bp);
2556         }
2557         ASSERT(last < off);
2558 }
2559
2560 /*
2561  * Release dabuf from a transaction.
2562  * Have to free up the dabuf before the buffers are released,
2563  * since the synchronization on the dabuf is really the lock on the buffer.
2564  */
2565 void
2566 xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
2567 {
2568         xfs_buf_t       *bp;
2569         xfs_buf_t       **bplist;
2570         int             i;
2571         int             nbuf;
2572
2573         ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2574         if ((nbuf = dabuf->nbuf) == 1) {
2575                 bplist = &bp;
2576                 bp = dabuf->bps[0];
2577         } else {
2578                 bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
2579                 memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
2580         }
2581         xfs_da_buf_done(dabuf);
2582         for (i = 0; i < nbuf; i++)
2583                 xfs_trans_brelse(tp, bplist[i]);
2584         if (bplist != &bp)
2585                 kmem_free(bplist, nbuf * sizeof(*bplist));
2586 }
2587
2588 /*
2589  * Invalidate dabuf from a transaction.
2590  */
2591 void
2592 xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
2593 {
2594         xfs_buf_t       *bp;
2595         xfs_buf_t       **bplist;
2596         int             i;
2597         int             nbuf;
2598
2599         ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
2600         if ((nbuf = dabuf->nbuf) == 1) {
2601                 bplist = &bp;
2602                 bp = dabuf->bps[0];
2603         } else {
2604                 bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
2605                 memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
2606         }
2607         xfs_da_buf_done(dabuf);
2608         for (i = 0; i < nbuf; i++)
2609                 xfs_trans_binval(tp, bplist[i]);
2610         if (bplist != &bp)
2611                 kmem_free(bplist, nbuf * sizeof(*bplist));
2612 }
2613
2614 /*
2615  * Get the first daddr from a dabuf.
2616  */
2617 xfs_daddr_t
2618 xfs_da_blkno(xfs_dabuf_t *dabuf)
2619 {
2620         ASSERT(dabuf->nbuf);
2621         ASSERT(dabuf->data);
2622         return XFS_BUF_ADDR(dabuf->bps[0]);
2623 }