Merge master.kernel.org:/home/rmk/linux-2.6-arm
[linux-2.6] / fs / xfs / xfs_dir_leaf.c
1 /*
2  * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_log.h"
22 #include "xfs_inum.h"
23 #include "xfs_trans.h"
24 #include "xfs_sb.h"
25 #include "xfs_dir.h"
26 #include "xfs_dir2.h"
27 #include "xfs_dmapi.h"
28 #include "xfs_mount.h"
29 #include "xfs_da_btree.h"
30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir_sf.h"
34 #include "xfs_dir2_sf.h"
35 #include "xfs_attr_sf.h"
36 #include "xfs_dinode.h"
37 #include "xfs_inode.h"
38 #include "xfs_inode_item.h"
39 #include "xfs_alloc.h"
40 #include "xfs_btree.h"
41 #include "xfs_bmap.h"
42 #include "xfs_dir_leaf.h"
43 #include "xfs_error.h"
44
45 /*
46  * xfs_dir_leaf.c
47  *
48  * Routines to implement leaf blocks of directories as Btrees of hashed names.
49  */
50
51 /*========================================================================
52  * Function prototypes for the kernel.
53  *========================================================================*/
54
55 /*
56  * Routines used for growing the Btree.
57  */
58 STATIC void xfs_dir_leaf_add_work(xfs_dabuf_t *leaf_buffer, xfs_da_args_t *args,
59                                               int insertion_index,
60                                               int freemap_index);
61 STATIC int xfs_dir_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *leaf_buffer,
62                                             int musthave, int justcheck);
63 STATIC void xfs_dir_leaf_rebalance(xfs_da_state_t *state,
64                                                   xfs_da_state_blk_t *blk1,
65                                                   xfs_da_state_blk_t *blk2);
66 STATIC int xfs_dir_leaf_figure_balance(xfs_da_state_t *state,
67                                           xfs_da_state_blk_t *leaf_blk_1,
68                                           xfs_da_state_blk_t *leaf_blk_2,
69                                           int *number_entries_in_blk1,
70                                           int *number_namebytes_in_blk1);
71
72 STATIC int xfs_dir_leaf_create(struct xfs_da_args *args,
73                                 xfs_dablk_t which_block,
74                                 struct xfs_dabuf **bpp);
75
76 /*
77  * Utility routines.
78  */
79 STATIC void xfs_dir_leaf_moveents(xfs_dir_leafblock_t *src_leaf,
80                                               int src_start,
81                                               xfs_dir_leafblock_t *dst_leaf,
82                                               int dst_start, int move_count,
83                                               xfs_mount_t *mp);
84
85
86 /*========================================================================
87  * External routines when dirsize < XFS_IFORK_DSIZE(dp).
88  *========================================================================*/
89
90
91 /*
92  * Validate a given inode number.
93  */
94 int
95 xfs_dir_ino_validate(xfs_mount_t *mp, xfs_ino_t ino)
96 {
97         xfs_agblock_t   agblkno;
98         xfs_agino_t     agino;
99         xfs_agnumber_t  agno;
100         int             ino_ok;
101         int             ioff;
102
103         agno = XFS_INO_TO_AGNO(mp, ino);
104         agblkno = XFS_INO_TO_AGBNO(mp, ino);
105         ioff = XFS_INO_TO_OFFSET(mp, ino);
106         agino = XFS_OFFBNO_TO_AGINO(mp, agblkno, ioff);
107         ino_ok =
108                 agno < mp->m_sb.sb_agcount &&
109                 agblkno < mp->m_sb.sb_agblocks &&
110                 agblkno != 0 &&
111                 ioff < (1 << mp->m_sb.sb_inopblog) &&
112                 XFS_AGINO_TO_INO(mp, agno, agino) == ino;
113         if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE,
114                         XFS_RANDOM_DIR_INO_VALIDATE))) {
115                 xfs_fs_cmn_err(CE_WARN, mp, "Invalid inode number 0x%Lx",
116                                 (unsigned long long) ino);
117                 XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp);
118                 return XFS_ERROR(EFSCORRUPTED);
119         }
120         return 0;
121 }
122
123 /*
124  * Create the initial contents of a shortform directory.
125  */
126 int
127 xfs_dir_shortform_create(xfs_da_args_t *args, xfs_ino_t parent)
128 {
129         xfs_dir_sf_hdr_t *hdr;
130         xfs_inode_t *dp;
131
132         dp = args->dp;
133         ASSERT(dp != NULL);
134         ASSERT(dp->i_d.di_size == 0);
135         if (dp->i_d.di_format == XFS_DINODE_FMT_EXTENTS) {
136                 dp->i_df.if_flags &= ~XFS_IFEXTENTS;    /* just in case */
137                 dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
138                 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
139                 dp->i_df.if_flags |= XFS_IFINLINE;
140         }
141         ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
142         ASSERT(dp->i_df.if_bytes == 0);
143         xfs_idata_realloc(dp, sizeof(*hdr), XFS_DATA_FORK);
144         hdr = (xfs_dir_sf_hdr_t *)dp->i_df.if_u1.if_data;
145         XFS_DIR_SF_PUT_DIRINO(&parent, &hdr->parent);
146
147         hdr->count = 0;
148         dp->i_d.di_size = sizeof(*hdr);
149         xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
150         return(0);
151 }
152
153 /*
154  * Add a name to the shortform directory structure.
155  * Overflow from the inode has already been checked for.
156  */
157 int
158 xfs_dir_shortform_addname(xfs_da_args_t *args)
159 {
160         xfs_dir_shortform_t *sf;
161         xfs_dir_sf_entry_t *sfe;
162         int i, offset, size;
163         xfs_inode_t *dp;
164
165         dp = args->dp;
166         ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
167         /*
168          * Catch the case where the conversion from shortform to leaf
169          * failed part way through.
170          */
171         if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
172                 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
173                 return XFS_ERROR(EIO);
174         }
175         ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
176         ASSERT(dp->i_df.if_u1.if_data != NULL);
177         sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
178         sfe = &sf->list[0];
179         for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) {
180                 if (sfe->namelen == args->namelen &&
181                     args->name[0] == sfe->name[0] &&
182                     memcmp(args->name, sfe->name, args->namelen) == 0)
183                         return(XFS_ERROR(EEXIST));
184                 sfe = XFS_DIR_SF_NEXTENTRY(sfe);
185         }
186
187         offset = (int)((char *)sfe - (char *)sf);
188         size = XFS_DIR_SF_ENTSIZE_BYNAME(args->namelen);
189         xfs_idata_realloc(dp, size, XFS_DATA_FORK);
190         sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
191         sfe = (xfs_dir_sf_entry_t *)((char *)sf + offset);
192
193         XFS_DIR_SF_PUT_DIRINO(&args->inumber, &sfe->inumber);
194         sfe->namelen = args->namelen;
195         memcpy(sfe->name, args->name, sfe->namelen);
196         INT_MOD(sf->hdr.count, ARCH_CONVERT, +1);
197
198         dp->i_d.di_size += size;
199         xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
200
201         return(0);
202 }
203
204 /*
205  * Remove a name from the shortform directory structure.
206  */
207 int
208 xfs_dir_shortform_removename(xfs_da_args_t *args)
209 {
210         xfs_dir_shortform_t *sf;
211         xfs_dir_sf_entry_t *sfe;
212         int base, size = 0, i;
213         xfs_inode_t *dp;
214
215         dp = args->dp;
216         ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
217         /*
218          * Catch the case where the conversion from shortform to leaf
219          * failed part way through.
220          */
221         if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
222                 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
223                 return XFS_ERROR(EIO);
224         }
225         ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
226         ASSERT(dp->i_df.if_u1.if_data != NULL);
227         base = sizeof(xfs_dir_sf_hdr_t);
228         sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
229         sfe = &sf->list[0];
230         for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) {
231                 size = XFS_DIR_SF_ENTSIZE_BYENTRY(sfe);
232                 if (sfe->namelen == args->namelen &&
233                     sfe->name[0] == args->name[0] &&
234                     memcmp(sfe->name, args->name, args->namelen) == 0)
235                         break;
236                 base += size;
237                 sfe = XFS_DIR_SF_NEXTENTRY(sfe);
238         }
239         if (i < 0) {
240                 ASSERT(args->oknoent);
241                 return(XFS_ERROR(ENOENT));
242         }
243
244         if ((base + size) != dp->i_d.di_size) {
245                 memmove(&((char *)sf)[base], &((char *)sf)[base+size],
246                                               dp->i_d.di_size - (base+size));
247         }
248         INT_MOD(sf->hdr.count, ARCH_CONVERT, -1);
249
250         xfs_idata_realloc(dp, -size, XFS_DATA_FORK);
251         dp->i_d.di_size -= size;
252         xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
253
254         return(0);
255 }
256
257 /*
258  * Look up a name in a shortform directory structure.
259  */
260 int
261 xfs_dir_shortform_lookup(xfs_da_args_t *args)
262 {
263         xfs_dir_shortform_t *sf;
264         xfs_dir_sf_entry_t *sfe;
265         int i;
266         xfs_inode_t *dp;
267
268         dp = args->dp;
269         ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
270         /*
271          * Catch the case where the conversion from shortform to leaf
272          * failed part way through.
273          */
274         if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
275                 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
276                 return XFS_ERROR(EIO);
277         }
278         ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
279         ASSERT(dp->i_df.if_u1.if_data != NULL);
280         sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
281         if (args->namelen == 2 &&
282             args->name[0] == '.' && args->name[1] == '.') {
283                 XFS_DIR_SF_GET_DIRINO(&sf->hdr.parent, &args->inumber);
284                 return(XFS_ERROR(EEXIST));
285         }
286         if (args->namelen == 1 && args->name[0] == '.') {
287                 args->inumber = dp->i_ino;
288                 return(XFS_ERROR(EEXIST));
289         }
290         sfe = &sf->list[0];
291         for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) {
292                 if (sfe->namelen == args->namelen &&
293                     sfe->name[0] == args->name[0] &&
294                     memcmp(args->name, sfe->name, args->namelen) == 0) {
295                         XFS_DIR_SF_GET_DIRINO(&sfe->inumber, &args->inumber);
296                         return(XFS_ERROR(EEXIST));
297                 }
298                 sfe = XFS_DIR_SF_NEXTENTRY(sfe);
299         }
300         ASSERT(args->oknoent);
301         return(XFS_ERROR(ENOENT));
302 }
303
304 /*
305  * Convert from using the shortform to the leaf.
306  */
307 int
308 xfs_dir_shortform_to_leaf(xfs_da_args_t *iargs)
309 {
310         xfs_inode_t *dp;
311         xfs_dir_shortform_t *sf;
312         xfs_dir_sf_entry_t *sfe;
313         xfs_da_args_t args;
314         xfs_ino_t inumber;
315         char *tmpbuffer;
316         int retval, i, size;
317         xfs_dablk_t blkno;
318         xfs_dabuf_t *bp;
319
320         dp = iargs->dp;
321         /*
322          * Catch the case where the conversion from shortform to leaf
323          * failed part way through.
324          */
325         if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
326                 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
327                 return XFS_ERROR(EIO);
328         }
329         ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
330         ASSERT(dp->i_df.if_u1.if_data != NULL);
331         size = dp->i_df.if_bytes;
332         tmpbuffer = kmem_alloc(size, KM_SLEEP);
333         ASSERT(tmpbuffer != NULL);
334
335         memcpy(tmpbuffer, dp->i_df.if_u1.if_data, size);
336
337         sf = (xfs_dir_shortform_t *)tmpbuffer;
338         XFS_DIR_SF_GET_DIRINO(&sf->hdr.parent, &inumber);
339
340         xfs_idata_realloc(dp, -size, XFS_DATA_FORK);
341         dp->i_d.di_size = 0;
342         xfs_trans_log_inode(iargs->trans, dp, XFS_ILOG_CORE);
343         retval = xfs_da_grow_inode(iargs, &blkno);
344         if (retval)
345                 goto out;
346
347         ASSERT(blkno == 0);
348         retval = xfs_dir_leaf_create(iargs, blkno, &bp);
349         if (retval)
350                 goto out;
351         xfs_da_buf_done(bp);
352
353         args.name = ".";
354         args.namelen = 1;
355         args.hashval = xfs_dir_hash_dot;
356         args.inumber = dp->i_ino;
357         args.dp = dp;
358         args.firstblock = iargs->firstblock;
359         args.flist = iargs->flist;
360         args.total = iargs->total;
361         args.whichfork = XFS_DATA_FORK;
362         args.trans = iargs->trans;
363         args.justcheck = 0;
364         args.addname = args.oknoent = 1;
365         retval = xfs_dir_leaf_addname(&args);
366         if (retval)
367                 goto out;
368
369         args.name = "..";
370         args.namelen = 2;
371         args.hashval = xfs_dir_hash_dotdot;
372         args.inumber = inumber;
373         retval = xfs_dir_leaf_addname(&args);
374         if (retval)
375                 goto out;
376
377         sfe = &sf->list[0];
378         for (i = 0; i < INT_GET(sf->hdr.count, ARCH_CONVERT); i++) {
379                 args.name = (char *)(sfe->name);
380                 args.namelen = sfe->namelen;
381                 args.hashval = xfs_da_hashname((char *)(sfe->name),
382                                                sfe->namelen);
383                 XFS_DIR_SF_GET_DIRINO(&sfe->inumber, &args.inumber);
384                 retval = xfs_dir_leaf_addname(&args);
385                 if (retval)
386                         goto out;
387                 sfe = XFS_DIR_SF_NEXTENTRY(sfe);
388         }
389         retval = 0;
390
391 out:
392         kmem_free(tmpbuffer, size);
393         return(retval);
394 }
395
396 STATIC int
397 xfs_dir_shortform_compare(const void *a, const void *b)
398 {
399         xfs_dir_sf_sort_t *sa, *sb;
400
401         sa = (xfs_dir_sf_sort_t *)a;
402         sb = (xfs_dir_sf_sort_t *)b;
403         if (sa->hash < sb->hash)
404                 return -1;
405         else if (sa->hash > sb->hash)
406                 return 1;
407         else
408                 return sa->entno - sb->entno;
409 }
410
411 /*
412  * Copy out directory entries for getdents(), for shortform directories.
413  */
414 /*ARGSUSED*/
415 int
416 xfs_dir_shortform_getdents(xfs_inode_t *dp, uio_t *uio, int *eofp,
417                                        xfs_dirent_t *dbp, xfs_dir_put_t put)
418 {
419         xfs_dir_shortform_t *sf;
420         xfs_dir_sf_entry_t *sfe;
421         int retval, i, sbsize, nsbuf, lastresid=0, want_entno;
422         xfs_mount_t *mp;
423         xfs_dahash_t cookhash, hash;
424         xfs_dir_put_args_t p;
425         xfs_dir_sf_sort_t *sbuf, *sbp;
426
427         mp = dp->i_mount;
428         sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
429         cookhash = XFS_DA_COOKIE_HASH(mp, uio->uio_offset);
430         want_entno = XFS_DA_COOKIE_ENTRY(mp, uio->uio_offset);
431         nsbuf = INT_GET(sf->hdr.count, ARCH_CONVERT) + 2;
432         sbsize = (nsbuf + 1) * sizeof(*sbuf);
433         sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP);
434
435         xfs_dir_trace_g_du("sf: start", dp, uio);
436
437         /*
438          * Collect all the entries into the buffer.
439          * Entry 0 is .
440          */
441         sbp->entno = 0;
442         sbp->seqno = 0;
443         sbp->hash = xfs_dir_hash_dot;
444         sbp->ino = dp->i_ino;
445         sbp->name = ".";
446         sbp->namelen = 1;
447         sbp++;
448
449         /*
450          * Entry 1 is ..
451          */
452         sbp->entno = 1;
453         sbp->seqno = 0;
454         sbp->hash = xfs_dir_hash_dotdot;
455         sbp->ino = XFS_GET_DIR_INO8(sf->hdr.parent);
456         sbp->name = "..";
457         sbp->namelen = 2;
458         sbp++;
459
460         /*
461          * Scan the directory data for the rest of the entries.
462          */
463         for (i = 0, sfe = &sf->list[0];
464                         i < INT_GET(sf->hdr.count, ARCH_CONVERT); i++) {
465
466                 if (unlikely(
467                     ((char *)sfe < (char *)sf) ||
468                     ((char *)sfe >= ((char *)sf + dp->i_df.if_bytes)))) {
469                         xfs_dir_trace_g_du("sf: corrupted", dp, uio);
470                         XFS_CORRUPTION_ERROR("xfs_dir_shortform_getdents",
471                                              XFS_ERRLEVEL_LOW, mp, sfe);
472                         kmem_free(sbuf, sbsize);
473                         return XFS_ERROR(EFSCORRUPTED);
474                 }
475
476                 sbp->entno = i + 2;
477                 sbp->seqno = 0;
478                 sbp->hash = xfs_da_hashname((char *)sfe->name, sfe->namelen);
479                 sbp->ino = XFS_GET_DIR_INO8(sfe->inumber);
480                 sbp->name = (char *)sfe->name;
481                 sbp->namelen = sfe->namelen;
482                 sfe = XFS_DIR_SF_NEXTENTRY(sfe);
483                 sbp++;
484         }
485
486         /*
487          * Sort the entries on hash then entno.
488          */
489         xfs_sort(sbuf, nsbuf, sizeof(*sbuf), xfs_dir_shortform_compare);
490         /*
491          * Stuff in last entry.
492          */
493         sbp->entno = nsbuf;
494         sbp->hash = XFS_DA_MAXHASH;
495         sbp->seqno = 0;
496         /*
497          * Figure out the sequence numbers in case there's a hash duplicate.
498          */
499         for (hash = sbuf->hash, sbp = sbuf + 1;
500                                 sbp < &sbuf[nsbuf + 1]; sbp++) {
501                 if (sbp->hash == hash)
502                         sbp->seqno = sbp[-1].seqno + 1;
503                 else
504                         hash = sbp->hash;
505         }
506
507         /*
508          * Set up put routine.
509          */
510         p.dbp = dbp;
511         p.put = put;
512         p.uio = uio;
513
514         /*
515          * Find our place.
516          */
517         for (sbp = sbuf; sbp < &sbuf[nsbuf + 1]; sbp++) {
518                 if (sbp->hash > cookhash ||
519                     (sbp->hash == cookhash && sbp->seqno >= want_entno))
520                         break;
521         }
522
523         /*
524          * Did we fail to find anything?  We stop at the last entry,
525          * the one we put maxhash into.
526          */
527         if (sbp == &sbuf[nsbuf]) {
528                 kmem_free(sbuf, sbsize);
529                 xfs_dir_trace_g_du("sf: hash beyond end", dp, uio);
530                 uio->uio_offset = XFS_DA_MAKE_COOKIE(mp, 0, 0, XFS_DA_MAXHASH);
531                 *eofp = 1;
532                 return 0;
533         }
534
535         /*
536          * Loop putting entries into the user buffer.
537          */
538         while (sbp < &sbuf[nsbuf]) {
539                 /*
540                  * Save the first resid in a run of equal-hashval entries
541                  * so that we can back them out if they don't all fit.
542                  */
543                 if (sbp->seqno == 0 || sbp == sbuf)
544                         lastresid = uio->uio_resid;
545                 XFS_PUT_COOKIE(p.cook, mp, 0, sbp[1].seqno, sbp[1].hash);
546                 p.ino = sbp->ino;
547 #if XFS_BIG_INUMS
548                 p.ino += mp->m_inoadd;
549 #endif
550                 p.name = sbp->name;
551                 p.namelen = sbp->namelen;
552                 retval = p.put(&p);
553                 if (!p.done) {
554                         uio->uio_offset =
555                                 XFS_DA_MAKE_COOKIE(mp, 0, 0, sbp->hash);
556                         kmem_free(sbuf, sbsize);
557                         uio->uio_resid = lastresid;
558                         xfs_dir_trace_g_du("sf: E-O-B", dp, uio);
559                         return retval;
560                 }
561                 sbp++;
562         }
563         kmem_free(sbuf, sbsize);
564         uio->uio_offset = p.cook.o;
565         *eofp = 1;
566         xfs_dir_trace_g_du("sf: E-O-F", dp, uio);
567         return 0;
568 }
569
570 /*
571  * Look up a name in a shortform directory structure, replace the inode number.
572  */
573 int
574 xfs_dir_shortform_replace(xfs_da_args_t *args)
575 {
576         xfs_dir_shortform_t *sf;
577         xfs_dir_sf_entry_t *sfe;
578         xfs_inode_t *dp;
579         int i;
580
581         dp = args->dp;
582         ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
583         /*
584          * Catch the case where the conversion from shortform to leaf
585          * failed part way through.
586          */
587         if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
588                 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
589                 return XFS_ERROR(EIO);
590         }
591         ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
592         ASSERT(dp->i_df.if_u1.if_data != NULL);
593         sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
594         if (args->namelen == 2 &&
595             args->name[0] == '.' && args->name[1] == '.') {
596                 /* XXX - replace assert? */
597                 XFS_DIR_SF_PUT_DIRINO(&args->inumber, &sf->hdr.parent);
598                 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
599                 return(0);
600         }
601         ASSERT(args->namelen != 1 || args->name[0] != '.');
602         sfe = &sf->list[0];
603         for (i = INT_GET(sf->hdr.count, ARCH_CONVERT)-1; i >= 0; i--) {
604                 if (sfe->namelen == args->namelen &&
605                     sfe->name[0] == args->name[0] &&
606                     memcmp(args->name, sfe->name, args->namelen) == 0) {
607                         ASSERT(memcmp((char *)&args->inumber,
608                                 (char *)&sfe->inumber, sizeof(xfs_ino_t)));
609                         XFS_DIR_SF_PUT_DIRINO(&args->inumber, &sfe->inumber);
610                         xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
611                         return(0);
612                 }
613                 sfe = XFS_DIR_SF_NEXTENTRY(sfe);
614         }
615         ASSERT(args->oknoent);
616         return(XFS_ERROR(ENOENT));
617 }
618
619 /*
620  * Convert a leaf directory to shortform structure
621  */
622 int
623 xfs_dir_leaf_to_shortform(xfs_da_args_t *iargs)
624 {
625         xfs_dir_leafblock_t *leaf;
626         xfs_dir_leaf_hdr_t *hdr;
627         xfs_dir_leaf_entry_t *entry;
628         xfs_dir_leaf_name_t *namest;
629         xfs_da_args_t args;
630         xfs_inode_t *dp;
631         xfs_ino_t parent = 0;
632         char *tmpbuffer;
633         int retval, i;
634         xfs_dabuf_t *bp;
635
636         dp = iargs->dp;
637         tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP);
638         ASSERT(tmpbuffer != NULL);
639
640         retval = xfs_da_read_buf(iargs->trans, iargs->dp, 0, -1, &bp,
641                                                XFS_DATA_FORK);
642         if (retval)
643                 goto out;
644         ASSERT(bp != NULL);
645         memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount));
646         leaf = (xfs_dir_leafblock_t *)tmpbuffer;
647         ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
648         memset(bp->data, 0, XFS_LBSIZE(dp->i_mount));
649
650         /*
651          * Find and special case the parent inode number
652          */
653         hdr = &leaf->hdr;
654         entry = &leaf->entries[0];
655         for (i = INT_GET(hdr->count, ARCH_CONVERT)-1; i >= 0; entry++, i--) {
656                 namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
657                 if ((entry->namelen == 2) &&
658                     (namest->name[0] == '.') &&
659                     (namest->name[1] == '.')) {
660                         XFS_DIR_SF_GET_DIRINO(&namest->inumber, &parent);
661                         entry->nameidx = 0;
662                 } else if ((entry->namelen == 1) && (namest->name[0] == '.')) {
663                         entry->nameidx = 0;
664                 }
665         }
666         retval = xfs_da_shrink_inode(iargs, 0, bp);
667         if (retval)
668                 goto out;
669         retval = xfs_dir_shortform_create(iargs, parent);
670         if (retval)
671                 goto out;
672
673         /*
674          * Copy the rest of the filenames
675          */
676         entry = &leaf->entries[0];
677         args.dp = dp;
678         args.firstblock = iargs->firstblock;
679         args.flist = iargs->flist;
680         args.total = iargs->total;
681         args.whichfork = XFS_DATA_FORK;
682         args.trans = iargs->trans;
683         args.justcheck = 0;
684         args.addname = args.oknoent = 1;
685         for (i = 0; i < INT_GET(hdr->count, ARCH_CONVERT); entry++, i++) {
686                 if (!entry->nameidx)
687                         continue;
688                 namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
689                 args.name = (char *)(namest->name);
690                 args.namelen = entry->namelen;
691                 args.hashval = INT_GET(entry->hashval, ARCH_CONVERT);
692                 XFS_DIR_SF_GET_DIRINO(&namest->inumber, &args.inumber);
693                 xfs_dir_shortform_addname(&args);
694         }
695
696 out:
697         kmem_free(tmpbuffer, XFS_LBSIZE(dp->i_mount));
698         return(retval);
699 }
700
701 /*
702  * Convert from using a single leaf to a root node and a leaf.
703  */
704 int
705 xfs_dir_leaf_to_node(xfs_da_args_t *args)
706 {
707         xfs_dir_leafblock_t *leaf;
708         xfs_da_intnode_t *node;
709         xfs_inode_t *dp;
710         xfs_dabuf_t *bp1, *bp2;
711         xfs_dablk_t blkno;
712         int retval;
713
714         dp = args->dp;
715         retval = xfs_da_grow_inode(args, &blkno);
716         ASSERT(blkno == 1);
717         if (retval)
718                 return(retval);
719         retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp1,
720                                               XFS_DATA_FORK);
721         if (retval)
722                 return(retval);
723         ASSERT(bp1 != NULL);
724         retval = xfs_da_get_buf(args->trans, args->dp, 1, -1, &bp2,
725                                              XFS_DATA_FORK);
726         if (retval) {
727                 xfs_da_buf_done(bp1);
728                 return(retval);
729         }
730         ASSERT(bp2 != NULL);
731         memcpy(bp2->data, bp1->data, XFS_LBSIZE(dp->i_mount));
732         xfs_da_buf_done(bp1);
733         xfs_da_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1);
734
735         /*
736          * Set up the new root node.
737          */
738         retval = xfs_da_node_create(args, 0, 1, &bp1, XFS_DATA_FORK);
739         if (retval) {
740                 xfs_da_buf_done(bp2);
741                 return(retval);
742         }
743         node = bp1->data;
744         leaf = bp2->data;
745         ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
746         INT_SET(node->btree[0].hashval, ARCH_CONVERT, INT_GET(leaf->entries[ INT_GET(leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT));
747         xfs_da_buf_done(bp2);
748         INT_SET(node->btree[0].before, ARCH_CONVERT, blkno);
749         INT_SET(node->hdr.count, ARCH_CONVERT, 1);
750         xfs_da_log_buf(args->trans, bp1,
751                 XFS_DA_LOGRANGE(node, &node->btree[0], sizeof(node->btree[0])));
752         xfs_da_buf_done(bp1);
753
754         return(retval);
755 }
756
757
758 /*========================================================================
759  * Routines used for growing the Btree.
760  *========================================================================*/
761
762 /*
763  * Create the initial contents of a leaf directory
764  * or a leaf in a node directory.
765  */
766 STATIC int
767 xfs_dir_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
768 {
769         xfs_dir_leafblock_t *leaf;
770         xfs_dir_leaf_hdr_t *hdr;
771         xfs_inode_t *dp;
772         xfs_dabuf_t *bp;
773         int retval;
774
775         dp = args->dp;
776         ASSERT(dp != NULL);
777         retval = xfs_da_get_buf(args->trans, dp, blkno, -1, &bp, XFS_DATA_FORK);
778         if (retval)
779                 return(retval);
780         ASSERT(bp != NULL);
781         leaf = bp->data;
782         memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount));
783         hdr = &leaf->hdr;
784         INT_SET(hdr->info.magic, ARCH_CONVERT, XFS_DIR_LEAF_MAGIC);
785         INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount));
786         if (!hdr->firstused)
787                 INT_SET(hdr->firstused, ARCH_CONVERT, XFS_LBSIZE(dp->i_mount) - 1);
788         INT_SET(hdr->freemap[0].base, ARCH_CONVERT, sizeof(xfs_dir_leaf_hdr_t));
789         INT_SET(hdr->freemap[0].size, ARCH_CONVERT, INT_GET(hdr->firstused, ARCH_CONVERT) - INT_GET(hdr->freemap[0].base, ARCH_CONVERT));
790
791         xfs_da_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1);
792
793         *bpp = bp;
794         return(0);
795 }
796
797 /*
798  * Split the leaf node, rebalance, then add the new entry.
799  */
800 int
801 xfs_dir_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
802                                   xfs_da_state_blk_t *newblk)
803 {
804         xfs_dablk_t blkno;
805         xfs_da_args_t *args;
806         int error;
807
808         /*
809          * Allocate space for a new leaf node.
810          */
811         args = state->args;
812         ASSERT(args != NULL);
813         ASSERT(oldblk->magic == XFS_DIR_LEAF_MAGIC);
814         error = xfs_da_grow_inode(args, &blkno);
815         if (error)
816                 return(error);
817         error = xfs_dir_leaf_create(args, blkno, &newblk->bp);
818         if (error)
819                 return(error);
820         newblk->blkno = blkno;
821         newblk->magic = XFS_DIR_LEAF_MAGIC;
822
823         /*
824          * Rebalance the entries across the two leaves.
825          */
826         xfs_dir_leaf_rebalance(state, oldblk, newblk);
827         error = xfs_da_blk_link(state, oldblk, newblk);
828         if (error)
829                 return(error);
830
831         /*
832          * Insert the new entry in the correct block.
833          */
834         if (state->inleaf) {
835                 error = xfs_dir_leaf_add(oldblk->bp, args, oldblk->index);
836         } else {
837                 error = xfs_dir_leaf_add(newblk->bp, args, newblk->index);
838         }
839
840         /*
841          * Update last hashval in each block since we added the name.
842          */
843         oldblk->hashval = xfs_dir_leaf_lasthash(oldblk->bp, NULL);
844         newblk->hashval = xfs_dir_leaf_lasthash(newblk->bp, NULL);
845         return(error);
846 }
847
848 /*
849  * Add a name to the leaf directory structure.
850  *
851  * Must take into account fragmented leaves and leaves where spacemap has
852  * lost some freespace information (ie: holes).
853  */
854 int
855 xfs_dir_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args, int index)
856 {
857         xfs_dir_leafblock_t *leaf;
858         xfs_dir_leaf_hdr_t *hdr;
859         xfs_dir_leaf_map_t *map;
860         int tablesize, entsize, sum, i, tmp, error;
861
862         leaf = bp->data;
863         ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
864         ASSERT((index >= 0) && (index <= INT_GET(leaf->hdr.count, ARCH_CONVERT)));
865         hdr = &leaf->hdr;
866         entsize = XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen);
867
868         /*
869          * Search through freemap for first-fit on new name length.
870          * (may need to figure in size of entry struct too)
871          */
872         tablesize = (INT_GET(hdr->count, ARCH_CONVERT) + 1) * (uint)sizeof(xfs_dir_leaf_entry_t)
873                         + (uint)sizeof(xfs_dir_leaf_hdr_t);
874         map = &hdr->freemap[XFS_DIR_LEAF_MAPSIZE-1];
875         for (sum = 0, i = XFS_DIR_LEAF_MAPSIZE-1; i >= 0; map--, i--) {
876                 if (tablesize > INT_GET(hdr->firstused, ARCH_CONVERT)) {
877                         sum += INT_GET(map->size, ARCH_CONVERT);
878                         continue;
879                 }
880                 if (!map->size)
881                         continue;       /* no space in this map */
882                 tmp = entsize;
883                 if (INT_GET(map->base, ARCH_CONVERT) < INT_GET(hdr->firstused, ARCH_CONVERT))
884                         tmp += (uint)sizeof(xfs_dir_leaf_entry_t);
885                 if (INT_GET(map->size, ARCH_CONVERT) >= tmp) {
886                         if (!args->justcheck)
887                                 xfs_dir_leaf_add_work(bp, args, index, i);
888                         return(0);
889                 }
890                 sum += INT_GET(map->size, ARCH_CONVERT);
891         }
892
893         /*
894          * If there are no holes in the address space of the block,
895          * and we don't have enough freespace, then compaction will do us
896          * no good and we should just give up.
897          */
898         if (!hdr->holes && (sum < entsize))
899                 return(XFS_ERROR(ENOSPC));
900
901         /*
902          * Compact the entries to coalesce free space.
903          * Pass the justcheck flag so the checking pass can return
904          * an error, without changing anything, if it won't fit.
905          */
906         error = xfs_dir_leaf_compact(args->trans, bp,
907                         args->total == 0 ?
908                                 entsize +
909                                 (uint)sizeof(xfs_dir_leaf_entry_t) : 0,
910                         args->justcheck);
911         if (error)
912                 return(error);
913         /*
914          * After compaction, the block is guaranteed to have only one
915          * free region, in freemap[0].  If it is not big enough, give up.
916          */
917         if (INT_GET(hdr->freemap[0].size, ARCH_CONVERT) <
918             (entsize + (uint)sizeof(xfs_dir_leaf_entry_t)))
919                 return(XFS_ERROR(ENOSPC));
920
921         if (!args->justcheck)
922                 xfs_dir_leaf_add_work(bp, args, index, 0);
923         return(0);
924 }
925
926 /*
927  * Add a name to a leaf directory structure.
928  */
929 STATIC void
930 xfs_dir_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int index,
931                       int mapindex)
932 {
933         xfs_dir_leafblock_t *leaf;
934         xfs_dir_leaf_hdr_t *hdr;
935         xfs_dir_leaf_entry_t *entry;
936         xfs_dir_leaf_name_t *namest;
937         xfs_dir_leaf_map_t *map;
938         /* REFERENCED */
939         xfs_mount_t *mp;
940         int tmp, i;
941
942         leaf = bp->data;
943         ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
944         hdr = &leaf->hdr;
945         ASSERT((mapindex >= 0) && (mapindex < XFS_DIR_LEAF_MAPSIZE));
946         ASSERT((index >= 0) && (index <= INT_GET(hdr->count, ARCH_CONVERT)));
947
948         /*
949          * Force open some space in the entry array and fill it in.
950          */
951         entry = &leaf->entries[index];
952         if (index < INT_GET(hdr->count, ARCH_CONVERT)) {
953                 tmp  = INT_GET(hdr->count, ARCH_CONVERT) - index;
954                 tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
955                 memmove(entry + 1, entry, tmp);
956                 xfs_da_log_buf(args->trans, bp,
957                     XFS_DA_LOGRANGE(leaf, entry, tmp + (uint)sizeof(*entry)));
958         }
959         INT_MOD(hdr->count, ARCH_CONVERT, +1);
960
961         /*
962          * Allocate space for the new string (at the end of the run).
963          */
964         map = &hdr->freemap[mapindex];
965         mp = args->trans->t_mountp;
966         ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp));
967         ASSERT(INT_GET(map->size, ARCH_CONVERT) >= XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen));
968         ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp));
969         INT_MOD(map->size, ARCH_CONVERT, -(XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen)));
970         INT_SET(entry->nameidx, ARCH_CONVERT, INT_GET(map->base, ARCH_CONVERT) + INT_GET(map->size, ARCH_CONVERT));
971         INT_SET(entry->hashval, ARCH_CONVERT, args->hashval);
972         entry->namelen = args->namelen;
973         xfs_da_log_buf(args->trans, bp,
974             XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
975
976         /*
977          * Copy the string and inode number into the new space.
978          */
979         namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
980         XFS_DIR_SF_PUT_DIRINO(&args->inumber, &namest->inumber);
981         memcpy(namest->name, args->name, args->namelen);
982         xfs_da_log_buf(args->trans, bp,
983             XFS_DA_LOGRANGE(leaf, namest, XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry)));
984
985         /*
986          * Update the control info for this leaf node
987          */
988         if (INT_GET(entry->nameidx, ARCH_CONVERT) < INT_GET(hdr->firstused, ARCH_CONVERT))
989                 INT_COPY(hdr->firstused, entry->nameidx, ARCH_CONVERT);
990         ASSERT(INT_GET(hdr->firstused, ARCH_CONVERT) >= ((INT_GET(hdr->count, ARCH_CONVERT)*sizeof(*entry))+sizeof(*hdr)));
991         tmp = (INT_GET(hdr->count, ARCH_CONVERT)-1) * (uint)sizeof(xfs_dir_leaf_entry_t)
992                         + (uint)sizeof(xfs_dir_leaf_hdr_t);
993         map = &hdr->freemap[0];
994         for (i = 0; i < XFS_DIR_LEAF_MAPSIZE; map++, i++) {
995                 if (INT_GET(map->base, ARCH_CONVERT) == tmp) {
996                         INT_MOD(map->base, ARCH_CONVERT, (uint)sizeof(xfs_dir_leaf_entry_t));
997                         INT_MOD(map->size, ARCH_CONVERT, -((uint)sizeof(xfs_dir_leaf_entry_t)));
998                 }
999         }
1000         INT_MOD(hdr->namebytes, ARCH_CONVERT, args->namelen);
1001         xfs_da_log_buf(args->trans, bp,
1002                 XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
1003 }
1004
1005 /*
1006  * Garbage collect a leaf directory block by copying it to a new buffer.
1007  */
1008 STATIC int
1009 xfs_dir_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp, int musthave,
1010                      int justcheck)
1011 {
1012         xfs_dir_leafblock_t *leaf_s, *leaf_d;
1013         xfs_dir_leaf_hdr_t *hdr_s, *hdr_d;
1014         xfs_mount_t *mp;
1015         char *tmpbuffer;
1016         char *tmpbuffer2=NULL;
1017         int rval;
1018         int lbsize;
1019
1020         mp = trans->t_mountp;
1021         lbsize = XFS_LBSIZE(mp);
1022         tmpbuffer = kmem_alloc(lbsize, KM_SLEEP);
1023         ASSERT(tmpbuffer != NULL);
1024         memcpy(tmpbuffer, bp->data, lbsize);
1025
1026         /*
1027          * Make a second copy in case xfs_dir_leaf_moveents()
1028          * below destroys the original.
1029          */
1030         if (musthave || justcheck) {
1031                 tmpbuffer2 = kmem_alloc(lbsize, KM_SLEEP);
1032                 memcpy(tmpbuffer2, bp->data, lbsize);
1033         }
1034         memset(bp->data, 0, lbsize);
1035
1036         /*
1037          * Copy basic information
1038          */
1039         leaf_s = (xfs_dir_leafblock_t *)tmpbuffer;
1040         leaf_d = bp->data;
1041         hdr_s = &leaf_s->hdr;
1042         hdr_d = &leaf_d->hdr;
1043         hdr_d->info = hdr_s->info;      /* struct copy */
1044         INT_SET(hdr_d->firstused, ARCH_CONVERT, lbsize);
1045         if (!hdr_d->firstused)
1046                 INT_SET(hdr_d->firstused, ARCH_CONVERT, lbsize - 1);
1047         hdr_d->namebytes = 0;
1048         hdr_d->count = 0;
1049         hdr_d->holes = 0;
1050         INT_SET(hdr_d->freemap[0].base, ARCH_CONVERT, sizeof(xfs_dir_leaf_hdr_t));
1051         INT_SET(hdr_d->freemap[0].size, ARCH_CONVERT, INT_GET(hdr_d->firstused, ARCH_CONVERT) - INT_GET(hdr_d->freemap[0].base, ARCH_CONVERT));
1052
1053         /*
1054          * Copy all entry's in the same (sorted) order,
1055          * but allocate filenames packed and in sequence.
1056          * This changes the source (leaf_s) as well.
1057          */
1058         xfs_dir_leaf_moveents(leaf_s, 0, leaf_d, 0, (int)INT_GET(hdr_s->count, ARCH_CONVERT), mp);
1059
1060         if (musthave && INT_GET(hdr_d->freemap[0].size, ARCH_CONVERT) < musthave)
1061                 rval = XFS_ERROR(ENOSPC);
1062         else
1063                 rval = 0;
1064
1065         if (justcheck || rval == ENOSPC) {
1066                 ASSERT(tmpbuffer2);
1067                 memcpy(bp->data, tmpbuffer2, lbsize);
1068         } else {
1069                 xfs_da_log_buf(trans, bp, 0, lbsize - 1);
1070         }
1071
1072         kmem_free(tmpbuffer, lbsize);
1073         if (musthave || justcheck)
1074                 kmem_free(tmpbuffer2, lbsize);
1075         return(rval);
1076 }
1077
1078 /*
1079  * Redistribute the directory entries between two leaf nodes,
1080  * taking into account the size of the new entry.
1081  *
1082  * NOTE: if new block is empty, then it will get the upper half of old block.
1083  */
1084 STATIC void
1085 xfs_dir_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
1086                                       xfs_da_state_blk_t *blk2)
1087 {
1088         xfs_da_state_blk_t *tmp_blk;
1089         xfs_dir_leafblock_t *leaf1, *leaf2;
1090         xfs_dir_leaf_hdr_t *hdr1, *hdr2;
1091         int count, totallen, max, space, swap;
1092
1093         /*
1094          * Set up environment.
1095          */
1096         ASSERT(blk1->magic == XFS_DIR_LEAF_MAGIC);
1097         ASSERT(blk2->magic == XFS_DIR_LEAF_MAGIC);
1098         leaf1 = blk1->bp->data;
1099         leaf2 = blk2->bp->data;
1100         ASSERT(INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
1101         ASSERT(INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
1102
1103         /*
1104          * Check ordering of blocks, reverse if it makes things simpler.
1105          */
1106         swap = 0;
1107         if (xfs_dir_leaf_order(blk1->bp, blk2->bp)) {
1108                 tmp_blk = blk1;
1109                 blk1 = blk2;
1110                 blk2 = tmp_blk;
1111                 leaf1 = blk1->bp->data;
1112                 leaf2 = blk2->bp->data;
1113                 swap = 1;
1114         }
1115         hdr1 = &leaf1->hdr;
1116         hdr2 = &leaf2->hdr;
1117
1118         /*
1119          * Examine entries until we reduce the absolute difference in
1120          * byte usage between the two blocks to a minimum.  Then get
1121          * the direction to copy and the number of elements to move.
1122          */
1123         state->inleaf = xfs_dir_leaf_figure_balance(state, blk1, blk2,
1124                                                            &count, &totallen);
1125         if (swap)
1126                 state->inleaf = !state->inleaf;
1127
1128         /*
1129          * Move any entries required from leaf to leaf:
1130          */
1131         if (count < INT_GET(hdr1->count, ARCH_CONVERT)) {
1132                 /*
1133                  * Figure the total bytes to be added to the destination leaf.
1134                  */
1135                 count = INT_GET(hdr1->count, ARCH_CONVERT) - count;     /* number entries being moved */
1136                 space  = INT_GET(hdr1->namebytes, ARCH_CONVERT) - totallen;
1137                 space += count * ((uint)sizeof(xfs_dir_leaf_name_t)-1);
1138                 space += count * (uint)sizeof(xfs_dir_leaf_entry_t);
1139
1140                 /*
1141                  * leaf2 is the destination, compact it if it looks tight.
1142                  */
1143                 max  = INT_GET(hdr2->firstused, ARCH_CONVERT) - (uint)sizeof(xfs_dir_leaf_hdr_t);
1144                 max -= INT_GET(hdr2->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t);
1145                 if (space > max) {
1146                         xfs_dir_leaf_compact(state->args->trans, blk2->bp,
1147                                                                  0, 0);
1148                 }
1149
1150                 /*
1151                  * Move high entries from leaf1 to low end of leaf2.
1152                  */
1153                 xfs_dir_leaf_moveents(leaf1, INT_GET(hdr1->count, ARCH_CONVERT) - count,
1154                                              leaf2, 0, count, state->mp);
1155
1156                 xfs_da_log_buf(state->args->trans, blk1->bp, 0,
1157                                                    state->blocksize-1);
1158                 xfs_da_log_buf(state->args->trans, blk2->bp, 0,
1159                                                    state->blocksize-1);
1160
1161         } else if (count > INT_GET(hdr1->count, ARCH_CONVERT)) {
1162                 /*
1163                  * Figure the total bytes to be added to the destination leaf.
1164                  */
1165                 count -= INT_GET(hdr1->count, ARCH_CONVERT);            /* number entries being moved */
1166                 space  = totallen - INT_GET(hdr1->namebytes, ARCH_CONVERT);
1167                 space += count * ((uint)sizeof(xfs_dir_leaf_name_t)-1);
1168                 space += count * (uint)sizeof(xfs_dir_leaf_entry_t);
1169
1170                 /*
1171                  * leaf1 is the destination, compact it if it looks tight.
1172                  */
1173                 max  = INT_GET(hdr1->firstused, ARCH_CONVERT) - (uint)sizeof(xfs_dir_leaf_hdr_t);
1174                 max -= INT_GET(hdr1->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t);
1175                 if (space > max) {
1176                         xfs_dir_leaf_compact(state->args->trans, blk1->bp,
1177                                                                  0, 0);
1178                 }
1179
1180                 /*
1181                  * Move low entries from leaf2 to high end of leaf1.
1182                  */
1183                 xfs_dir_leaf_moveents(leaf2, 0, leaf1, (int)INT_GET(hdr1->count, ARCH_CONVERT),
1184                                              count, state->mp);
1185
1186                 xfs_da_log_buf(state->args->trans, blk1->bp, 0,
1187                                                    state->blocksize-1);
1188                 xfs_da_log_buf(state->args->trans, blk2->bp, 0,
1189                                                    state->blocksize-1);
1190         }
1191
1192         /*
1193          * Copy out last hashval in each block for B-tree code.
1194          */
1195         blk1->hashval = INT_GET(leaf1->entries[ INT_GET(leaf1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
1196         blk2->hashval = INT_GET(leaf2->entries[ INT_GET(leaf2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
1197
1198         /*
1199          * Adjust the expected index for insertion.
1200          * GROT: this doesn't work unless blk2 was originally empty.
1201          */
1202         if (!state->inleaf) {
1203                 blk2->index = blk1->index - INT_GET(leaf1->hdr.count, ARCH_CONVERT);
1204         }
1205 }
1206
1207 /*
1208  * Examine entries until we reduce the absolute difference in
1209  * byte usage between the two blocks to a minimum.
1210  * GROT: Is this really necessary?  With other than a 512 byte blocksize,
1211  * GROT: there will always be enough room in either block for a new entry.
1212  * GROT: Do a double-split for this case?
1213  */
1214 STATIC int
1215 xfs_dir_leaf_figure_balance(xfs_da_state_t *state,
1216                                            xfs_da_state_blk_t *blk1,
1217                                            xfs_da_state_blk_t *blk2,
1218                                            int *countarg, int *namebytesarg)
1219 {
1220         xfs_dir_leafblock_t *leaf1, *leaf2;
1221         xfs_dir_leaf_hdr_t *hdr1, *hdr2;
1222         xfs_dir_leaf_entry_t *entry;
1223         int count, max, totallen, half;
1224         int lastdelta, foundit, tmp;
1225
1226         /*
1227          * Set up environment.
1228          */
1229         leaf1 = blk1->bp->data;
1230         leaf2 = blk2->bp->data;
1231         hdr1 = &leaf1->hdr;
1232         hdr2 = &leaf2->hdr;
1233         foundit = 0;
1234         totallen = 0;
1235
1236         /*
1237          * Examine entries until we reduce the absolute difference in
1238          * byte usage between the two blocks to a minimum.
1239          */
1240         max = INT_GET(hdr1->count, ARCH_CONVERT) + INT_GET(hdr2->count, ARCH_CONVERT);
1241         half  = (max+1) * (uint)(sizeof(*entry)+sizeof(xfs_dir_leaf_entry_t)-1);
1242         half += INT_GET(hdr1->namebytes, ARCH_CONVERT) + INT_GET(hdr2->namebytes, ARCH_CONVERT) + state->args->namelen;
1243         half /= 2;
1244         lastdelta = state->blocksize;
1245         entry = &leaf1->entries[0];
1246         for (count = 0; count < max; entry++, count++) {
1247
1248 #define XFS_DIR_ABS(A)  (((A) < 0) ? -(A) : (A))
1249                 /*
1250                  * The new entry is in the first block, account for it.
1251                  */
1252                 if (count == blk1->index) {
1253                         tmp = totallen + (uint)sizeof(*entry)
1254                                 + XFS_DIR_LEAF_ENTSIZE_BYNAME(state->args->namelen);
1255                         if (XFS_DIR_ABS(half - tmp) > lastdelta)
1256                                 break;
1257                         lastdelta = XFS_DIR_ABS(half - tmp);
1258                         totallen = tmp;
1259                         foundit = 1;
1260                 }
1261
1262                 /*
1263                  * Wrap around into the second block if necessary.
1264                  */
1265                 if (count == INT_GET(hdr1->count, ARCH_CONVERT)) {
1266                         leaf1 = leaf2;
1267                         entry = &leaf1->entries[0];
1268                 }
1269
1270                 /*
1271                  * Figure out if next leaf entry would be too much.
1272                  */
1273                 tmp = totallen + (uint)sizeof(*entry)
1274                                 + XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry);
1275                 if (XFS_DIR_ABS(half - tmp) > lastdelta)
1276                         break;
1277                 lastdelta = XFS_DIR_ABS(half - tmp);
1278                 totallen = tmp;
1279 #undef XFS_DIR_ABS
1280         }
1281
1282         /*
1283          * Calculate the number of namebytes that will end up in lower block.
1284          * If new entry not in lower block, fix up the count.
1285          */
1286         totallen -=
1287                 count * (uint)(sizeof(*entry)+sizeof(xfs_dir_leaf_entry_t)-1);
1288         if (foundit) {
1289                 totallen -= (sizeof(*entry)+sizeof(xfs_dir_leaf_entry_t)-1) +
1290                             state->args->namelen;
1291         }
1292
1293         *countarg = count;
1294         *namebytesarg = totallen;
1295         return(foundit);
1296 }
1297
1298 /*========================================================================
1299  * Routines used for shrinking the Btree.
1300  *========================================================================*/
1301
1302 /*
1303  * Check a leaf block and its neighbors to see if the block should be
1304  * collapsed into one or the other neighbor.  Always keep the block
1305  * with the smaller block number.
1306  * If the current block is over 50% full, don't try to join it, return 0.
1307  * If the block is empty, fill in the state structure and return 2.
1308  * If it can be collapsed, fill in the state structure and return 1.
1309  * If nothing can be done, return 0.
1310  */
1311 int
1312 xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
1313 {
1314         xfs_dir_leafblock_t *leaf;
1315         xfs_da_state_blk_t *blk;
1316         xfs_da_blkinfo_t *info;
1317         int count, bytes, forward, error, retval, i;
1318         xfs_dablk_t blkno;
1319         xfs_dabuf_t *bp;
1320
1321         /*
1322          * Check for the degenerate case of the block being over 50% full.
1323          * If so, it's not worth even looking to see if we might be able
1324          * to coalesce with a sibling.
1325          */
1326         blk = &state->path.blk[ state->path.active-1 ];
1327         info = blk->bp->data;
1328         ASSERT(INT_GET(info->magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
1329         leaf = (xfs_dir_leafblock_t *)info;
1330         count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
1331         bytes = (uint)sizeof(xfs_dir_leaf_hdr_t) +
1332                 count * (uint)sizeof(xfs_dir_leaf_entry_t) +
1333                 count * ((uint)sizeof(xfs_dir_leaf_name_t)-1) +
1334                 INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
1335         if (bytes > (state->blocksize >> 1)) {
1336                 *action = 0;    /* blk over 50%, don't try to join */
1337                 return(0);
1338         }
1339
1340         /*
1341          * Check for the degenerate case of the block being empty.
1342          * If the block is empty, we'll simply delete it, no need to
1343          * coalesce it with a sibling block.  We choose (aribtrarily)
1344          * to merge with the forward block unless it is NULL.
1345          */
1346         if (count == 0) {
1347                 /*
1348                  * Make altpath point to the block we want to keep and
1349                  * path point to the block we want to drop (this one).
1350                  */
1351                 forward = info->forw;
1352                 memcpy(&state->altpath, &state->path, sizeof(state->path));
1353                 error = xfs_da_path_shift(state, &state->altpath, forward,
1354                                                  0, &retval);
1355                 if (error)
1356                         return(error);
1357                 if (retval) {
1358                         *action = 0;
1359                 } else {
1360                         *action = 2;
1361                 }
1362                 return(0);
1363         }
1364
1365         /*
1366          * Examine each sibling block to see if we can coalesce with
1367          * at least 25% free space to spare.  We need to figure out
1368          * whether to merge with the forward or the backward block.
1369          * We prefer coalescing with the lower numbered sibling so as
1370          * to shrink a directory over time.
1371          */
1372         forward = (INT_GET(info->forw, ARCH_CONVERT) < INT_GET(info->back, ARCH_CONVERT));      /* start with smaller blk num */
1373         for (i = 0; i < 2; forward = !forward, i++) {
1374                 if (forward)
1375                         blkno = INT_GET(info->forw, ARCH_CONVERT);
1376                 else
1377                         blkno = INT_GET(info->back, ARCH_CONVERT);
1378                 if (blkno == 0)
1379                         continue;
1380                 error = xfs_da_read_buf(state->args->trans, state->args->dp,
1381                                                             blkno, -1, &bp,
1382                                                             XFS_DATA_FORK);
1383                 if (error)
1384                         return(error);
1385                 ASSERT(bp != NULL);
1386
1387                 leaf = (xfs_dir_leafblock_t *)info;
1388                 count  = INT_GET(leaf->hdr.count, ARCH_CONVERT);
1389                 bytes  = state->blocksize - (state->blocksize>>2);
1390                 bytes -= INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
1391                 leaf = bp->data;
1392                 ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
1393                 count += INT_GET(leaf->hdr.count, ARCH_CONVERT);
1394                 bytes -= INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
1395                 bytes -= count * ((uint)sizeof(xfs_dir_leaf_name_t) - 1);
1396                 bytes -= count * (uint)sizeof(xfs_dir_leaf_entry_t);
1397                 bytes -= (uint)sizeof(xfs_dir_leaf_hdr_t);
1398                 if (bytes >= 0)
1399                         break;  /* fits with at least 25% to spare */
1400
1401                 xfs_da_brelse(state->args->trans, bp);
1402         }
1403         if (i >= 2) {
1404                 *action = 0;
1405                 return(0);
1406         }
1407         xfs_da_buf_done(bp);
1408
1409         /*
1410          * Make altpath point to the block we want to keep (the lower
1411          * numbered block) and path point to the block we want to drop.
1412          */
1413         memcpy(&state->altpath, &state->path, sizeof(state->path));
1414         if (blkno < blk->blkno) {
1415                 error = xfs_da_path_shift(state, &state->altpath, forward,
1416                                                  0, &retval);
1417         } else {
1418                 error = xfs_da_path_shift(state, &state->path, forward,
1419                                                  0, &retval);
1420         }
1421         if (error)
1422                 return(error);
1423         if (retval) {
1424                 *action = 0;
1425         } else {
1426                 *action = 1;
1427         }
1428         return(0);
1429 }
1430
1431 /*
1432  * Remove a name from the leaf directory structure.
1433  *
1434  * Return 1 if leaf is less than 37% full, 0 if >= 37% full.
1435  * If two leaves are 37% full, when combined they will leave 25% free.
1436  */
1437 int
1438 xfs_dir_leaf_remove(xfs_trans_t *trans, xfs_dabuf_t *bp, int index)
1439 {
1440         xfs_dir_leafblock_t *leaf;
1441         xfs_dir_leaf_hdr_t *hdr;
1442         xfs_dir_leaf_map_t *map;
1443         xfs_dir_leaf_entry_t *entry;
1444         xfs_dir_leaf_name_t *namest;
1445         int before, after, smallest, entsize;
1446         int tablesize, tmp, i;
1447         xfs_mount_t *mp;
1448
1449         leaf = bp->data;
1450         ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
1451         hdr = &leaf->hdr;
1452         mp = trans->t_mountp;
1453         ASSERT((INT_GET(hdr->count, ARCH_CONVERT) > 0) && (INT_GET(hdr->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8)));
1454         ASSERT((index >= 0) && (index < INT_GET(hdr->count, ARCH_CONVERT)));
1455         ASSERT(INT_GET(hdr->firstused, ARCH_CONVERT) >= ((INT_GET(hdr->count, ARCH_CONVERT)*sizeof(*entry))+sizeof(*hdr)));
1456         entry = &leaf->entries[index];
1457         ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) >= INT_GET(hdr->firstused, ARCH_CONVERT));
1458         ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) < XFS_LBSIZE(mp));
1459
1460         /*
1461          * Scan through free region table:
1462          *    check for adjacency of free'd entry with an existing one,
1463          *    find smallest free region in case we need to replace it,
1464          *    adjust any map that borders the entry table,
1465          */
1466         tablesize = INT_GET(hdr->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t)
1467                         + (uint)sizeof(xfs_dir_leaf_hdr_t);
1468         map = &hdr->freemap[0];
1469         tmp = INT_GET(map->size, ARCH_CONVERT);
1470         before = after = -1;
1471         smallest = XFS_DIR_LEAF_MAPSIZE - 1;
1472         entsize = XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry);
1473         for (i = 0; i < XFS_DIR_LEAF_MAPSIZE; map++, i++) {
1474                 ASSERT(INT_GET(map->base, ARCH_CONVERT) < XFS_LBSIZE(mp));
1475                 ASSERT(INT_GET(map->size, ARCH_CONVERT) < XFS_LBSIZE(mp));
1476                 if (INT_GET(map->base, ARCH_CONVERT) == tablesize) {
1477                         INT_MOD(map->base, ARCH_CONVERT, -((uint)sizeof(xfs_dir_leaf_entry_t)));
1478                         INT_MOD(map->size, ARCH_CONVERT, (uint)sizeof(xfs_dir_leaf_entry_t));
1479                 }
1480
1481                 if ((INT_GET(map->base, ARCH_CONVERT) + INT_GET(map->size, ARCH_CONVERT)) == INT_GET(entry->nameidx, ARCH_CONVERT)) {
1482                         before = i;
1483                 } else if (INT_GET(map->base, ARCH_CONVERT) == (INT_GET(entry->nameidx, ARCH_CONVERT) + entsize)) {
1484                         after = i;
1485                 } else if (INT_GET(map->size, ARCH_CONVERT) < tmp) {
1486                         tmp = INT_GET(map->size, ARCH_CONVERT);
1487                         smallest = i;
1488                 }
1489         }
1490
1491         /*
1492          * Coalesce adjacent freemap regions,
1493          * or replace the smallest region.
1494          */
1495         if ((before >= 0) || (after >= 0)) {
1496                 if ((before >= 0) && (after >= 0)) {
1497                         map = &hdr->freemap[before];
1498                         INT_MOD(map->size, ARCH_CONVERT, entsize);
1499                         INT_MOD(map->size, ARCH_CONVERT, INT_GET(hdr->freemap[after].size, ARCH_CONVERT));
1500                         hdr->freemap[after].base = 0;
1501                         hdr->freemap[after].size = 0;
1502                 } else if (before >= 0) {
1503                         map = &hdr->freemap[before];
1504                         INT_MOD(map->size, ARCH_CONVERT, entsize);
1505                 } else {
1506                         map = &hdr->freemap[after];
1507                         INT_COPY(map->base, entry->nameidx, ARCH_CONVERT);
1508                         INT_MOD(map->size, ARCH_CONVERT, entsize);
1509                 }
1510         } else {
1511                 /*
1512                  * Replace smallest region (if it is smaller than free'd entry)
1513                  */
1514                 map = &hdr->freemap[smallest];
1515                 if (INT_GET(map->size, ARCH_CONVERT) < entsize) {
1516                         INT_COPY(map->base, entry->nameidx, ARCH_CONVERT);
1517                         INT_SET(map->size, ARCH_CONVERT, entsize);
1518                 }
1519         }
1520
1521         /*
1522          * Did we remove the first entry?
1523          */
1524         if (INT_GET(entry->nameidx, ARCH_CONVERT) == INT_GET(hdr->firstused, ARCH_CONVERT))
1525                 smallest = 1;
1526         else
1527                 smallest = 0;
1528
1529         /*
1530          * Compress the remaining entries and zero out the removed stuff.
1531          */
1532         namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
1533         memset((char *)namest, 0, entsize);
1534         xfs_da_log_buf(trans, bp, XFS_DA_LOGRANGE(leaf, namest, entsize));
1535
1536         INT_MOD(hdr->namebytes, ARCH_CONVERT, -(entry->namelen));
1537         tmp = (INT_GET(hdr->count, ARCH_CONVERT) - index) * (uint)sizeof(xfs_dir_leaf_entry_t);
1538         memmove(entry, entry + 1, tmp);
1539         INT_MOD(hdr->count, ARCH_CONVERT, -1);
1540         xfs_da_log_buf(trans, bp,
1541             XFS_DA_LOGRANGE(leaf, entry, tmp + (uint)sizeof(*entry)));
1542         entry = &leaf->entries[INT_GET(hdr->count, ARCH_CONVERT)];
1543         memset((char *)entry, 0, sizeof(xfs_dir_leaf_entry_t));
1544
1545         /*
1546          * If we removed the first entry, re-find the first used byte
1547          * in the name area.  Note that if the entry was the "firstused",
1548          * then we don't have a "hole" in our block resulting from
1549          * removing the name.
1550          */
1551         if (smallest) {
1552                 tmp = XFS_LBSIZE(mp);
1553                 entry = &leaf->entries[0];
1554                 for (i = INT_GET(hdr->count, ARCH_CONVERT)-1; i >= 0; entry++, i--) {
1555                         ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) >= INT_GET(hdr->firstused, ARCH_CONVERT));
1556                         ASSERT(INT_GET(entry->nameidx, ARCH_CONVERT) < XFS_LBSIZE(mp));
1557                         if (INT_GET(entry->nameidx, ARCH_CONVERT) < tmp)
1558                                 tmp = INT_GET(entry->nameidx, ARCH_CONVERT);
1559                 }
1560                 INT_SET(hdr->firstused, ARCH_CONVERT, tmp);
1561                 if (!hdr->firstused)
1562                         INT_SET(hdr->firstused, ARCH_CONVERT, tmp - 1);
1563         } else {
1564                 hdr->holes = 1;         /* mark as needing compaction */
1565         }
1566
1567         xfs_da_log_buf(trans, bp, XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
1568
1569         /*
1570          * Check if leaf is less than 50% full, caller may want to
1571          * "join" the leaf with a sibling if so.
1572          */
1573         tmp  = (uint)sizeof(xfs_dir_leaf_hdr_t);
1574         tmp += INT_GET(leaf->hdr.count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t);
1575         tmp += INT_GET(leaf->hdr.count, ARCH_CONVERT) * ((uint)sizeof(xfs_dir_leaf_name_t) - 1);
1576         tmp += INT_GET(leaf->hdr.namebytes, ARCH_CONVERT);
1577         if (tmp < mp->m_dir_magicpct)
1578                 return(1);                      /* leaf is < 37% full */
1579         return(0);
1580 }
1581
1582 /*
1583  * Move all the directory entries from drop_leaf into save_leaf.
1584  */
1585 void
1586 xfs_dir_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1587                                       xfs_da_state_blk_t *save_blk)
1588 {
1589         xfs_dir_leafblock_t *drop_leaf, *save_leaf, *tmp_leaf;
1590         xfs_dir_leaf_hdr_t *drop_hdr, *save_hdr, *tmp_hdr;
1591         xfs_mount_t *mp;
1592         char *tmpbuffer;
1593
1594         /*
1595          * Set up environment.
1596          */
1597         mp = state->mp;
1598         ASSERT(drop_blk->magic == XFS_DIR_LEAF_MAGIC);
1599         ASSERT(save_blk->magic == XFS_DIR_LEAF_MAGIC);
1600         drop_leaf = drop_blk->bp->data;
1601         save_leaf = save_blk->bp->data;
1602         ASSERT(INT_GET(drop_leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
1603         ASSERT(INT_GET(save_leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
1604         drop_hdr = &drop_leaf->hdr;
1605         save_hdr = &save_leaf->hdr;
1606
1607         /*
1608          * Save last hashval from dying block for later Btree fixup.
1609          */
1610         drop_blk->hashval = INT_GET(drop_leaf->entries[ drop_leaf->hdr.count-1 ].hashval, ARCH_CONVERT);
1611
1612         /*
1613          * Check if we need a temp buffer, or can we do it in place.
1614          * Note that we don't check "leaf" for holes because we will
1615          * always be dropping it, toosmall() decided that for us already.
1616          */
1617         if (save_hdr->holes == 0) {
1618                 /*
1619                  * dest leaf has no holes, so we add there.  May need
1620                  * to make some room in the entry array.
1621                  */
1622                 if (xfs_dir_leaf_order(save_blk->bp, drop_blk->bp)) {
1623                         xfs_dir_leaf_moveents(drop_leaf, 0, save_leaf, 0,
1624                                                  (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
1625                 } else {
1626                         xfs_dir_leaf_moveents(drop_leaf, 0,
1627                                               save_leaf, INT_GET(save_hdr->count, ARCH_CONVERT),
1628                                               (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
1629                 }
1630         } else {
1631                 /*
1632                  * Destination has holes, so we make a temporary copy
1633                  * of the leaf and add them both to that.
1634                  */
1635                 tmpbuffer = kmem_alloc(state->blocksize, KM_SLEEP);
1636                 ASSERT(tmpbuffer != NULL);
1637                 memset(tmpbuffer, 0, state->blocksize);
1638                 tmp_leaf = (xfs_dir_leafblock_t *)tmpbuffer;
1639                 tmp_hdr = &tmp_leaf->hdr;
1640                 tmp_hdr->info = save_hdr->info; /* struct copy */
1641                 tmp_hdr->count = 0;
1642                 INT_SET(tmp_hdr->firstused, ARCH_CONVERT, state->blocksize);
1643                 if (!tmp_hdr->firstused)
1644                         INT_SET(tmp_hdr->firstused, ARCH_CONVERT, state->blocksize - 1);
1645                 tmp_hdr->namebytes = 0;
1646                 if (xfs_dir_leaf_order(save_blk->bp, drop_blk->bp)) {
1647                         xfs_dir_leaf_moveents(drop_leaf, 0, tmp_leaf, 0,
1648                                                  (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
1649                         xfs_dir_leaf_moveents(save_leaf, 0,
1650                                               tmp_leaf, INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT),
1651                                               (int)INT_GET(save_hdr->count, ARCH_CONVERT), mp);
1652                 } else {
1653                         xfs_dir_leaf_moveents(save_leaf, 0, tmp_leaf, 0,
1654                                                  (int)INT_GET(save_hdr->count, ARCH_CONVERT), mp);
1655                         xfs_dir_leaf_moveents(drop_leaf, 0,
1656                                               tmp_leaf, INT_GET(tmp_leaf->hdr.count, ARCH_CONVERT),
1657                                               (int)INT_GET(drop_hdr->count, ARCH_CONVERT), mp);
1658                 }
1659                 memcpy(save_leaf, tmp_leaf, state->blocksize);
1660                 kmem_free(tmpbuffer, state->blocksize);
1661         }
1662
1663         xfs_da_log_buf(state->args->trans, save_blk->bp, 0,
1664                                            state->blocksize - 1);
1665
1666         /*
1667          * Copy out last hashval in each block for B-tree code.
1668          */
1669         save_blk->hashval = INT_GET(save_leaf->entries[ INT_GET(save_leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT);
1670 }
1671
1672 /*========================================================================
1673  * Routines used for finding things in the Btree.
1674  *========================================================================*/
1675
1676 /*
1677  * Look up a name in a leaf directory structure.
1678  * This is the internal routine, it uses the caller's buffer.
1679  *
1680  * Note that duplicate keys are allowed, but only check within the
1681  * current leaf node.  The Btree code must check in adjacent leaf nodes.
1682  *
1683  * Return in *index the index into the entry[] array of either the found
1684  * entry, or where the entry should have been (insert before that entry).
1685  *
1686  * Don't change the args->inumber unless we find the filename.
1687  */
1688 int
1689 xfs_dir_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args, int *index)
1690 {
1691         xfs_dir_leafblock_t *leaf;
1692         xfs_dir_leaf_entry_t *entry;
1693         xfs_dir_leaf_name_t *namest;
1694         int probe, span;
1695         xfs_dahash_t hashval;
1696
1697         leaf = bp->data;
1698         ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
1699         ASSERT(INT_GET(leaf->hdr.count, ARCH_CONVERT) < (XFS_LBSIZE(args->dp->i_mount)/8));
1700
1701         /*
1702          * Binary search.  (note: small blocks will skip this loop)
1703          */
1704         hashval = args->hashval;
1705         probe = span = INT_GET(leaf->hdr.count, ARCH_CONVERT) / 2;
1706         for (entry = &leaf->entries[probe]; span > 4;
1707                    entry = &leaf->entries[probe]) {
1708                 span /= 2;
1709                 if (INT_GET(entry->hashval, ARCH_CONVERT) < hashval)
1710                         probe += span;
1711                 else if (INT_GET(entry->hashval, ARCH_CONVERT) > hashval)
1712                         probe -= span;
1713                 else
1714                         break;
1715         }
1716         ASSERT((probe >= 0) && \
1717                ((!leaf->hdr.count) || (probe < INT_GET(leaf->hdr.count, ARCH_CONVERT))));
1718         ASSERT((span <= 4) || (INT_GET(entry->hashval, ARCH_CONVERT) == hashval));
1719
1720         /*
1721          * Since we may have duplicate hashval's, find the first matching
1722          * hashval in the leaf.
1723          */
1724         while ((probe > 0) && (INT_GET(entry->hashval, ARCH_CONVERT) >= hashval)) {
1725                 entry--;
1726                 probe--;
1727         }
1728         while ((probe < INT_GET(leaf->hdr.count, ARCH_CONVERT)) && (INT_GET(entry->hashval, ARCH_CONVERT) < hashval)) {
1729                 entry++;
1730                 probe++;
1731         }
1732         if ((probe == INT_GET(leaf->hdr.count, ARCH_CONVERT)) || (INT_GET(entry->hashval, ARCH_CONVERT) != hashval)) {
1733                 *index = probe;
1734                 ASSERT(args->oknoent);
1735                 return(XFS_ERROR(ENOENT));
1736         }
1737
1738         /*
1739          * Duplicate keys may be present, so search all of them for a match.
1740          */
1741         while ((probe < INT_GET(leaf->hdr.count, ARCH_CONVERT)) && (INT_GET(entry->hashval, ARCH_CONVERT) == hashval)) {
1742                 namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, INT_GET(entry->nameidx, ARCH_CONVERT));
1743                 if (entry->namelen == args->namelen &&
1744                     namest->name[0] == args->name[0] &&
1745                     memcmp(args->name, namest->name, args->namelen) == 0) {
1746                         XFS_DIR_SF_GET_DIRINO(&namest->inumber, &args->inumber);
1747                         *index = probe;
1748                         return(XFS_ERROR(EEXIST));
1749                 }
1750                 entry++;
1751                 probe++;
1752         }
1753         *index = probe;
1754         ASSERT(probe == INT_GET(leaf->hdr.count, ARCH_CONVERT) || args->oknoent);
1755         return(XFS_ERROR(ENOENT));
1756 }
1757
1758 /*========================================================================
1759  * Utility routines.
1760  *========================================================================*/
1761
1762 /*
1763  * Move the indicated entries from one leaf to another.
1764  * NOTE: this routine modifies both source and destination leaves.
1765  */
1766 /* ARGSUSED */
1767 STATIC void
1768 xfs_dir_leaf_moveents(xfs_dir_leafblock_t *leaf_s, int start_s,
1769                       xfs_dir_leafblock_t *leaf_d, int start_d,
1770                       int count, xfs_mount_t *mp)
1771 {
1772         xfs_dir_leaf_hdr_t *hdr_s, *hdr_d;
1773         xfs_dir_leaf_entry_t *entry_s, *entry_d;
1774         int tmp, i;
1775
1776         /*
1777          * Check for nothing to do.
1778          */
1779         if (count == 0)
1780                 return;
1781
1782         /*
1783          * Set up environment.
1784          */
1785         ASSERT(INT_GET(leaf_s->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
1786         ASSERT(INT_GET(leaf_d->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
1787         hdr_s = &leaf_s->hdr;
1788         hdr_d = &leaf_d->hdr;
1789         ASSERT((INT_GET(hdr_s->count, ARCH_CONVERT) > 0) && (INT_GET(hdr_s->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8)));
1790         ASSERT(INT_GET(hdr_s->firstused, ARCH_CONVERT) >=
1791                 ((INT_GET(hdr_s->count, ARCH_CONVERT)*sizeof(*entry_s))+sizeof(*hdr_s)));
1792         ASSERT(INT_GET(hdr_d->count, ARCH_CONVERT) < (XFS_LBSIZE(mp)/8));
1793         ASSERT(INT_GET(hdr_d->firstused, ARCH_CONVERT) >=
1794                 ((INT_GET(hdr_d->count, ARCH_CONVERT)*sizeof(*entry_d))+sizeof(*hdr_d)));
1795
1796         ASSERT(start_s < INT_GET(hdr_s->count, ARCH_CONVERT));
1797         ASSERT(start_d <= INT_GET(hdr_d->count, ARCH_CONVERT));
1798         ASSERT(count <= INT_GET(hdr_s->count, ARCH_CONVERT));
1799
1800         /*
1801          * Move the entries in the destination leaf up to make a hole?
1802          */
1803         if (start_d < INT_GET(hdr_d->count, ARCH_CONVERT)) {
1804                 tmp  = INT_GET(hdr_d->count, ARCH_CONVERT) - start_d;
1805                 tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
1806                 entry_s = &leaf_d->entries[start_d];
1807                 entry_d = &leaf_d->entries[start_d + count];
1808                 memcpy(entry_d, entry_s, tmp);
1809         }
1810
1811         /*
1812          * Copy all entry's in the same (sorted) order,
1813          * but allocate filenames packed and in sequence.
1814          */
1815         entry_s = &leaf_s->entries[start_s];
1816         entry_d = &leaf_d->entries[start_d];
1817         for (i = 0; i < count; entry_s++, entry_d++, i++) {
1818                 ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) >= INT_GET(hdr_s->firstused, ARCH_CONVERT));
1819                 tmp = XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry_s);
1820                 INT_MOD(hdr_d->firstused, ARCH_CONVERT, -(tmp));
1821                 entry_d->hashval = entry_s->hashval; /* INT_: direct copy */
1822                 INT_COPY(entry_d->nameidx, hdr_d->firstused, ARCH_CONVERT);
1823                 entry_d->namelen = entry_s->namelen;
1824                 ASSERT(INT_GET(entry_d->nameidx, ARCH_CONVERT) + tmp <= XFS_LBSIZE(mp));
1825                 memcpy(XFS_DIR_LEAF_NAMESTRUCT(leaf_d, INT_GET(entry_d->nameidx, ARCH_CONVERT)),
1826                        XFS_DIR_LEAF_NAMESTRUCT(leaf_s, INT_GET(entry_s->nameidx, ARCH_CONVERT)), tmp);
1827                 ASSERT(INT_GET(entry_s->nameidx, ARCH_CONVERT) + tmp <= XFS_LBSIZE(mp));
1828                 memset((char *)XFS_DIR_LEAF_NAMESTRUCT(leaf_s, INT_GET(entry_s->nameidx, ARCH_CONVERT)),
1829                       0, tmp);
1830                 INT_MOD(hdr_s->namebytes, ARCH_CONVERT, -(entry_d->namelen));
1831                 INT_MOD(hdr_d->namebytes, ARCH_CONVERT, entry_d->namelen);
1832                 INT_MOD(hdr_s->count, ARCH_CONVERT, -1);
1833                 INT_MOD(hdr_d->count, ARCH_CONVERT, +1);
1834                 tmp  = INT_GET(hdr_d->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t)
1835                                 + (uint)sizeof(xfs_dir_leaf_hdr_t);
1836                 ASSERT(INT_GET(hdr_d->firstused, ARCH_CONVERT) >= tmp);
1837
1838         }
1839
1840         /*
1841          * Zero out the entries we just copied.
1842          */
1843         if (start_s == INT_GET(hdr_s->count, ARCH_CONVERT)) {
1844                 tmp = count * (uint)sizeof(xfs_dir_leaf_entry_t);
1845                 entry_s = &leaf_s->entries[start_s];
1846                 ASSERT((char *)entry_s + tmp <= (char *)leaf_s + XFS_LBSIZE(mp));
1847                 memset((char *)entry_s, 0, tmp);
1848         } else {
1849                 /*
1850                  * Move the remaining entries down to fill the hole,
1851                  * then zero the entries at the top.
1852                  */
1853                 tmp  = INT_GET(hdr_s->count, ARCH_CONVERT) - count;
1854                 tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
1855                 entry_s = &leaf_s->entries[start_s + count];
1856                 entry_d = &leaf_s->entries[start_s];
1857                 memcpy(entry_d, entry_s, tmp);
1858
1859                 tmp = count * (uint)sizeof(xfs_dir_leaf_entry_t);
1860                 entry_s = &leaf_s->entries[INT_GET(hdr_s->count, ARCH_CONVERT)];
1861                 ASSERT((char *)entry_s + tmp <= (char *)leaf_s + XFS_LBSIZE(mp));
1862                 memset((char *)entry_s, 0, tmp);
1863         }
1864
1865         /*
1866          * Fill in the freemap information
1867          */
1868         INT_SET(hdr_d->freemap[0].base, ARCH_CONVERT, (uint)sizeof(xfs_dir_leaf_hdr_t));
1869         INT_MOD(hdr_d->freemap[0].base, ARCH_CONVERT, INT_GET(hdr_d->count, ARCH_CONVERT) * (uint)sizeof(xfs_dir_leaf_entry_t));
1870         INT_SET(hdr_d->freemap[0].size, ARCH_CONVERT, INT_GET(hdr_d->firstused, ARCH_CONVERT) - INT_GET(hdr_d->freemap[0].base, ARCH_CONVERT));
1871         INT_SET(hdr_d->freemap[1].base, ARCH_CONVERT, (hdr_d->freemap[2].base = 0));
1872         INT_SET(hdr_d->freemap[1].size, ARCH_CONVERT, (hdr_d->freemap[2].size = 0));
1873         hdr_s->holes = 1;       /* leaf may not be compact */
1874 }
1875
1876 /*
1877  * Compare two leaf blocks "order".
1878  */
1879 int
1880 xfs_dir_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp)
1881 {
1882         xfs_dir_leafblock_t *leaf1, *leaf2;
1883
1884         leaf1 = leaf1_bp->data;
1885         leaf2 = leaf2_bp->data;
1886         ASSERT((INT_GET(leaf1->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC) &&
1887                (INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC));
1888         if ((INT_GET(leaf1->hdr.count, ARCH_CONVERT) > 0) && (INT_GET(leaf2->hdr.count, ARCH_CONVERT) > 0) &&
1889             ((INT_GET(leaf2->entries[ 0 ].hashval, ARCH_CONVERT) <
1890               INT_GET(leaf1->entries[ 0 ].hashval, ARCH_CONVERT)) ||
1891              (INT_GET(leaf2->entries[ INT_GET(leaf2->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT) <
1892               INT_GET(leaf1->entries[ INT_GET(leaf1->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT)))) {
1893                 return(1);
1894         }
1895         return(0);
1896 }
1897
1898 /*
1899  * Pick up the last hashvalue from a leaf block.
1900  */
1901 xfs_dahash_t
1902 xfs_dir_leaf_lasthash(xfs_dabuf_t *bp, int *count)
1903 {
1904         xfs_dir_leafblock_t *leaf;
1905
1906         leaf = bp->data;
1907         ASSERT(INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) == XFS_DIR_LEAF_MAGIC);
1908         if (count)
1909                 *count = INT_GET(leaf->hdr.count, ARCH_CONVERT);
1910         if (!leaf->hdr.count)
1911                 return(0);
1912         return(INT_GET(leaf->entries[ INT_GET(leaf->hdr.count, ARCH_CONVERT)-1 ].hashval, ARCH_CONVERT));
1913 }
1914
1915 /*
1916  * Copy out directory entries for getdents(), for leaf directories.
1917  */
1918 int
1919 xfs_dir_leaf_getdents_int(
1920         xfs_dabuf_t     *bp,
1921         xfs_inode_t     *dp,
1922         xfs_dablk_t     bno,
1923         uio_t           *uio,
1924         int             *eobp,
1925         xfs_dirent_t    *dbp,
1926         xfs_dir_put_t   put,
1927         xfs_daddr_t             nextda)
1928 {
1929         xfs_dir_leafblock_t     *leaf;
1930         xfs_dir_leaf_entry_t    *entry;
1931         xfs_dir_leaf_name_t     *namest;
1932         int                     entno, want_entno, i, nextentno;
1933         xfs_mount_t             *mp;
1934         xfs_dahash_t            cookhash;
1935         xfs_dahash_t            nexthash = 0;
1936 #if (BITS_PER_LONG == 32)
1937         xfs_dahash_t            lasthash = XFS_DA_MAXHASH;
1938 #endif
1939         xfs_dir_put_args_t      p;
1940
1941         mp = dp->i_mount;
1942         leaf = bp->data;
1943         if (INT_GET(leaf->hdr.info.magic, ARCH_CONVERT) != XFS_DIR_LEAF_MAGIC) {
1944                 *eobp = 1;
1945                 return(XFS_ERROR(ENOENT));      /* XXX wrong code */
1946         }
1947
1948         want_entno = XFS_DA_COOKIE_ENTRY(mp, uio->uio_offset);
1949
1950         cookhash = XFS_DA_COOKIE_HASH(mp, uio->uio_offset);
1951
1952         xfs_dir_trace_g_dul("leaf: start", dp, uio, leaf);
1953
1954         /*
1955          * Re-find our place.
1956          */
1957         for (i = entno = 0, entry = &leaf->entries[0];
1958                      i < INT_GET(leaf->hdr.count, ARCH_CONVERT);
1959                              entry++, i++) {
1960
1961                 namest = XFS_DIR_LEAF_NAMESTRUCT(leaf,
1962                                     INT_GET(entry->nameidx, ARCH_CONVERT));
1963
1964                 if (unlikely(
1965                     ((char *)namest < (char *)leaf) ||
1966                     ((char *)namest >= (char *)leaf + XFS_LBSIZE(mp)))) {
1967                         XFS_CORRUPTION_ERROR("xfs_dir_leaf_getdents_int(1)",
1968                                              XFS_ERRLEVEL_LOW, mp, leaf);
1969                         xfs_dir_trace_g_du("leaf: corrupted", dp, uio);
1970                         return XFS_ERROR(EFSCORRUPTED);
1971                 }
1972                 if (INT_GET(entry->hashval, ARCH_CONVERT) >= cookhash) {
1973                         if (   entno < want_entno
1974                             && INT_GET(entry->hashval, ARCH_CONVERT)
1975                                                         == cookhash) {
1976                                 /*
1977                                  * Trying to get to a particular offset in a
1978                                  * run of equal-hashval entries.
1979                                  */
1980                                 entno++;
1981                         } else if (   want_entno > 0
1982                                    && entno == want_entno
1983                                    && INT_GET(entry->hashval, ARCH_CONVERT)
1984                                                         == cookhash) {
1985                                 break;
1986                         } else {
1987                                 entno = 0;
1988                                 break;
1989                         }
1990                 }
1991         }
1992
1993         if (i == INT_GET(leaf->hdr.count, ARCH_CONVERT)) {
1994                 xfs_dir_trace_g_du("leaf: hash not found", dp, uio);
1995                 if (!INT_GET(leaf->hdr.info.forw, ARCH_CONVERT))
1996                         uio->uio_offset =
1997                                 XFS_DA_MAKE_COOKIE(mp, 0, 0, XFS_DA_MAXHASH);
1998                 /*
1999                  * Don't set uio_offset if there's another block:
2000                  * the node code will be setting uio_offset anyway.
2001                  */
2002                 *eobp = 0;
2003                 return(0);
2004         }
2005         xfs_dir_trace_g_due("leaf: hash found", dp, uio, entry);
2006
2007         p.dbp = dbp;
2008         p.put = put;
2009         p.uio = uio;
2010
2011         /*
2012          * We're synchronized, start copying entries out to the user.
2013          */
2014         for (; entno >= 0 && i < INT_GET(leaf->hdr.count, ARCH_CONVERT);
2015                              entry++, i++, (entno = nextentno)) {
2016                 int lastresid=0, retval;
2017                 xfs_dircook_t lastoffset;
2018                 xfs_dahash_t thishash;
2019
2020                 /*
2021                  * Check for a damaged directory leaf block and pick up
2022                  * the inode number from this entry.
2023                  */
2024                 namest = XFS_DIR_LEAF_NAMESTRUCT(leaf,
2025                                     INT_GET(entry->nameidx, ARCH_CONVERT));
2026
2027                 if (unlikely(
2028                     ((char *)namest < (char *)leaf) ||
2029                     ((char *)namest >= (char *)leaf + XFS_LBSIZE(mp)))) {
2030                         XFS_CORRUPTION_ERROR("xfs_dir_leaf_getdents_int(2)",
2031                                              XFS_ERRLEVEL_LOW, mp, leaf);
2032                         xfs_dir_trace_g_du("leaf: corrupted", dp, uio);
2033                         return XFS_ERROR(EFSCORRUPTED);
2034                 }
2035
2036                 xfs_dir_trace_g_duc("leaf: middle cookie  ",
2037                                                    dp, uio, p.cook.o);
2038
2039                 if (i < (INT_GET(leaf->hdr.count, ARCH_CONVERT) - 1)) {
2040                         nexthash = INT_GET(entry[1].hashval, ARCH_CONVERT);
2041
2042                         if (nexthash == INT_GET(entry->hashval, ARCH_CONVERT))
2043                                 nextentno = entno + 1;
2044                         else
2045                                 nextentno = 0;
2046                         XFS_PUT_COOKIE(p.cook, mp, bno, nextentno, nexthash);
2047                         xfs_dir_trace_g_duc("leaf: middle cookie  ",
2048                                                    dp, uio, p.cook.o);
2049
2050                 } else if ((thishash = INT_GET(leaf->hdr.info.forw,
2051                                                         ARCH_CONVERT))) {
2052                         xfs_dabuf_t *bp2;
2053                         xfs_dir_leafblock_t *leaf2;
2054
2055                         ASSERT(nextda != -1);
2056
2057                         retval = xfs_da_read_buf(dp->i_transp, dp, thishash,
2058                                                  nextda, &bp2, XFS_DATA_FORK);
2059                         if (retval)
2060                                 return(retval);
2061
2062                         ASSERT(bp2 != NULL);
2063
2064                         leaf2 = bp2->data;
2065
2066                         if (unlikely(
2067                                (INT_GET(leaf2->hdr.info.magic, ARCH_CONVERT)
2068                                                 != XFS_DIR_LEAF_MAGIC)
2069                             || (INT_GET(leaf2->hdr.info.back, ARCH_CONVERT)
2070                                                 != bno))) {     /* GROT */
2071                                 XFS_CORRUPTION_ERROR("xfs_dir_leaf_getdents_int(3)",
2072                                                      XFS_ERRLEVEL_LOW, mp,
2073                                                      leaf2);
2074                                 xfs_da_brelse(dp->i_transp, bp2);
2075
2076                                 return(XFS_ERROR(EFSCORRUPTED));
2077                         }
2078
2079                         nexthash = INT_GET(leaf2->entries[0].hashval,
2080                                                                 ARCH_CONVERT);
2081                         nextentno = -1;
2082                         XFS_PUT_COOKIE(p.cook, mp, thishash, 0, nexthash);
2083                         xfs_da_brelse(dp->i_transp, bp2);
2084                         xfs_dir_trace_g_duc("leaf: next blk cookie",
2085                                                    dp, uio, p.cook.o);
2086                 } else {
2087                         nextentno = -1;
2088                         XFS_PUT_COOKIE(p.cook, mp, 0, 0, XFS_DA_MAXHASH);
2089                 }
2090
2091                 /*
2092                  * Save off the cookie so we can fall back should the
2093                  * 'put' into the outgoing buffer fails.  To handle a run
2094                  * of equal-hashvals, the off_t structure on 64bit
2095                  * builds has entno built into the cookie to ID the
2096                  * entry.  On 32bit builds, we only have space for the
2097                  * hashval so we can't ID specific entries within a group
2098                  * of same hashval entries.   For this, lastoffset is set
2099                  * to the first in the run of equal hashvals so we don't
2100                  * include any entries unless we can include all entries
2101                  * that share the same hashval.  Hopefully the buffer
2102                  * provided is big enough to handle it (see pv763517).
2103                  */
2104 #if (BITS_PER_LONG == 32)
2105                 if ((thishash = INT_GET(entry->hashval, ARCH_CONVERT))
2106                                                                 != lasthash) {
2107                         XFS_PUT_COOKIE(lastoffset, mp, bno, entno, thishash);
2108                         lastresid = uio->uio_resid;
2109                         lasthash = thishash;
2110                 } else {
2111                         xfs_dir_trace_g_duc("leaf: DUP COOKIES, skipped",
2112                                                    dp, uio, p.cook.o);
2113                 }
2114 #else
2115                 thishash = INT_GET(entry->hashval, ARCH_CONVERT);
2116                 XFS_PUT_COOKIE(lastoffset, mp, bno, entno, thishash);
2117                 lastresid = uio->uio_resid;
2118 #endif /* BITS_PER_LONG == 32 */
2119
2120                 /*
2121                  * Put the current entry into the outgoing buffer.  If we fail
2122                  * then restore the UIO to the first entry in the current
2123                  * run of equal-hashval entries (probably one 1 entry long).
2124                  */
2125                 p.ino = XFS_GET_DIR_INO8(namest->inumber);
2126 #if XFS_BIG_INUMS
2127                 p.ino += mp->m_inoadd;
2128 #endif
2129                 p.name = (char *)namest->name;
2130                 p.namelen = entry->namelen;
2131
2132                 retval = p.put(&p);
2133
2134                 if (!p.done) {
2135                         uio->uio_offset = lastoffset.o;
2136                         uio->uio_resid = lastresid;
2137
2138                         *eobp = 1;
2139
2140                         xfs_dir_trace_g_du("leaf: E-O-B", dp, uio);
2141
2142                         return(retval);
2143                 }
2144         }
2145
2146         uio->uio_offset = p.cook.o;
2147
2148         *eobp = 0;
2149
2150         xfs_dir_trace_g_du("leaf: E-O-F", dp, uio);
2151
2152         return(0);
2153 }
2154
2155 /*
2156  * Format a dirent64 structure and copy it out the the user's buffer.
2157  */
2158 int
2159 xfs_dir_put_dirent64_direct(xfs_dir_put_args_t *pa)
2160 {
2161         iovec_t *iovp;
2162         int reclen, namelen;
2163         xfs_dirent_t *idbp;
2164         uio_t *uio;
2165
2166         namelen = pa->namelen;
2167         reclen = DIRENTSIZE(namelen);
2168         uio = pa->uio;
2169         if (reclen > uio->uio_resid) {
2170                 pa->done = 0;
2171                 return 0;
2172         }
2173         iovp = uio->uio_iov;
2174         idbp = (xfs_dirent_t *)iovp->iov_base;
2175         iovp->iov_base = (char *)idbp + reclen;
2176         iovp->iov_len -= reclen;
2177         uio->uio_resid -= reclen;
2178         idbp->d_reclen = reclen;
2179         idbp->d_ino = pa->ino;
2180         idbp->d_off = pa->cook.o;
2181         idbp->d_name[namelen] = '\0';
2182         pa->done = 1;
2183         memcpy(idbp->d_name, pa->name, namelen);
2184         return 0;
2185 }
2186
2187 /*
2188  * Format a dirent64 structure and copy it out the the user's buffer.
2189  */
2190 int
2191 xfs_dir_put_dirent64_uio(xfs_dir_put_args_t *pa)
2192 {
2193         int             retval, reclen, namelen;
2194         xfs_dirent_t    *idbp;
2195         uio_t           *uio;
2196
2197         namelen = pa->namelen;
2198         reclen = DIRENTSIZE(namelen);
2199         uio = pa->uio;
2200         if (reclen > uio->uio_resid) {
2201                 pa->done = 0;
2202                 return 0;
2203         }
2204         idbp = pa->dbp;
2205         idbp->d_reclen = reclen;
2206         idbp->d_ino = pa->ino;
2207         idbp->d_off = pa->cook.o;
2208         idbp->d_name[namelen] = '\0';
2209         memcpy(idbp->d_name, pa->name, namelen);
2210         retval = uio_read((caddr_t)idbp, reclen, uio);
2211         pa->done = (retval == 0);
2212         return retval;
2213 }