[XFS] Shutdown the filesystem if all device paths have gone. Made
[linux-2.6] / fs / xfs / xfs_dir_leaf.c
1 /*
2  * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_types.h"
21 #include "xfs_log.h"
22 #include "xfs_inum.h"
23 #include "xfs_trans.h"
24 #include "xfs_sb.h"
25 #include "xfs_dir.h"
26 #include "xfs_dir2.h"
27 #include "xfs_dmapi.h"
28 #include "xfs_mount.h"
29 #include "xfs_da_btree.h"
30 #include "xfs_bmap_btree.h"
31 #include "xfs_alloc_btree.h"
32 #include "xfs_ialloc_btree.h"
33 #include "xfs_dir_sf.h"
34 #include "xfs_dir2_sf.h"
35 #include "xfs_attr_sf.h"
36 #include "xfs_dinode.h"
37 #include "xfs_inode.h"
38 #include "xfs_inode_item.h"
39 #include "xfs_alloc.h"
40 #include "xfs_btree.h"
41 #include "xfs_bmap.h"
42 #include "xfs_dir_leaf.h"
43 #include "xfs_error.h"
44
45 /*
46  * xfs_dir_leaf.c
47  *
48  * Routines to implement leaf blocks of directories as Btrees of hashed names.
49  */
50
51 /*========================================================================
52  * Function prototypes for the kernel.
53  *========================================================================*/
54
55 /*
56  * Routines used for growing the Btree.
57  */
58 STATIC void xfs_dir_leaf_add_work(xfs_dabuf_t *leaf_buffer, xfs_da_args_t *args,
59                                               int insertion_index,
60                                               int freemap_index);
61 STATIC int xfs_dir_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *leaf_buffer,
62                                             int musthave, int justcheck);
63 STATIC void xfs_dir_leaf_rebalance(xfs_da_state_t *state,
64                                                   xfs_da_state_blk_t *blk1,
65                                                   xfs_da_state_blk_t *blk2);
66 STATIC int xfs_dir_leaf_figure_balance(xfs_da_state_t *state,
67                                           xfs_da_state_blk_t *leaf_blk_1,
68                                           xfs_da_state_blk_t *leaf_blk_2,
69                                           int *number_entries_in_blk1,
70                                           int *number_namebytes_in_blk1);
71
72 STATIC int xfs_dir_leaf_create(struct xfs_da_args *args,
73                                 xfs_dablk_t which_block,
74                                 struct xfs_dabuf **bpp);
75
76 /*
77  * Utility routines.
78  */
79 STATIC void xfs_dir_leaf_moveents(xfs_dir_leafblock_t *src_leaf,
80                                               int src_start,
81                                               xfs_dir_leafblock_t *dst_leaf,
82                                               int dst_start, int move_count,
83                                               xfs_mount_t *mp);
84
85
86 /*========================================================================
87  * External routines when dirsize < XFS_IFORK_DSIZE(dp).
88  *========================================================================*/
89
90
91 /*
92  * Validate a given inode number.
93  */
94 int
95 xfs_dir_ino_validate(xfs_mount_t *mp, xfs_ino_t ino)
96 {
97         xfs_agblock_t   agblkno;
98         xfs_agino_t     agino;
99         xfs_agnumber_t  agno;
100         int             ino_ok;
101         int             ioff;
102
103         agno = XFS_INO_TO_AGNO(mp, ino);
104         agblkno = XFS_INO_TO_AGBNO(mp, ino);
105         ioff = XFS_INO_TO_OFFSET(mp, ino);
106         agino = XFS_OFFBNO_TO_AGINO(mp, agblkno, ioff);
107         ino_ok =
108                 agno < mp->m_sb.sb_agcount &&
109                 agblkno < mp->m_sb.sb_agblocks &&
110                 agblkno != 0 &&
111                 ioff < (1 << mp->m_sb.sb_inopblog) &&
112                 XFS_AGINO_TO_INO(mp, agno, agino) == ino;
113         if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE,
114                         XFS_RANDOM_DIR_INO_VALIDATE))) {
115                 xfs_fs_cmn_err(CE_WARN, mp, "Invalid inode number 0x%Lx",
116                                 (unsigned long long) ino);
117                 XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp);
118                 return XFS_ERROR(EFSCORRUPTED);
119         }
120         return 0;
121 }
122
123 /*
124  * Create the initial contents of a shortform directory.
125  */
126 int
127 xfs_dir_shortform_create(xfs_da_args_t *args, xfs_ino_t parent)
128 {
129         xfs_dir_sf_hdr_t *hdr;
130         xfs_inode_t *dp;
131
132         dp = args->dp;
133         ASSERT(dp != NULL);
134         ASSERT(dp->i_d.di_size == 0);
135         if (dp->i_d.di_format == XFS_DINODE_FMT_EXTENTS) {
136                 dp->i_df.if_flags &= ~XFS_IFEXTENTS;    /* just in case */
137                 dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
138                 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
139                 dp->i_df.if_flags |= XFS_IFINLINE;
140         }
141         ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
142         ASSERT(dp->i_df.if_bytes == 0);
143         xfs_idata_realloc(dp, sizeof(*hdr), XFS_DATA_FORK);
144         hdr = (xfs_dir_sf_hdr_t *)dp->i_df.if_u1.if_data;
145         XFS_DIR_SF_PUT_DIRINO(&parent, &hdr->parent);
146
147         hdr->count = 0;
148         dp->i_d.di_size = sizeof(*hdr);
149         xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
150         return 0;
151 }
152
153 /*
154  * Add a name to the shortform directory structure.
155  * Overflow from the inode has already been checked for.
156  */
157 int
158 xfs_dir_shortform_addname(xfs_da_args_t *args)
159 {
160         xfs_dir_shortform_t *sf;
161         xfs_dir_sf_entry_t *sfe;
162         int i, offset, size;
163         xfs_inode_t *dp;
164
165         dp = args->dp;
166         ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
167         /*
168          * Catch the case where the conversion from shortform to leaf
169          * failed part way through.
170          */
171         if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
172                 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
173                 return XFS_ERROR(EIO);
174         }
175         ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
176         ASSERT(dp->i_df.if_u1.if_data != NULL);
177         sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
178         sfe = &sf->list[0];
179         for (i = sf->hdr.count-1; i >= 0; i--) {
180                 if (sfe->namelen == args->namelen &&
181                     args->name[0] == sfe->name[0] &&
182                     memcmp(args->name, sfe->name, args->namelen) == 0)
183                         return XFS_ERROR(EEXIST);
184                 sfe = XFS_DIR_SF_NEXTENTRY(sfe);
185         }
186
187         offset = (int)((char *)sfe - (char *)sf);
188         size = XFS_DIR_SF_ENTSIZE_BYNAME(args->namelen);
189         xfs_idata_realloc(dp, size, XFS_DATA_FORK);
190         sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
191         sfe = (xfs_dir_sf_entry_t *)((char *)sf + offset);
192
193         XFS_DIR_SF_PUT_DIRINO(&args->inumber, &sfe->inumber);
194         sfe->namelen = args->namelen;
195         memcpy(sfe->name, args->name, sfe->namelen);
196         sf->hdr.count++;
197
198         dp->i_d.di_size += size;
199         xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
200
201         return 0;
202 }
203
204 /*
205  * Remove a name from the shortform directory structure.
206  */
207 int
208 xfs_dir_shortform_removename(xfs_da_args_t *args)
209 {
210         xfs_dir_shortform_t *sf;
211         xfs_dir_sf_entry_t *sfe;
212         int base, size = 0, i;
213         xfs_inode_t *dp;
214
215         dp = args->dp;
216         ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
217         /*
218          * Catch the case where the conversion from shortform to leaf
219          * failed part way through.
220          */
221         if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
222                 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
223                 return XFS_ERROR(EIO);
224         }
225         ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
226         ASSERT(dp->i_df.if_u1.if_data != NULL);
227         base = sizeof(xfs_dir_sf_hdr_t);
228         sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
229         sfe = &sf->list[0];
230         for (i = sf->hdr.count-1; i >= 0; i--) {
231                 size = XFS_DIR_SF_ENTSIZE_BYENTRY(sfe);
232                 if (sfe->namelen == args->namelen &&
233                     sfe->name[0] == args->name[0] &&
234                     memcmp(sfe->name, args->name, args->namelen) == 0)
235                         break;
236                 base += size;
237                 sfe = XFS_DIR_SF_NEXTENTRY(sfe);
238         }
239         if (i < 0) {
240                 ASSERT(args->oknoent);
241                 return XFS_ERROR(ENOENT);
242         }
243
244         if ((base + size) != dp->i_d.di_size) {
245                 memmove(&((char *)sf)[base], &((char *)sf)[base+size],
246                                               dp->i_d.di_size - (base+size));
247         }
248         sf->hdr.count--;
249
250         xfs_idata_realloc(dp, -size, XFS_DATA_FORK);
251         dp->i_d.di_size -= size;
252         xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
253
254         return 0;
255 }
256
257 /*
258  * Look up a name in a shortform directory structure.
259  */
260 int
261 xfs_dir_shortform_lookup(xfs_da_args_t *args)
262 {
263         xfs_dir_shortform_t *sf;
264         xfs_dir_sf_entry_t *sfe;
265         int i;
266         xfs_inode_t *dp;
267
268         dp = args->dp;
269         ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
270         /*
271          * Catch the case where the conversion from shortform to leaf
272          * failed part way through.
273          */
274         if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
275                 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
276                 return XFS_ERROR(EIO);
277         }
278         ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
279         ASSERT(dp->i_df.if_u1.if_data != NULL);
280         sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
281         if (args->namelen == 2 &&
282             args->name[0] == '.' && args->name[1] == '.') {
283                 XFS_DIR_SF_GET_DIRINO(&sf->hdr.parent, &args->inumber);
284                 return(XFS_ERROR(EEXIST));
285         }
286         if (args->namelen == 1 && args->name[0] == '.') {
287                 args->inumber = dp->i_ino;
288                 return(XFS_ERROR(EEXIST));
289         }
290         sfe = &sf->list[0];
291         for (i = sf->hdr.count-1; i >= 0; i--) {
292                 if (sfe->namelen == args->namelen &&
293                     sfe->name[0] == args->name[0] &&
294                     memcmp(args->name, sfe->name, args->namelen) == 0) {
295                         XFS_DIR_SF_GET_DIRINO(&sfe->inumber, &args->inumber);
296                         return(XFS_ERROR(EEXIST));
297                 }
298                 sfe = XFS_DIR_SF_NEXTENTRY(sfe);
299         }
300         ASSERT(args->oknoent);
301         return(XFS_ERROR(ENOENT));
302 }
303
304 /*
305  * Convert from using the shortform to the leaf.
306  */
307 int
308 xfs_dir_shortform_to_leaf(xfs_da_args_t *iargs)
309 {
310         xfs_inode_t *dp;
311         xfs_dir_shortform_t *sf;
312         xfs_dir_sf_entry_t *sfe;
313         xfs_da_args_t args;
314         xfs_ino_t inumber;
315         char *tmpbuffer;
316         int retval, i, size;
317         xfs_dablk_t blkno;
318         xfs_dabuf_t *bp;
319
320         dp = iargs->dp;
321         /*
322          * Catch the case where the conversion from shortform to leaf
323          * failed part way through.
324          */
325         if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
326                 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
327                 return XFS_ERROR(EIO);
328         }
329         ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
330         ASSERT(dp->i_df.if_u1.if_data != NULL);
331         size = dp->i_df.if_bytes;
332         tmpbuffer = kmem_alloc(size, KM_SLEEP);
333         ASSERT(tmpbuffer != NULL);
334
335         memcpy(tmpbuffer, dp->i_df.if_u1.if_data, size);
336
337         sf = (xfs_dir_shortform_t *)tmpbuffer;
338         XFS_DIR_SF_GET_DIRINO(&sf->hdr.parent, &inumber);
339
340         xfs_idata_realloc(dp, -size, XFS_DATA_FORK);
341         dp->i_d.di_size = 0;
342         xfs_trans_log_inode(iargs->trans, dp, XFS_ILOG_CORE);
343         retval = xfs_da_grow_inode(iargs, &blkno);
344         if (retval)
345                 goto out;
346
347         ASSERT(blkno == 0);
348         retval = xfs_dir_leaf_create(iargs, blkno, &bp);
349         if (retval)
350                 goto out;
351         xfs_da_buf_done(bp);
352
353         args.name = ".";
354         args.namelen = 1;
355         args.hashval = xfs_dir_hash_dot;
356         args.inumber = dp->i_ino;
357         args.dp = dp;
358         args.firstblock = iargs->firstblock;
359         args.flist = iargs->flist;
360         args.total = iargs->total;
361         args.whichfork = XFS_DATA_FORK;
362         args.trans = iargs->trans;
363         args.justcheck = 0;
364         args.addname = args.oknoent = 1;
365         retval = xfs_dir_leaf_addname(&args);
366         if (retval)
367                 goto out;
368
369         args.name = "..";
370         args.namelen = 2;
371         args.hashval = xfs_dir_hash_dotdot;
372         args.inumber = inumber;
373         retval = xfs_dir_leaf_addname(&args);
374         if (retval)
375                 goto out;
376
377         sfe = &sf->list[0];
378         for (i = 0; i < sf->hdr.count; i++) {
379                 args.name = (char *)(sfe->name);
380                 args.namelen = sfe->namelen;
381                 args.hashval = xfs_da_hashname((char *)(sfe->name),
382                                                sfe->namelen);
383                 XFS_DIR_SF_GET_DIRINO(&sfe->inumber, &args.inumber);
384                 retval = xfs_dir_leaf_addname(&args);
385                 if (retval)
386                         goto out;
387                 sfe = XFS_DIR_SF_NEXTENTRY(sfe);
388         }
389         retval = 0;
390
391 out:
392         kmem_free(tmpbuffer, size);
393         return retval;
394 }
395
396 STATIC int
397 xfs_dir_shortform_compare(const void *a, const void *b)
398 {
399         xfs_dir_sf_sort_t *sa, *sb;
400
401         sa = (xfs_dir_sf_sort_t *)a;
402         sb = (xfs_dir_sf_sort_t *)b;
403         if (sa->hash < sb->hash)
404                 return -1;
405         else if (sa->hash > sb->hash)
406                 return 1;
407         else
408                 return sa->entno - sb->entno;
409 }
410
411 /*
412  * Copy out directory entries for getdents(), for shortform directories.
413  */
414 /*ARGSUSED*/
415 int
416 xfs_dir_shortform_getdents(xfs_inode_t *dp, uio_t *uio, int *eofp,
417                                        xfs_dirent_t *dbp, xfs_dir_put_t put)
418 {
419         xfs_dir_shortform_t *sf;
420         xfs_dir_sf_entry_t *sfe;
421         int retval, i, sbsize, nsbuf, lastresid=0, want_entno;
422         xfs_mount_t *mp;
423         xfs_dahash_t cookhash, hash;
424         xfs_dir_put_args_t p;
425         xfs_dir_sf_sort_t *sbuf, *sbp;
426
427         mp = dp->i_mount;
428         sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
429         cookhash = XFS_DA_COOKIE_HASH(mp, uio->uio_offset);
430         want_entno = XFS_DA_COOKIE_ENTRY(mp, uio->uio_offset);
431         nsbuf = sf->hdr.count + 2;
432         sbsize = (nsbuf + 1) * sizeof(*sbuf);
433         sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP);
434
435         xfs_dir_trace_g_du("sf: start", dp, uio);
436
437         /*
438          * Collect all the entries into the buffer.
439          * Entry 0 is .
440          */
441         sbp->entno = 0;
442         sbp->seqno = 0;
443         sbp->hash = xfs_dir_hash_dot;
444         sbp->ino = dp->i_ino;
445         sbp->name = ".";
446         sbp->namelen = 1;
447         sbp++;
448
449         /*
450          * Entry 1 is ..
451          */
452         sbp->entno = 1;
453         sbp->seqno = 0;
454         sbp->hash = xfs_dir_hash_dotdot;
455         sbp->ino = XFS_GET_DIR_INO8(sf->hdr.parent);
456         sbp->name = "..";
457         sbp->namelen = 2;
458         sbp++;
459
460         /*
461          * Scan the directory data for the rest of the entries.
462          */
463         for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
464
465                 if (unlikely(
466                     ((char *)sfe < (char *)sf) ||
467                     ((char *)sfe >= ((char *)sf + dp->i_df.if_bytes)))) {
468                         xfs_dir_trace_g_du("sf: corrupted", dp, uio);
469                         XFS_CORRUPTION_ERROR("xfs_dir_shortform_getdents",
470                                              XFS_ERRLEVEL_LOW, mp, sfe);
471                         kmem_free(sbuf, sbsize);
472                         return XFS_ERROR(EFSCORRUPTED);
473                 }
474
475                 sbp->entno = i + 2;
476                 sbp->seqno = 0;
477                 sbp->hash = xfs_da_hashname((char *)sfe->name, sfe->namelen);
478                 sbp->ino = XFS_GET_DIR_INO8(sfe->inumber);
479                 sbp->name = (char *)sfe->name;
480                 sbp->namelen = sfe->namelen;
481                 sfe = XFS_DIR_SF_NEXTENTRY(sfe);
482                 sbp++;
483         }
484
485         /*
486          * Sort the entries on hash then entno.
487          */
488         xfs_sort(sbuf, nsbuf, sizeof(*sbuf), xfs_dir_shortform_compare);
489         /*
490          * Stuff in last entry.
491          */
492         sbp->entno = nsbuf;
493         sbp->hash = XFS_DA_MAXHASH;
494         sbp->seqno = 0;
495         /*
496          * Figure out the sequence numbers in case there's a hash duplicate.
497          */
498         for (hash = sbuf->hash, sbp = sbuf + 1;
499                                 sbp < &sbuf[nsbuf + 1]; sbp++) {
500                 if (sbp->hash == hash)
501                         sbp->seqno = sbp[-1].seqno + 1;
502                 else
503                         hash = sbp->hash;
504         }
505
506         /*
507          * Set up put routine.
508          */
509         p.dbp = dbp;
510         p.put = put;
511         p.uio = uio;
512
513         /*
514          * Find our place.
515          */
516         for (sbp = sbuf; sbp < &sbuf[nsbuf + 1]; sbp++) {
517                 if (sbp->hash > cookhash ||
518                     (sbp->hash == cookhash && sbp->seqno >= want_entno))
519                         break;
520         }
521
522         /*
523          * Did we fail to find anything?  We stop at the last entry,
524          * the one we put maxhash into.
525          */
526         if (sbp == &sbuf[nsbuf]) {
527                 kmem_free(sbuf, sbsize);
528                 xfs_dir_trace_g_du("sf: hash beyond end", dp, uio);
529                 uio->uio_offset = XFS_DA_MAKE_COOKIE(mp, 0, 0, XFS_DA_MAXHASH);
530                 *eofp = 1;
531                 return 0;
532         }
533
534         /*
535          * Loop putting entries into the user buffer.
536          */
537         while (sbp < &sbuf[nsbuf]) {
538                 /*
539                  * Save the first resid in a run of equal-hashval entries
540                  * so that we can back them out if they don't all fit.
541                  */
542                 if (sbp->seqno == 0 || sbp == sbuf)
543                         lastresid = uio->uio_resid;
544                 XFS_PUT_COOKIE(p.cook, mp, 0, sbp[1].seqno, sbp[1].hash);
545                 p.ino = sbp->ino;
546 #if XFS_BIG_INUMS
547                 p.ino += mp->m_inoadd;
548 #endif
549                 p.name = sbp->name;
550                 p.namelen = sbp->namelen;
551                 retval = p.put(&p);
552                 if (!p.done) {
553                         uio->uio_offset =
554                                 XFS_DA_MAKE_COOKIE(mp, 0, 0, sbp->hash);
555                         kmem_free(sbuf, sbsize);
556                         uio->uio_resid = lastresid;
557                         xfs_dir_trace_g_du("sf: E-O-B", dp, uio);
558                         return retval;
559                 }
560                 sbp++;
561         }
562         kmem_free(sbuf, sbsize);
563         uio->uio_offset = p.cook.o;
564         *eofp = 1;
565         xfs_dir_trace_g_du("sf: E-O-F", dp, uio);
566         return 0;
567 }
568
569 /*
570  * Look up a name in a shortform directory structure, replace the inode number.
571  */
572 int
573 xfs_dir_shortform_replace(xfs_da_args_t *args)
574 {
575         xfs_dir_shortform_t *sf;
576         xfs_dir_sf_entry_t *sfe;
577         xfs_inode_t *dp;
578         int i;
579
580         dp = args->dp;
581         ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
582         /*
583          * Catch the case where the conversion from shortform to leaf
584          * failed part way through.
585          */
586         if (dp->i_d.di_size < sizeof(xfs_dir_sf_hdr_t)) {
587                 ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
588                 return XFS_ERROR(EIO);
589         }
590         ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
591         ASSERT(dp->i_df.if_u1.if_data != NULL);
592         sf = (xfs_dir_shortform_t *)dp->i_df.if_u1.if_data;
593         if (args->namelen == 2 &&
594             args->name[0] == '.' && args->name[1] == '.') {
595                 /* XXX - replace assert? */
596                 XFS_DIR_SF_PUT_DIRINO(&args->inumber, &sf->hdr.parent);
597                 xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
598                 return 0;
599         }
600         ASSERT(args->namelen != 1 || args->name[0] != '.');
601         sfe = &sf->list[0];
602         for (i = sf->hdr.count-1; i >= 0; i--) {
603                 if (sfe->namelen == args->namelen &&
604                     sfe->name[0] == args->name[0] &&
605                     memcmp(args->name, sfe->name, args->namelen) == 0) {
606                         ASSERT(memcmp((char *)&args->inumber,
607                                 (char *)&sfe->inumber, sizeof(xfs_ino_t)));
608                         XFS_DIR_SF_PUT_DIRINO(&args->inumber, &sfe->inumber);
609                         xfs_trans_log_inode(args->trans, dp, XFS_ILOG_DDATA);
610                         return 0;
611                 }
612                 sfe = XFS_DIR_SF_NEXTENTRY(sfe);
613         }
614         ASSERT(args->oknoent);
615         return XFS_ERROR(ENOENT);
616 }
617
618 /*
619  * Convert a leaf directory to shortform structure
620  */
621 int
622 xfs_dir_leaf_to_shortform(xfs_da_args_t *iargs)
623 {
624         xfs_dir_leafblock_t *leaf;
625         xfs_dir_leaf_hdr_t *hdr;
626         xfs_dir_leaf_entry_t *entry;
627         xfs_dir_leaf_name_t *namest;
628         xfs_da_args_t args;
629         xfs_inode_t *dp;
630         xfs_ino_t parent = 0;
631         char *tmpbuffer;
632         int retval, i;
633         xfs_dabuf_t *bp;
634
635         dp = iargs->dp;
636         tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP);
637         ASSERT(tmpbuffer != NULL);
638
639         retval = xfs_da_read_buf(iargs->trans, iargs->dp, 0, -1, &bp,
640                                                XFS_DATA_FORK);
641         if (retval)
642                 goto out;
643         ASSERT(bp != NULL);
644         memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount));
645         leaf = (xfs_dir_leafblock_t *)tmpbuffer;
646         ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
647         memset(bp->data, 0, XFS_LBSIZE(dp->i_mount));
648
649         /*
650          * Find and special case the parent inode number
651          */
652         hdr = &leaf->hdr;
653         entry = &leaf->entries[0];
654         for (i = be16_to_cpu(hdr->count)-1; i >= 0; entry++, i--) {
655                 namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, be16_to_cpu(entry->nameidx));
656                 if ((entry->namelen == 2) &&
657                     (namest->name[0] == '.') &&
658                     (namest->name[1] == '.')) {
659                         XFS_DIR_SF_GET_DIRINO(&namest->inumber, &parent);
660                         entry->nameidx = 0;
661                 } else if ((entry->namelen == 1) && (namest->name[0] == '.')) {
662                         entry->nameidx = 0;
663                 }
664         }
665         retval = xfs_da_shrink_inode(iargs, 0, bp);
666         if (retval)
667                 goto out;
668         retval = xfs_dir_shortform_create(iargs, parent);
669         if (retval)
670                 goto out;
671
672         /*
673          * Copy the rest of the filenames
674          */
675         entry = &leaf->entries[0];
676         args.dp = dp;
677         args.firstblock = iargs->firstblock;
678         args.flist = iargs->flist;
679         args.total = iargs->total;
680         args.whichfork = XFS_DATA_FORK;
681         args.trans = iargs->trans;
682         args.justcheck = 0;
683         args.addname = args.oknoent = 1;
684         for (i = 0; i < be16_to_cpu(hdr->count); entry++, i++) {
685                 if (!entry->nameidx)
686                         continue;
687                 namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, be16_to_cpu(entry->nameidx));
688                 args.name = (char *)(namest->name);
689                 args.namelen = entry->namelen;
690                 args.hashval = be32_to_cpu(entry->hashval);
691                 XFS_DIR_SF_GET_DIRINO(&namest->inumber, &args.inumber);
692                 xfs_dir_shortform_addname(&args);
693         }
694
695 out:
696         kmem_free(tmpbuffer, XFS_LBSIZE(dp->i_mount));
697         return retval;
698 }
699
700 /*
701  * Convert from using a single leaf to a root node and a leaf.
702  */
703 int
704 xfs_dir_leaf_to_node(xfs_da_args_t *args)
705 {
706         xfs_dir_leafblock_t *leaf;
707         xfs_da_intnode_t *node;
708         xfs_inode_t *dp;
709         xfs_dabuf_t *bp1, *bp2;
710         xfs_dablk_t blkno;
711         int retval;
712
713         dp = args->dp;
714         retval = xfs_da_grow_inode(args, &blkno);
715         ASSERT(blkno == 1);
716         if (retval)
717                 return retval;
718         retval = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp1,
719                                               XFS_DATA_FORK);
720         if (retval)
721                 return retval;
722         ASSERT(bp1 != NULL);
723         retval = xfs_da_get_buf(args->trans, args->dp, 1, -1, &bp2,
724                                              XFS_DATA_FORK);
725         if (retval) {
726                 xfs_da_buf_done(bp1);
727                 return retval;
728         }
729         ASSERT(bp2 != NULL);
730         memcpy(bp2->data, bp1->data, XFS_LBSIZE(dp->i_mount));
731         xfs_da_buf_done(bp1);
732         xfs_da_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1);
733
734         /*
735          * Set up the new root node.
736          */
737         retval = xfs_da_node_create(args, 0, 1, &bp1, XFS_DATA_FORK);
738         if (retval) {
739                 xfs_da_buf_done(bp2);
740                 return retval;
741         }
742         node = bp1->data;
743         leaf = bp2->data;
744         ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
745         node->btree[0].hashval = leaf->entries[be16_to_cpu(leaf->hdr.count)-1].hashval;
746         xfs_da_buf_done(bp2);
747         node->btree[0].before = cpu_to_be32(blkno);
748         node->hdr.count = cpu_to_be16(1);
749         xfs_da_log_buf(args->trans, bp1,
750                 XFS_DA_LOGRANGE(node, &node->btree[0], sizeof(node->btree[0])));
751         xfs_da_buf_done(bp1);
752
753         return retval;
754 }
755
756
757 /*========================================================================
758  * Routines used for growing the Btree.
759  *========================================================================*/
760
761 /*
762  * Create the initial contents of a leaf directory
763  * or a leaf in a node directory.
764  */
765 STATIC int
766 xfs_dir_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
767 {
768         xfs_dir_leafblock_t *leaf;
769         xfs_dir_leaf_hdr_t *hdr;
770         xfs_inode_t *dp;
771         xfs_dabuf_t *bp;
772         int retval;
773
774         dp = args->dp;
775         ASSERT(dp != NULL);
776         retval = xfs_da_get_buf(args->trans, dp, blkno, -1, &bp, XFS_DATA_FORK);
777         if (retval)
778                 return retval;
779         ASSERT(bp != NULL);
780         leaf = bp->data;
781         memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount));
782         hdr = &leaf->hdr;
783         hdr->info.magic = cpu_to_be16(XFS_DIR_LEAF_MAGIC);
784         hdr->firstused = cpu_to_be16(XFS_LBSIZE(dp->i_mount));
785         if (!hdr->firstused)
786                 hdr->firstused = cpu_to_be16(XFS_LBSIZE(dp->i_mount) - 1);
787         hdr->freemap[0].base = cpu_to_be16(sizeof(xfs_dir_leaf_hdr_t));
788         hdr->freemap[0].size = cpu_to_be16(be16_to_cpu(hdr->firstused) -
789                                            be16_to_cpu(hdr->freemap[0].base));
790
791         xfs_da_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1);
792
793         *bpp = bp;
794         return 0;
795 }
796
797 /*
798  * Split the leaf node, rebalance, then add the new entry.
799  */
800 int
801 xfs_dir_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
802                                   xfs_da_state_blk_t *newblk)
803 {
804         xfs_dablk_t blkno;
805         xfs_da_args_t *args;
806         int error;
807
808         /*
809          * Allocate space for a new leaf node.
810          */
811         args = state->args;
812         ASSERT(args != NULL);
813         ASSERT(oldblk->magic == XFS_DIR_LEAF_MAGIC);
814         error = xfs_da_grow_inode(args, &blkno);
815         if (error)
816                 return error;
817         error = xfs_dir_leaf_create(args, blkno, &newblk->bp);
818         if (error)
819                 return error;
820         newblk->blkno = blkno;
821         newblk->magic = XFS_DIR_LEAF_MAGIC;
822
823         /*
824          * Rebalance the entries across the two leaves.
825          */
826         xfs_dir_leaf_rebalance(state, oldblk, newblk);
827         error = xfs_da_blk_link(state, oldblk, newblk);
828         if (error)
829                 return error;
830
831         /*
832          * Insert the new entry in the correct block.
833          */
834         if (state->inleaf) {
835                 error = xfs_dir_leaf_add(oldblk->bp, args, oldblk->index);
836         } else {
837                 error = xfs_dir_leaf_add(newblk->bp, args, newblk->index);
838         }
839
840         /*
841          * Update last hashval in each block since we added the name.
842          */
843         oldblk->hashval = xfs_dir_leaf_lasthash(oldblk->bp, NULL);
844         newblk->hashval = xfs_dir_leaf_lasthash(newblk->bp, NULL);
845         return error;
846 }
847
848 /*
849  * Add a name to the leaf directory structure.
850  *
851  * Must take into account fragmented leaves and leaves where spacemap has
852  * lost some freespace information (ie: holes).
853  */
854 int
855 xfs_dir_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args, int index)
856 {
857         xfs_dir_leafblock_t *leaf;
858         xfs_dir_leaf_hdr_t *hdr;
859         xfs_dir_leaf_map_t *map;
860         int tablesize, entsize, sum, i, tmp, error;
861
862         leaf = bp->data;
863         ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
864         ASSERT((index >= 0) && (index <= be16_to_cpu(leaf->hdr.count)));
865         hdr = &leaf->hdr;
866         entsize = XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen);
867
868         /*
869          * Search through freemap for first-fit on new name length.
870          * (may need to figure in size of entry struct too)
871          */
872         tablesize = (be16_to_cpu(hdr->count) + 1) *
873                 sizeof(xfs_dir_leaf_entry_t) + sizeof(xfs_dir_leaf_hdr_t);
874         map = &hdr->freemap[XFS_DIR_LEAF_MAPSIZE-1];
875         for (sum = 0, i = XFS_DIR_LEAF_MAPSIZE-1; i >= 0; map--, i--) {
876                 if (tablesize > be16_to_cpu(hdr->firstused)) {
877                         sum += be16_to_cpu(map->size);
878                         continue;
879                 }
880                 if (!map->size)
881                         continue;       /* no space in this map */
882                 tmp = entsize;
883                 if (be16_to_cpu(map->base) < be16_to_cpu(hdr->firstused))
884                         tmp += (uint)sizeof(xfs_dir_leaf_entry_t);
885                 if (be16_to_cpu(map->size) >= tmp) {
886                         if (!args->justcheck)
887                                 xfs_dir_leaf_add_work(bp, args, index, i);
888                         return 0;
889                 }
890                 sum += be16_to_cpu(map->size);
891         }
892
893         /*
894          * If there are no holes in the address space of the block,
895          * and we don't have enough freespace, then compaction will do us
896          * no good and we should just give up.
897          */
898         if (!hdr->holes && (sum < entsize))
899                 return XFS_ERROR(ENOSPC);
900
901         /*
902          * Compact the entries to coalesce free space.
903          * Pass the justcheck flag so the checking pass can return
904          * an error, without changing anything, if it won't fit.
905          */
906         error = xfs_dir_leaf_compact(args->trans, bp,
907                         args->total == 0 ?
908                                 entsize +
909                                 (uint)sizeof(xfs_dir_leaf_entry_t) : 0,
910                         args->justcheck);
911         if (error)
912                 return error;
913         /*
914          * After compaction, the block is guaranteed to have only one
915          * free region, in freemap[0].  If it is not big enough, give up.
916          */
917         if (be16_to_cpu(hdr->freemap[0].size) <
918             (entsize + (uint)sizeof(xfs_dir_leaf_entry_t)))
919                 return XFS_ERROR(ENOSPC);
920
921         if (!args->justcheck)
922                 xfs_dir_leaf_add_work(bp, args, index, 0);
923         return 0;
924 }
925
926 /*
927  * Add a name to a leaf directory structure.
928  */
929 STATIC void
930 xfs_dir_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int index,
931                       int mapindex)
932 {
933         xfs_dir_leafblock_t *leaf;
934         xfs_dir_leaf_hdr_t *hdr;
935         xfs_dir_leaf_entry_t *entry;
936         xfs_dir_leaf_name_t *namest;
937         xfs_dir_leaf_map_t *map;
938         /* REFERENCED */
939         xfs_mount_t *mp;
940         int tmp, i;
941
942         leaf = bp->data;
943         ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
944         hdr = &leaf->hdr;
945         ASSERT((mapindex >= 0) && (mapindex < XFS_DIR_LEAF_MAPSIZE));
946         ASSERT((index >= 0) && (index <= be16_to_cpu(hdr->count)));
947
948         /*
949          * Force open some space in the entry array and fill it in.
950          */
951         entry = &leaf->entries[index];
952         if (index < be16_to_cpu(hdr->count)) {
953                 tmp  = be16_to_cpu(hdr->count) - index;
954                 tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
955                 memmove(entry + 1, entry, tmp);
956                 xfs_da_log_buf(args->trans, bp,
957                     XFS_DA_LOGRANGE(leaf, entry, tmp + (uint)sizeof(*entry)));
958         }
959         be16_add(&hdr->count, 1);
960
961         /*
962          * Allocate space for the new string (at the end of the run).
963          */
964         map = &hdr->freemap[mapindex];
965         mp = args->trans->t_mountp;
966         ASSERT(be16_to_cpu(map->base) < XFS_LBSIZE(mp));
967         ASSERT(be16_to_cpu(map->size) >= XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen));
968         ASSERT(be16_to_cpu(map->size) < XFS_LBSIZE(mp));
969
970         be16_add(&map->size, -(XFS_DIR_LEAF_ENTSIZE_BYNAME(args->namelen)));
971         entry->nameidx = cpu_to_be16(be16_to_cpu(map->base) +
972                                      be16_to_cpu(map->size));
973         entry->hashval = cpu_to_be32(args->hashval);
974         entry->namelen = args->namelen;
975         xfs_da_log_buf(args->trans, bp,
976             XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
977
978         /*
979          * Copy the string and inode number into the new space.
980          */
981         namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, be16_to_cpu(entry->nameidx));
982         XFS_DIR_SF_PUT_DIRINO(&args->inumber, &namest->inumber);
983         memcpy(namest->name, args->name, args->namelen);
984         xfs_da_log_buf(args->trans, bp,
985             XFS_DA_LOGRANGE(leaf, namest, XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry)));
986
987         /*
988          * Update the control info for this leaf node
989          */
990         if (be16_to_cpu(entry->nameidx) < be16_to_cpu(hdr->firstused))
991                 hdr->firstused = entry->nameidx;
992         ASSERT(be16_to_cpu(hdr->firstused) >=
993                ((be16_to_cpu(hdr->count)*sizeof(*entry))+sizeof(*hdr)));
994         tmp = (be16_to_cpu(hdr->count)-1) * (uint)sizeof(xfs_dir_leaf_entry_t)
995                         + (uint)sizeof(xfs_dir_leaf_hdr_t);
996         map = &hdr->freemap[0];
997         for (i = 0; i < XFS_DIR_LEAF_MAPSIZE; map++, i++) {
998                 if (be16_to_cpu(map->base) == tmp) {
999                         int entry_size = sizeof(xfs_dir_leaf_entry_t);
1000                         be16_add(&map->base, entry_size);
1001                         be16_add(&map->size, -entry_size);
1002                 }
1003         }
1004         be16_add(&hdr->namebytes, args->namelen);
1005         xfs_da_log_buf(args->trans, bp,
1006                 XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
1007 }
1008
1009 /*
1010  * Garbage collect a leaf directory block by copying it to a new buffer.
1011  */
1012 STATIC int
1013 xfs_dir_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp, int musthave,
1014                      int justcheck)
1015 {
1016         xfs_dir_leafblock_t *leaf_s, *leaf_d;
1017         xfs_dir_leaf_hdr_t *hdr_s, *hdr_d;
1018         xfs_mount_t *mp;
1019         char *tmpbuffer;
1020         char *tmpbuffer2=NULL;
1021         int rval;
1022         int lbsize;
1023
1024         mp = trans->t_mountp;
1025         lbsize = XFS_LBSIZE(mp);
1026         tmpbuffer = kmem_alloc(lbsize, KM_SLEEP);
1027         ASSERT(tmpbuffer != NULL);
1028         memcpy(tmpbuffer, bp->data, lbsize);
1029
1030         /*
1031          * Make a second copy in case xfs_dir_leaf_moveents()
1032          * below destroys the original.
1033          */
1034         if (musthave || justcheck) {
1035                 tmpbuffer2 = kmem_alloc(lbsize, KM_SLEEP);
1036                 memcpy(tmpbuffer2, bp->data, lbsize);
1037         }
1038         memset(bp->data, 0, lbsize);
1039
1040         /*
1041          * Copy basic information
1042          */
1043         leaf_s = (xfs_dir_leafblock_t *)tmpbuffer;
1044         leaf_d = bp->data;
1045         hdr_s = &leaf_s->hdr;
1046         hdr_d = &leaf_d->hdr;
1047         hdr_d->info = hdr_s->info;      /* struct copy */
1048         hdr_d->firstused = cpu_to_be16(lbsize);
1049         if (!hdr_d->firstused)
1050                 hdr_d->firstused = cpu_to_be16(lbsize - 1);
1051         hdr_d->namebytes = 0;
1052         hdr_d->count = 0;
1053         hdr_d->holes = 0;
1054         hdr_d->freemap[0].base = cpu_to_be16(sizeof(xfs_dir_leaf_hdr_t));
1055         hdr_d->freemap[0].size = cpu_to_be16(be16_to_cpu(hdr_d->firstused) -
1056                                              be16_to_cpu(hdr_d->freemap[0].base));
1057
1058         /*
1059          * Copy all entry's in the same (sorted) order,
1060          * but allocate filenames packed and in sequence.
1061          * This changes the source (leaf_s) as well.
1062          */
1063         xfs_dir_leaf_moveents(leaf_s, 0, leaf_d, 0, be16_to_cpu(hdr_s->count), mp);
1064
1065         if (musthave && be16_to_cpu(hdr_d->freemap[0].size) < musthave)
1066                 rval = XFS_ERROR(ENOSPC);
1067         else
1068                 rval = 0;
1069
1070         if (justcheck || rval == ENOSPC) {
1071                 ASSERT(tmpbuffer2);
1072                 memcpy(bp->data, tmpbuffer2, lbsize);
1073         } else {
1074                 xfs_da_log_buf(trans, bp, 0, lbsize - 1);
1075         }
1076
1077         kmem_free(tmpbuffer, lbsize);
1078         if (musthave || justcheck)
1079                 kmem_free(tmpbuffer2, lbsize);
1080         return rval;
1081 }
1082
1083 /*
1084  * Redistribute the directory entries between two leaf nodes,
1085  * taking into account the size of the new entry.
1086  *
1087  * NOTE: if new block is empty, then it will get the upper half of old block.
1088  */
1089 STATIC void
1090 xfs_dir_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
1091                                       xfs_da_state_blk_t *blk2)
1092 {
1093         xfs_da_state_blk_t *tmp_blk;
1094         xfs_dir_leafblock_t *leaf1, *leaf2;
1095         xfs_dir_leaf_hdr_t *hdr1, *hdr2;
1096         int count, totallen, max, space, swap;
1097
1098         /*
1099          * Set up environment.
1100          */
1101         ASSERT(blk1->magic == XFS_DIR_LEAF_MAGIC);
1102         ASSERT(blk2->magic == XFS_DIR_LEAF_MAGIC);
1103         leaf1 = blk1->bp->data;
1104         leaf2 = blk2->bp->data;
1105         ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
1106         ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
1107
1108         /*
1109          * Check ordering of blocks, reverse if it makes things simpler.
1110          */
1111         swap = 0;
1112         if (xfs_dir_leaf_order(blk1->bp, blk2->bp)) {
1113                 tmp_blk = blk1;
1114                 blk1 = blk2;
1115                 blk2 = tmp_blk;
1116                 leaf1 = blk1->bp->data;
1117                 leaf2 = blk2->bp->data;
1118                 swap = 1;
1119         }
1120         hdr1 = &leaf1->hdr;
1121         hdr2 = &leaf2->hdr;
1122
1123         /*
1124          * Examine entries until we reduce the absolute difference in
1125          * byte usage between the two blocks to a minimum.  Then get
1126          * the direction to copy and the number of elements to move.
1127          */
1128         state->inleaf = xfs_dir_leaf_figure_balance(state, blk1, blk2,
1129                                                            &count, &totallen);
1130         if (swap)
1131                 state->inleaf = !state->inleaf;
1132
1133         /*
1134          * Move any entries required from leaf to leaf:
1135          */
1136         if (count < be16_to_cpu(hdr1->count)) {
1137                 /*
1138                  * Figure the total bytes to be added to the destination leaf.
1139                  */
1140                 count = be16_to_cpu(hdr1->count) - count;       /* number entries being moved */
1141                 space = be16_to_cpu(hdr1->namebytes) - totallen;
1142                 space += count * ((uint)sizeof(xfs_dir_leaf_name_t)-1);
1143                 space += count * (uint)sizeof(xfs_dir_leaf_entry_t);
1144
1145                 /*
1146                  * leaf2 is the destination, compact it if it looks tight.
1147                  */
1148                 max  = be16_to_cpu(hdr2->firstused) - (uint)sizeof(xfs_dir_leaf_hdr_t);
1149                 max -= be16_to_cpu(hdr2->count) * (uint)sizeof(xfs_dir_leaf_entry_t);
1150                 if (space > max) {
1151                         xfs_dir_leaf_compact(state->args->trans, blk2->bp,
1152                                                                  0, 0);
1153                 }
1154
1155                 /*
1156                  * Move high entries from leaf1 to low end of leaf2.
1157                  */
1158                 xfs_dir_leaf_moveents(leaf1, be16_to_cpu(hdr1->count) - count,
1159                                              leaf2, 0, count, state->mp);
1160
1161                 xfs_da_log_buf(state->args->trans, blk1->bp, 0,
1162                                                    state->blocksize-1);
1163                 xfs_da_log_buf(state->args->trans, blk2->bp, 0,
1164                                                    state->blocksize-1);
1165
1166         } else if (count > be16_to_cpu(hdr1->count)) {
1167                 /*
1168                  * Figure the total bytes to be added to the destination leaf.
1169                  */
1170                 count -= be16_to_cpu(hdr1->count);              /* number entries being moved */
1171                 space  = totallen - be16_to_cpu(hdr1->namebytes);
1172                 space += count * ((uint)sizeof(xfs_dir_leaf_name_t)-1);
1173                 space += count * (uint)sizeof(xfs_dir_leaf_entry_t);
1174
1175                 /*
1176                  * leaf1 is the destination, compact it if it looks tight.
1177                  */
1178                 max  = be16_to_cpu(hdr1->firstused) - (uint)sizeof(xfs_dir_leaf_hdr_t);
1179                 max -= be16_to_cpu(hdr1->count) * (uint)sizeof(xfs_dir_leaf_entry_t);
1180                 if (space > max) {
1181                         xfs_dir_leaf_compact(state->args->trans, blk1->bp,
1182                                                                  0, 0);
1183                 }
1184
1185                 /*
1186                  * Move low entries from leaf2 to high end of leaf1.
1187                  */
1188                 xfs_dir_leaf_moveents(leaf2, 0, leaf1, be16_to_cpu(hdr1->count),
1189                                              count, state->mp);
1190
1191                 xfs_da_log_buf(state->args->trans, blk1->bp, 0,
1192                                                    state->blocksize-1);
1193                 xfs_da_log_buf(state->args->trans, blk2->bp, 0,
1194                                                    state->blocksize-1);
1195         }
1196
1197         /*
1198          * Copy out last hashval in each block for B-tree code.
1199          */
1200         blk1->hashval = be32_to_cpu(leaf1->entries[
1201                         be16_to_cpu(leaf1->hdr.count)-1].hashval);
1202         blk2->hashval = be32_to_cpu(leaf2->entries[
1203                         be16_to_cpu(leaf2->hdr.count)-1].hashval);
1204
1205         /*
1206          * Adjust the expected index for insertion.
1207          * GROT: this doesn't work unless blk2 was originally empty.
1208          */
1209         if (!state->inleaf) {
1210                 blk2->index = blk1->index - be16_to_cpu(leaf1->hdr.count);
1211         }
1212 }
1213
1214 /*
1215  * Examine entries until we reduce the absolute difference in
1216  * byte usage between the two blocks to a minimum.
1217  * GROT: Is this really necessary?  With other than a 512 byte blocksize,
1218  * GROT: there will always be enough room in either block for a new entry.
1219  * GROT: Do a double-split for this case?
1220  */
1221 STATIC int
1222 xfs_dir_leaf_figure_balance(xfs_da_state_t *state,
1223                                            xfs_da_state_blk_t *blk1,
1224                                            xfs_da_state_blk_t *blk2,
1225                                            int *countarg, int *namebytesarg)
1226 {
1227         xfs_dir_leafblock_t *leaf1, *leaf2;
1228         xfs_dir_leaf_hdr_t *hdr1, *hdr2;
1229         xfs_dir_leaf_entry_t *entry;
1230         int count, max, totallen, half;
1231         int lastdelta, foundit, tmp;
1232
1233         /*
1234          * Set up environment.
1235          */
1236         leaf1 = blk1->bp->data;
1237         leaf2 = blk2->bp->data;
1238         hdr1 = &leaf1->hdr;
1239         hdr2 = &leaf2->hdr;
1240         foundit = 0;
1241         totallen = 0;
1242
1243         /*
1244          * Examine entries until we reduce the absolute difference in
1245          * byte usage between the two blocks to a minimum.
1246          */
1247         max = be16_to_cpu(hdr1->count) + be16_to_cpu(hdr2->count);
1248         half  = (max+1) * (uint)(sizeof(*entry)+sizeof(xfs_dir_leaf_entry_t)-1);
1249         half += be16_to_cpu(hdr1->namebytes) + be16_to_cpu(hdr2->namebytes) +
1250                 state->args->namelen;
1251         half /= 2;
1252         lastdelta = state->blocksize;
1253         entry = &leaf1->entries[0];
1254         for (count = 0; count < max; entry++, count++) {
1255
1256 #define XFS_DIR_ABS(A)  (((A) < 0) ? -(A) : (A))
1257                 /*
1258                  * The new entry is in the first block, account for it.
1259                  */
1260                 if (count == blk1->index) {
1261                         tmp = totallen + (uint)sizeof(*entry)
1262                                 + XFS_DIR_LEAF_ENTSIZE_BYNAME(state->args->namelen);
1263                         if (XFS_DIR_ABS(half - tmp) > lastdelta)
1264                                 break;
1265                         lastdelta = XFS_DIR_ABS(half - tmp);
1266                         totallen = tmp;
1267                         foundit = 1;
1268                 }
1269
1270                 /*
1271                  * Wrap around into the second block if necessary.
1272                  */
1273                 if (count == be16_to_cpu(hdr1->count)) {
1274                         leaf1 = leaf2;
1275                         entry = &leaf1->entries[0];
1276                 }
1277
1278                 /*
1279                  * Figure out if next leaf entry would be too much.
1280                  */
1281                 tmp = totallen + (uint)sizeof(*entry)
1282                                 + XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry);
1283                 if (XFS_DIR_ABS(half - tmp) > lastdelta)
1284                         break;
1285                 lastdelta = XFS_DIR_ABS(half - tmp);
1286                 totallen = tmp;
1287 #undef XFS_DIR_ABS
1288         }
1289
1290         /*
1291          * Calculate the number of namebytes that will end up in lower block.
1292          * If new entry not in lower block, fix up the count.
1293          */
1294         totallen -=
1295                 count * (uint)(sizeof(*entry)+sizeof(xfs_dir_leaf_entry_t)-1);
1296         if (foundit) {
1297                 totallen -= (sizeof(*entry)+sizeof(xfs_dir_leaf_entry_t)-1) +
1298                             state->args->namelen;
1299         }
1300
1301         *countarg = count;
1302         *namebytesarg = totallen;
1303         return foundit;
1304 }
1305
1306 /*========================================================================
1307  * Routines used for shrinking the Btree.
1308  *========================================================================*/
1309
1310 /*
1311  * Check a leaf block and its neighbors to see if the block should be
1312  * collapsed into one or the other neighbor.  Always keep the block
1313  * with the smaller block number.
1314  * If the current block is over 50% full, don't try to join it, return 0.
1315  * If the block is empty, fill in the state structure and return 2.
1316  * If it can be collapsed, fill in the state structure and return 1.
1317  * If nothing can be done, return 0.
1318  */
1319 int
1320 xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
1321 {
1322         xfs_dir_leafblock_t *leaf;
1323         xfs_da_state_blk_t *blk;
1324         xfs_da_blkinfo_t *info;
1325         int count, bytes, forward, error, retval, i;
1326         xfs_dablk_t blkno;
1327         xfs_dabuf_t *bp;
1328
1329         /*
1330          * Check for the degenerate case of the block being over 50% full.
1331          * If so, it's not worth even looking to see if we might be able
1332          * to coalesce with a sibling.
1333          */
1334         blk = &state->path.blk[ state->path.active-1 ];
1335         info = blk->bp->data;
1336         ASSERT(be16_to_cpu(info->magic) == XFS_DIR_LEAF_MAGIC);
1337         leaf = (xfs_dir_leafblock_t *)info;
1338         count = be16_to_cpu(leaf->hdr.count);
1339         bytes = (uint)sizeof(xfs_dir_leaf_hdr_t) +
1340                 count * (uint)sizeof(xfs_dir_leaf_entry_t) +
1341                 count * ((uint)sizeof(xfs_dir_leaf_name_t)-1) +
1342                 be16_to_cpu(leaf->hdr.namebytes);
1343         if (bytes > (state->blocksize >> 1)) {
1344                 *action = 0;    /* blk over 50%, don't try to join */
1345                 return 0;
1346         }
1347
1348         /*
1349          * Check for the degenerate case of the block being empty.
1350          * If the block is empty, we'll simply delete it, no need to
1351          * coalesce it with a sibling block.  We choose (arbitrarily)
1352          * to merge with the forward block unless it is NULL.
1353          */
1354         if (count == 0) {
1355                 /*
1356                  * Make altpath point to the block we want to keep and
1357                  * path point to the block we want to drop (this one).
1358                  */
1359                 forward = (info->forw != 0);
1360                 memcpy(&state->altpath, &state->path, sizeof(state->path));
1361                 error = xfs_da_path_shift(state, &state->altpath, forward,
1362                                                  0, &retval);
1363                 if (error)
1364                         return error;
1365                 if (retval) {
1366                         *action = 0;
1367                 } else {
1368                         *action = 2;
1369                 }
1370                 return 0;
1371         }
1372
1373         /*
1374          * Examine each sibling block to see if we can coalesce with
1375          * at least 25% free space to spare.  We need to figure out
1376          * whether to merge with the forward or the backward block.
1377          * We prefer coalescing with the lower numbered sibling so as
1378          * to shrink a directory over time.
1379          */
1380         forward = (be32_to_cpu(info->forw) < be32_to_cpu(info->back));  /* start with smaller blk num */
1381         for (i = 0; i < 2; forward = !forward, i++) {
1382                 if (forward)
1383                         blkno = be32_to_cpu(info->forw);
1384                 else
1385                         blkno = be32_to_cpu(info->back);
1386                 if (blkno == 0)
1387                         continue;
1388                 error = xfs_da_read_buf(state->args->trans, state->args->dp,
1389                                                             blkno, -1, &bp,
1390                                                             XFS_DATA_FORK);
1391                 if (error)
1392                         return error;
1393                 ASSERT(bp != NULL);
1394
1395                 leaf = (xfs_dir_leafblock_t *)info;
1396                 count  = be16_to_cpu(leaf->hdr.count);
1397                 bytes  = state->blocksize - (state->blocksize>>2);
1398                 bytes -= be16_to_cpu(leaf->hdr.namebytes);
1399                 leaf = bp->data;
1400                 ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
1401                 count += be16_to_cpu(leaf->hdr.count);
1402                 bytes -= be16_to_cpu(leaf->hdr.namebytes);
1403                 bytes -= count * ((uint)sizeof(xfs_dir_leaf_name_t) - 1);
1404                 bytes -= count * (uint)sizeof(xfs_dir_leaf_entry_t);
1405                 bytes -= (uint)sizeof(xfs_dir_leaf_hdr_t);
1406                 if (bytes >= 0)
1407                         break;  /* fits with at least 25% to spare */
1408
1409                 xfs_da_brelse(state->args->trans, bp);
1410         }
1411         if (i >= 2) {
1412                 *action = 0;
1413                 return 0;
1414         }
1415         xfs_da_buf_done(bp);
1416
1417         /*
1418          * Make altpath point to the block we want to keep (the lower
1419          * numbered block) and path point to the block we want to drop.
1420          */
1421         memcpy(&state->altpath, &state->path, sizeof(state->path));
1422         if (blkno < blk->blkno) {
1423                 error = xfs_da_path_shift(state, &state->altpath, forward,
1424                                                  0, &retval);
1425         } else {
1426                 error = xfs_da_path_shift(state, &state->path, forward,
1427                                                  0, &retval);
1428         }
1429         if (error)
1430                 return error;
1431         if (retval) {
1432                 *action = 0;
1433         } else {
1434                 *action = 1;
1435         }
1436         return 0;
1437 }
1438
1439 /*
1440  * Remove a name from the leaf directory structure.
1441  *
1442  * Return 1 if leaf is less than 37% full, 0 if >= 37% full.
1443  * If two leaves are 37% full, when combined they will leave 25% free.
1444  */
1445 int
1446 xfs_dir_leaf_remove(xfs_trans_t *trans, xfs_dabuf_t *bp, int index)
1447 {
1448         xfs_dir_leafblock_t *leaf;
1449         xfs_dir_leaf_hdr_t *hdr;
1450         xfs_dir_leaf_map_t *map;
1451         xfs_dir_leaf_entry_t *entry;
1452         xfs_dir_leaf_name_t *namest;
1453         int before, after, smallest, entsize;
1454         int tablesize, tmp, i;
1455         xfs_mount_t *mp;
1456
1457         leaf = bp->data;
1458         ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
1459         hdr = &leaf->hdr;
1460         mp = trans->t_mountp;
1461         ASSERT(hdr->count && (be16_to_cpu(hdr->count) < (XFS_LBSIZE(mp)/8)));
1462         ASSERT((index >= 0) && (index < be16_to_cpu(hdr->count)));
1463         ASSERT(be16_to_cpu(hdr->firstused) >=
1464                ((be16_to_cpu(hdr->count)*sizeof(*entry))+sizeof(*hdr)));
1465         entry = &leaf->entries[index];
1466         ASSERT(be16_to_cpu(entry->nameidx) >= be16_to_cpu(hdr->firstused));
1467         ASSERT(be16_to_cpu(entry->nameidx) < XFS_LBSIZE(mp));
1468
1469         /*
1470          * Scan through free region table:
1471          *    check for adjacency of free'd entry with an existing one,
1472          *    find smallest free region in case we need to replace it,
1473          *    adjust any map that borders the entry table,
1474          */
1475         tablesize = be16_to_cpu(hdr->count) * (uint)sizeof(xfs_dir_leaf_entry_t)
1476                         + (uint)sizeof(xfs_dir_leaf_hdr_t);
1477         map = &hdr->freemap[0];
1478         tmp = be16_to_cpu(map->size);
1479         before = after = -1;
1480         smallest = XFS_DIR_LEAF_MAPSIZE - 1;
1481         entsize = XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry);
1482         for (i = 0; i < XFS_DIR_LEAF_MAPSIZE; map++, i++) {
1483                 ASSERT(be16_to_cpu(map->base) < XFS_LBSIZE(mp));
1484                 ASSERT(be16_to_cpu(map->size) < XFS_LBSIZE(mp));
1485                 if (be16_to_cpu(map->base) == tablesize) {
1486                         int entry_size = sizeof(xfs_dir_leaf_entry_t);
1487                         be16_add(&map->base, -entry_size);
1488                         be16_add(&map->size, entry_size);
1489                 }
1490
1491                 if ((be16_to_cpu(map->base) + be16_to_cpu(map->size)) ==
1492                                 be16_to_cpu(entry->nameidx)) {
1493                         before = i;
1494                 } else if (be16_to_cpu(map->base) ==
1495                                 (be16_to_cpu(entry->nameidx) + entsize)) {
1496                         after = i;
1497                 } else if (be16_to_cpu(map->size) < tmp) {
1498                         tmp = be16_to_cpu(map->size);
1499                         smallest = i;
1500                 }
1501         }
1502
1503         /*
1504          * Coalesce adjacent freemap regions,
1505          * or replace the smallest region.
1506          */
1507         if ((before >= 0) || (after >= 0)) {
1508                 if ((before >= 0) && (after >= 0)) {
1509                         map = &hdr->freemap[before];
1510                         be16_add(&map->size, entsize);
1511                         be16_add(&map->size, be16_to_cpu(hdr->freemap[after].size));
1512                         hdr->freemap[after].base = 0;
1513                         hdr->freemap[after].size = 0;
1514                 } else if (before >= 0) {
1515                         map = &hdr->freemap[before];
1516                         be16_add(&map->size, entsize);
1517                 } else {
1518                         map = &hdr->freemap[after];
1519                         map->base = entry->nameidx;
1520                         be16_add(&map->size, entsize);
1521                 }
1522         } else {
1523                 /*
1524                  * Replace smallest region (if it is smaller than free'd entry)
1525                  */
1526                 map = &hdr->freemap[smallest];
1527                 if (be16_to_cpu(map->size) < entsize) {
1528                         map->base = entry->nameidx;
1529                         map->size = cpu_to_be16(entsize);
1530                 }
1531         }
1532
1533         /*
1534          * Did we remove the first entry?
1535          */
1536         if (be16_to_cpu(entry->nameidx) == be16_to_cpu(hdr->firstused))
1537                 smallest = 1;
1538         else
1539                 smallest = 0;
1540
1541         /*
1542          * Compress the remaining entries and zero out the removed stuff.
1543          */
1544         namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, be16_to_cpu(entry->nameidx));
1545         memset((char *)namest, 0, entsize);
1546         xfs_da_log_buf(trans, bp, XFS_DA_LOGRANGE(leaf, namest, entsize));
1547
1548         be16_add(&hdr->namebytes, -(entry->namelen));
1549         tmp = (be16_to_cpu(hdr->count) - index) * (uint)sizeof(xfs_dir_leaf_entry_t);
1550         memmove(entry, entry + 1, tmp);
1551         be16_add(&hdr->count, -1);
1552         xfs_da_log_buf(trans, bp,
1553             XFS_DA_LOGRANGE(leaf, entry, tmp + (uint)sizeof(*entry)));
1554         entry = &leaf->entries[be16_to_cpu(hdr->count)];
1555         memset((char *)entry, 0, sizeof(xfs_dir_leaf_entry_t));
1556
1557         /*
1558          * If we removed the first entry, re-find the first used byte
1559          * in the name area.  Note that if the entry was the "firstused",
1560          * then we don't have a "hole" in our block resulting from
1561          * removing the name.
1562          */
1563         if (smallest) {
1564                 tmp = XFS_LBSIZE(mp);
1565                 entry = &leaf->entries[0];
1566                 for (i = be16_to_cpu(hdr->count)-1; i >= 0; entry++, i--) {
1567                         ASSERT(be16_to_cpu(entry->nameidx) >=
1568                                be16_to_cpu(hdr->firstused));
1569                         ASSERT(be16_to_cpu(entry->nameidx) < XFS_LBSIZE(mp));
1570                         if (be16_to_cpu(entry->nameidx) < tmp)
1571                                 tmp = be16_to_cpu(entry->nameidx);
1572                 }
1573                 hdr->firstused = cpu_to_be16(tmp);
1574                 if (!hdr->firstused)
1575                         hdr->firstused = cpu_to_be16(tmp - 1);
1576         } else {
1577                 hdr->holes = 1;         /* mark as needing compaction */
1578         }
1579
1580         xfs_da_log_buf(trans, bp, XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
1581
1582         /*
1583          * Check if leaf is less than 50% full, caller may want to
1584          * "join" the leaf with a sibling if so.
1585          */
1586         tmp  = (uint)sizeof(xfs_dir_leaf_hdr_t);
1587         tmp += be16_to_cpu(leaf->hdr.count) * (uint)sizeof(xfs_dir_leaf_entry_t);
1588         tmp += be16_to_cpu(leaf->hdr.count) * ((uint)sizeof(xfs_dir_leaf_name_t) - 1);
1589         tmp += be16_to_cpu(leaf->hdr.namebytes);
1590         if (tmp < mp->m_dir_magicpct)
1591                 return 1;                       /* leaf is < 37% full */
1592         return 0;
1593 }
1594
1595 /*
1596  * Move all the directory entries from drop_leaf into save_leaf.
1597  */
1598 void
1599 xfs_dir_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
1600                                       xfs_da_state_blk_t *save_blk)
1601 {
1602         xfs_dir_leafblock_t *drop_leaf, *save_leaf, *tmp_leaf;
1603         xfs_dir_leaf_hdr_t *drop_hdr, *save_hdr, *tmp_hdr;
1604         xfs_mount_t *mp;
1605         char *tmpbuffer;
1606
1607         /*
1608          * Set up environment.
1609          */
1610         mp = state->mp;
1611         ASSERT(drop_blk->magic == XFS_DIR_LEAF_MAGIC);
1612         ASSERT(save_blk->magic == XFS_DIR_LEAF_MAGIC);
1613         drop_leaf = drop_blk->bp->data;
1614         save_leaf = save_blk->bp->data;
1615         ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
1616         ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
1617         drop_hdr = &drop_leaf->hdr;
1618         save_hdr = &save_leaf->hdr;
1619
1620         /*
1621          * Save last hashval from dying block for later Btree fixup.
1622          */
1623         drop_blk->hashval = be32_to_cpu(drop_leaf->entries[
1624                         be16_to_cpu(drop_leaf->hdr.count)-1].hashval);
1625
1626         /*
1627          * Check if we need a temp buffer, or can we do it in place.
1628          * Note that we don't check "leaf" for holes because we will
1629          * always be dropping it, toosmall() decided that for us already.
1630          */
1631         if (save_hdr->holes == 0) {
1632                 /*
1633                  * dest leaf has no holes, so we add there.  May need
1634                  * to make some room in the entry array.
1635                  */
1636                 if (xfs_dir_leaf_order(save_blk->bp, drop_blk->bp)) {
1637                         xfs_dir_leaf_moveents(drop_leaf, 0, save_leaf, 0,
1638                                         be16_to_cpu(drop_hdr->count), mp);
1639                 } else {
1640                         xfs_dir_leaf_moveents(drop_leaf, 0,
1641                                         save_leaf, be16_to_cpu(save_hdr->count),
1642                                         be16_to_cpu(drop_hdr->count), mp);
1643                 }
1644         } else {
1645                 /*
1646                  * Destination has holes, so we make a temporary copy
1647                  * of the leaf and add them both to that.
1648                  */
1649                 tmpbuffer = kmem_alloc(state->blocksize, KM_SLEEP);
1650                 ASSERT(tmpbuffer != NULL);
1651                 memset(tmpbuffer, 0, state->blocksize);
1652                 tmp_leaf = (xfs_dir_leafblock_t *)tmpbuffer;
1653                 tmp_hdr = &tmp_leaf->hdr;
1654                 tmp_hdr->info = save_hdr->info; /* struct copy */
1655                 tmp_hdr->count = 0;
1656                 tmp_hdr->firstused = cpu_to_be16(state->blocksize);
1657                 if (!tmp_hdr->firstused)
1658                         tmp_hdr->firstused = cpu_to_be16(state->blocksize - 1);
1659                 tmp_hdr->namebytes = 0;
1660                 if (xfs_dir_leaf_order(save_blk->bp, drop_blk->bp)) {
1661                         xfs_dir_leaf_moveents(drop_leaf, 0, tmp_leaf, 0,
1662                                         be16_to_cpu(drop_hdr->count), mp);
1663                         xfs_dir_leaf_moveents(save_leaf, 0,
1664                                         tmp_leaf, be16_to_cpu(tmp_leaf->hdr.count),
1665                                         be16_to_cpu(save_hdr->count), mp);
1666                 } else {
1667                         xfs_dir_leaf_moveents(save_leaf, 0, tmp_leaf, 0,
1668                                                  be16_to_cpu(save_hdr->count), mp);
1669                         xfs_dir_leaf_moveents(drop_leaf, 0,
1670                                               tmp_leaf, be16_to_cpu(tmp_leaf->hdr.count),
1671                                               be16_to_cpu(drop_hdr->count), mp);
1672                 }
1673                 memcpy(save_leaf, tmp_leaf, state->blocksize);
1674                 kmem_free(tmpbuffer, state->blocksize);
1675         }
1676
1677         xfs_da_log_buf(state->args->trans, save_blk->bp, 0,
1678                                            state->blocksize - 1);
1679
1680         /*
1681          * Copy out last hashval in each block for B-tree code.
1682          */
1683         save_blk->hashval = be32_to_cpu(save_leaf->entries[
1684                         be16_to_cpu(save_leaf->hdr.count)-1].hashval);
1685 }
1686
1687 /*========================================================================
1688  * Routines used for finding things in the Btree.
1689  *========================================================================*/
1690
1691 /*
1692  * Look up a name in a leaf directory structure.
1693  * This is the internal routine, it uses the caller's buffer.
1694  *
1695  * Note that duplicate keys are allowed, but only check within the
1696  * current leaf node.  The Btree code must check in adjacent leaf nodes.
1697  *
1698  * Return in *index the index into the entry[] array of either the found
1699  * entry, or where the entry should have been (insert before that entry).
1700  *
1701  * Don't change the args->inumber unless we find the filename.
1702  */
1703 int
1704 xfs_dir_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args, int *index)
1705 {
1706         xfs_dir_leafblock_t *leaf;
1707         xfs_dir_leaf_entry_t *entry;
1708         xfs_dir_leaf_name_t *namest;
1709         int probe, span;
1710         xfs_dahash_t hashval;
1711
1712         leaf = bp->data;
1713         ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
1714         ASSERT(be16_to_cpu(leaf->hdr.count) < (XFS_LBSIZE(args->dp->i_mount)/8));
1715
1716         /*
1717          * Binary search.  (note: small blocks will skip this loop)
1718          */
1719         hashval = args->hashval;
1720         probe = span = be16_to_cpu(leaf->hdr.count) / 2;
1721         for (entry = &leaf->entries[probe]; span > 4;
1722                    entry = &leaf->entries[probe]) {
1723                 span /= 2;
1724                 if (be32_to_cpu(entry->hashval) < hashval)
1725                         probe += span;
1726                 else if (be32_to_cpu(entry->hashval) > hashval)
1727                         probe -= span;
1728                 else
1729                         break;
1730         }
1731         ASSERT((probe >= 0) && \
1732                ((!leaf->hdr.count) || (probe < be16_to_cpu(leaf->hdr.count))));
1733         ASSERT((span <= 4) || (be32_to_cpu(entry->hashval) == hashval));
1734
1735         /*
1736          * Since we may have duplicate hashval's, find the first matching
1737          * hashval in the leaf.
1738          */
1739         while ((probe > 0) && (be32_to_cpu(entry->hashval) >= hashval)) {
1740                 entry--;
1741                 probe--;
1742         }
1743         while ((probe < be16_to_cpu(leaf->hdr.count)) &&
1744                (be32_to_cpu(entry->hashval) < hashval)) {
1745                 entry++;
1746                 probe++;
1747         }
1748         if ((probe == be16_to_cpu(leaf->hdr.count)) ||
1749             (be32_to_cpu(entry->hashval) != hashval)) {
1750                 *index = probe;
1751                 ASSERT(args->oknoent);
1752                 return XFS_ERROR(ENOENT);
1753         }
1754
1755         /*
1756          * Duplicate keys may be present, so search all of them for a match.
1757          */
1758         while ((probe < be16_to_cpu(leaf->hdr.count)) &&
1759                (be32_to_cpu(entry->hashval) == hashval)) {
1760                 namest = XFS_DIR_LEAF_NAMESTRUCT(leaf, be16_to_cpu(entry->nameidx));
1761                 if (entry->namelen == args->namelen &&
1762                     namest->name[0] == args->name[0] &&
1763                     memcmp(args->name, namest->name, args->namelen) == 0) {
1764                         XFS_DIR_SF_GET_DIRINO(&namest->inumber, &args->inumber);
1765                         *index = probe;
1766                         return XFS_ERROR(EEXIST);
1767                 }
1768                 entry++;
1769                 probe++;
1770         }
1771         *index = probe;
1772         ASSERT(probe == be16_to_cpu(leaf->hdr.count) || args->oknoent);
1773         return XFS_ERROR(ENOENT);
1774 }
1775
1776 /*========================================================================
1777  * Utility routines.
1778  *========================================================================*/
1779
1780 /*
1781  * Move the indicated entries from one leaf to another.
1782  * NOTE: this routine modifies both source and destination leaves.
1783  */
1784 /* ARGSUSED */
1785 STATIC void
1786 xfs_dir_leaf_moveents(xfs_dir_leafblock_t *leaf_s, int start_s,
1787                       xfs_dir_leafblock_t *leaf_d, int start_d,
1788                       int count, xfs_mount_t *mp)
1789 {
1790         xfs_dir_leaf_hdr_t *hdr_s, *hdr_d;
1791         xfs_dir_leaf_entry_t *entry_s, *entry_d;
1792         int tmp, i;
1793
1794         /*
1795          * Check for nothing to do.
1796          */
1797         if (count == 0)
1798                 return;
1799
1800         /*
1801          * Set up environment.
1802          */
1803         ASSERT(be16_to_cpu(leaf_s->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
1804         ASSERT(be16_to_cpu(leaf_d->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
1805         hdr_s = &leaf_s->hdr;
1806         hdr_d = &leaf_d->hdr;
1807         ASSERT(hdr_s->count && (be16_to_cpu(hdr_s->count) < (XFS_LBSIZE(mp)/8)));
1808         ASSERT(be16_to_cpu(hdr_s->firstused) >=
1809                 ((be16_to_cpu(hdr_s->count)*sizeof(*entry_s))+sizeof(*hdr_s)));
1810         ASSERT(be16_to_cpu(hdr_d->count) < (XFS_LBSIZE(mp)/8));
1811         ASSERT(be16_to_cpu(hdr_d->firstused) >=
1812                 ((be16_to_cpu(hdr_d->count)*sizeof(*entry_d))+sizeof(*hdr_d)));
1813
1814         ASSERT(start_s < be16_to_cpu(hdr_s->count));
1815         ASSERT(start_d <= be16_to_cpu(hdr_d->count));
1816         ASSERT(count <= be16_to_cpu(hdr_s->count));
1817
1818         /*
1819          * Move the entries in the destination leaf up to make a hole?
1820          */
1821         if (start_d < be16_to_cpu(hdr_d->count)) {
1822                 tmp  = be16_to_cpu(hdr_d->count) - start_d;
1823                 tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
1824                 entry_s = &leaf_d->entries[start_d];
1825                 entry_d = &leaf_d->entries[start_d + count];
1826                 memcpy(entry_d, entry_s, tmp);
1827         }
1828
1829         /*
1830          * Copy all entry's in the same (sorted) order,
1831          * but allocate filenames packed and in sequence.
1832          */
1833         entry_s = &leaf_s->entries[start_s];
1834         entry_d = &leaf_d->entries[start_d];
1835         for (i = 0; i < count; entry_s++, entry_d++, i++) {
1836                 ASSERT(be16_to_cpu(entry_s->nameidx) >=
1837                        be16_to_cpu(hdr_s->firstused));
1838                 tmp = XFS_DIR_LEAF_ENTSIZE_BYENTRY(entry_s);
1839                 be16_add(&hdr_d->firstused, -(tmp));
1840                 entry_d->hashval = entry_s->hashval;
1841                 entry_d->nameidx = hdr_d->firstused;
1842                 entry_d->namelen = entry_s->namelen;
1843                 ASSERT(be16_to_cpu(entry_d->nameidx) + tmp <= XFS_LBSIZE(mp));
1844                 memcpy(XFS_DIR_LEAF_NAMESTRUCT(leaf_d, be16_to_cpu(entry_d->nameidx)),
1845                        XFS_DIR_LEAF_NAMESTRUCT(leaf_s, be16_to_cpu(entry_s->nameidx)), tmp);
1846                 ASSERT(be16_to_cpu(entry_s->nameidx) + tmp <= XFS_LBSIZE(mp));
1847                 memset((char *)XFS_DIR_LEAF_NAMESTRUCT(leaf_s,
1848                                         be16_to_cpu(entry_s->nameidx)), 0, tmp);
1849                 be16_add(&hdr_s->namebytes, -(entry_d->namelen));
1850                 be16_add(&hdr_d->namebytes, entry_d->namelen);
1851                 be16_add(&hdr_s->count, -1);
1852                 be16_add(&hdr_d->count, +1);
1853                 tmp = be16_to_cpu(hdr_d->count) * (uint)sizeof(xfs_dir_leaf_entry_t)
1854                                 + (uint)sizeof(xfs_dir_leaf_hdr_t);
1855                 ASSERT(be16_to_cpu(hdr_d->firstused) >= tmp);
1856
1857         }
1858
1859         /*
1860          * Zero out the entries we just copied.
1861          */
1862         if (start_s == be16_to_cpu(hdr_s->count)) {
1863                 tmp = count * (uint)sizeof(xfs_dir_leaf_entry_t);
1864                 entry_s = &leaf_s->entries[start_s];
1865                 ASSERT((char *)entry_s + tmp <= (char *)leaf_s + XFS_LBSIZE(mp));
1866                 memset((char *)entry_s, 0, tmp);
1867         } else {
1868                 /*
1869                  * Move the remaining entries down to fill the hole,
1870                  * then zero the entries at the top.
1871                  */
1872                 tmp  = be16_to_cpu(hdr_s->count) - count;
1873                 tmp *= (uint)sizeof(xfs_dir_leaf_entry_t);
1874                 entry_s = &leaf_s->entries[start_s + count];
1875                 entry_d = &leaf_s->entries[start_s];
1876                 memcpy(entry_d, entry_s, tmp);
1877
1878                 tmp = count * (uint)sizeof(xfs_dir_leaf_entry_t);
1879                 entry_s = &leaf_s->entries[be16_to_cpu(hdr_s->count)];
1880                 ASSERT((char *)entry_s + tmp <= (char *)leaf_s + XFS_LBSIZE(mp));
1881                 memset((char *)entry_s, 0, tmp);
1882         }
1883
1884         /*
1885          * Fill in the freemap information
1886          */
1887         hdr_d->freemap[0].base = cpu_to_be16(sizeof(xfs_dir_leaf_hdr_t) +
1888                         be16_to_cpu(hdr_d->count) * sizeof(xfs_dir_leaf_entry_t));
1889         hdr_d->freemap[0].size = cpu_to_be16(be16_to_cpu(hdr_d->firstused) -
1890                         be16_to_cpu(hdr_d->freemap[0].base));
1891         hdr_d->freemap[1].base = 0;
1892         hdr_d->freemap[1].size = 0;
1893         hdr_d->freemap[2].base = 0;
1894         hdr_d->freemap[2].size = 0;
1895         hdr_s->holes = 1;       /* leaf may not be compact */
1896 }
1897
1898 /*
1899  * Compare two leaf blocks "order".
1900  */
1901 int
1902 xfs_dir_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp)
1903 {
1904         xfs_dir_leafblock_t *leaf1, *leaf2;
1905
1906         leaf1 = leaf1_bp->data;
1907         leaf2 = leaf2_bp->data;
1908         ASSERT((be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR_LEAF_MAGIC) &&
1909                (be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR_LEAF_MAGIC));
1910         if (leaf1->hdr.count && leaf2->hdr.count &&
1911             ((be32_to_cpu(leaf2->entries[0].hashval) <
1912               be32_to_cpu(leaf1->entries[0 ].hashval)) ||
1913              (be32_to_cpu(leaf2->entries[
1914                           be16_to_cpu(leaf2->hdr.count)-1].hashval) <
1915               be32_to_cpu(leaf1->entries[
1916                           be16_to_cpu(leaf1->hdr.count)-1].hashval)))) {
1917                 return 1;
1918         }
1919         return 0;
1920 }
1921
1922 /*
1923  * Pick up the last hashvalue from a leaf block.
1924  */
1925 xfs_dahash_t
1926 xfs_dir_leaf_lasthash(xfs_dabuf_t *bp, int *count)
1927 {
1928         xfs_dir_leafblock_t *leaf;
1929
1930         leaf = bp->data;
1931         ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR_LEAF_MAGIC);
1932         if (count)
1933                 *count = be16_to_cpu(leaf->hdr.count);
1934         if (!leaf->hdr.count)
1935                 return(0);
1936         return be32_to_cpu(leaf->entries[be16_to_cpu(leaf->hdr.count)-1].hashval);
1937 }
1938
1939 /*
1940  * Copy out directory entries for getdents(), for leaf directories.
1941  */
1942 int
1943 xfs_dir_leaf_getdents_int(
1944         xfs_dabuf_t     *bp,
1945         xfs_inode_t     *dp,
1946         xfs_dablk_t     bno,
1947         uio_t           *uio,
1948         int             *eobp,
1949         xfs_dirent_t    *dbp,
1950         xfs_dir_put_t   put,
1951         xfs_daddr_t             nextda)
1952 {
1953         xfs_dir_leafblock_t     *leaf;
1954         xfs_dir_leaf_entry_t    *entry;
1955         xfs_dir_leaf_name_t     *namest;
1956         int                     entno, want_entno, i, nextentno;
1957         xfs_mount_t             *mp;
1958         xfs_dahash_t            cookhash;
1959         xfs_dahash_t            nexthash = 0;
1960 #if (BITS_PER_LONG == 32)
1961         xfs_dahash_t            lasthash = XFS_DA_MAXHASH;
1962 #endif
1963         xfs_dir_put_args_t      p;
1964
1965         mp = dp->i_mount;
1966         leaf = bp->data;
1967         if (be16_to_cpu(leaf->hdr.info.magic) != XFS_DIR_LEAF_MAGIC) {
1968                 *eobp = 1;
1969                 return XFS_ERROR(ENOENT);       /* XXX wrong code */
1970         }
1971
1972         want_entno = XFS_DA_COOKIE_ENTRY(mp, uio->uio_offset);
1973
1974         cookhash = XFS_DA_COOKIE_HASH(mp, uio->uio_offset);
1975
1976         xfs_dir_trace_g_dul("leaf: start", dp, uio, leaf);
1977
1978         /*
1979          * Re-find our place.
1980          */
1981         for (i = entno = 0, entry = &leaf->entries[0];
1982                      i < be16_to_cpu(leaf->hdr.count); entry++, i++) {
1983
1984                 namest = XFS_DIR_LEAF_NAMESTRUCT(leaf,
1985                                     be16_to_cpu(entry->nameidx));
1986
1987                 if (unlikely(
1988                     ((char *)namest < (char *)leaf) ||
1989                     ((char *)namest >= (char *)leaf + XFS_LBSIZE(mp)))) {
1990                         XFS_CORRUPTION_ERROR("xfs_dir_leaf_getdents_int(1)",
1991                                              XFS_ERRLEVEL_LOW, mp, leaf);
1992                         xfs_dir_trace_g_du("leaf: corrupted", dp, uio);
1993                         return XFS_ERROR(EFSCORRUPTED);
1994                 }
1995                 if (be32_to_cpu(entry->hashval) >= cookhash) {
1996                         if (entno < want_entno &&
1997                             be32_to_cpu(entry->hashval) == cookhash) {
1998                                 /*
1999                                  * Trying to get to a particular offset in a
2000                                  * run of equal-hashval entries.
2001                                  */
2002                                 entno++;
2003                         } else if (want_entno > 0 && entno == want_entno &&
2004                                    be32_to_cpu(entry->hashval) == cookhash) {
2005                                 break;
2006                         } else {
2007                                 entno = 0;
2008                                 break;
2009                         }
2010                 }
2011         }
2012
2013         if (i == be16_to_cpu(leaf->hdr.count)) {
2014                 xfs_dir_trace_g_du("leaf: hash not found", dp, uio);
2015                 if (!leaf->hdr.info.forw)
2016                         uio->uio_offset =
2017                                 XFS_DA_MAKE_COOKIE(mp, 0, 0, XFS_DA_MAXHASH);
2018                 /*
2019                  * Don't set uio_offset if there's another block:
2020                  * the node code will be setting uio_offset anyway.
2021                  */
2022                 *eobp = 0;
2023                 return 0;
2024         }
2025         xfs_dir_trace_g_due("leaf: hash found", dp, uio, entry);
2026
2027         p.dbp = dbp;
2028         p.put = put;
2029         p.uio = uio;
2030
2031         /*
2032          * We're synchronized, start copying entries out to the user.
2033          */
2034         for (; entno >= 0 && i < be16_to_cpu(leaf->hdr.count);
2035                              entry++, i++, (entno = nextentno)) {
2036                 int lastresid=0, retval;
2037                 xfs_dircook_t lastoffset;
2038                 xfs_dahash_t thishash;
2039
2040                 /*
2041                  * Check for a damaged directory leaf block and pick up
2042                  * the inode number from this entry.
2043                  */
2044                 namest = XFS_DIR_LEAF_NAMESTRUCT(leaf,
2045                                     be16_to_cpu(entry->nameidx));
2046
2047                 if (unlikely(
2048                     ((char *)namest < (char *)leaf) ||
2049                     ((char *)namest >= (char *)leaf + XFS_LBSIZE(mp)))) {
2050                         XFS_CORRUPTION_ERROR("xfs_dir_leaf_getdents_int(2)",
2051                                              XFS_ERRLEVEL_LOW, mp, leaf);
2052                         xfs_dir_trace_g_du("leaf: corrupted", dp, uio);
2053                         return XFS_ERROR(EFSCORRUPTED);
2054                 }
2055
2056                 xfs_dir_trace_g_duc("leaf: middle cookie  ",
2057                                                    dp, uio, p.cook.o);
2058
2059                 if (i < (be16_to_cpu(leaf->hdr.count) - 1)) {
2060                         nexthash = be32_to_cpu(entry[1].hashval);
2061
2062                         if (nexthash == be32_to_cpu(entry->hashval))
2063                                 nextentno = entno + 1;
2064                         else
2065                                 nextentno = 0;
2066                         XFS_PUT_COOKIE(p.cook, mp, bno, nextentno, nexthash);
2067                         xfs_dir_trace_g_duc("leaf: middle cookie  ",
2068                                                    dp, uio, p.cook.o);
2069
2070                 } else if ((thishash = be32_to_cpu(leaf->hdr.info.forw))) {
2071                         xfs_dabuf_t *bp2;
2072                         xfs_dir_leafblock_t *leaf2;
2073
2074                         ASSERT(nextda != -1);
2075
2076                         retval = xfs_da_read_buf(dp->i_transp, dp, thishash,
2077                                                  nextda, &bp2, XFS_DATA_FORK);
2078                         if (retval)
2079                                 return retval;
2080
2081                         ASSERT(bp2 != NULL);
2082
2083                         leaf2 = bp2->data;
2084
2085                         if (unlikely(
2086                                (be16_to_cpu(leaf2->hdr.info.magic)
2087                                                 != XFS_DIR_LEAF_MAGIC)
2088                             || (be32_to_cpu(leaf2->hdr.info.back)
2089                                                 != bno))) {     /* GROT */
2090                                 XFS_CORRUPTION_ERROR("xfs_dir_leaf_getdents_int(3)",
2091                                                      XFS_ERRLEVEL_LOW, mp,
2092                                                      leaf2);
2093                                 xfs_da_brelse(dp->i_transp, bp2);
2094
2095                                 return XFS_ERROR(EFSCORRUPTED);
2096                         }
2097
2098                         nexthash = be32_to_cpu(leaf2->entries[0].hashval);
2099                         nextentno = -1;
2100                         XFS_PUT_COOKIE(p.cook, mp, thishash, 0, nexthash);
2101                         xfs_da_brelse(dp->i_transp, bp2);
2102                         xfs_dir_trace_g_duc("leaf: next blk cookie",
2103                                                    dp, uio, p.cook.o);
2104                 } else {
2105                         nextentno = -1;
2106                         XFS_PUT_COOKIE(p.cook, mp, 0, 0, XFS_DA_MAXHASH);
2107                 }
2108
2109                 /*
2110                  * Save off the cookie so we can fall back should the
2111                  * 'put' into the outgoing buffer fails.  To handle a run
2112                  * of equal-hashvals, the off_t structure on 64bit
2113                  * builds has entno built into the cookie to ID the
2114                  * entry.  On 32bit builds, we only have space for the
2115                  * hashval so we can't ID specific entries within a group
2116                  * of same hashval entries.   For this, lastoffset is set
2117                  * to the first in the run of equal hashvals so we don't
2118                  * include any entries unless we can include all entries
2119                  * that share the same hashval.  Hopefully the buffer
2120                  * provided is big enough to handle it (see pv763517).
2121                  */
2122                 thishash = be32_to_cpu(entry->hashval);
2123 #if (BITS_PER_LONG == 32)
2124                 if (thishash != lasthash) {
2125                         XFS_PUT_COOKIE(lastoffset, mp, bno, entno, thishash);
2126                         lastresid = uio->uio_resid;
2127                         lasthash = thishash;
2128                 } else {
2129                         xfs_dir_trace_g_duc("leaf: DUP COOKIES, skipped",
2130                                                    dp, uio, p.cook.o);
2131                 }
2132 #else
2133                 XFS_PUT_COOKIE(lastoffset, mp, bno, entno, thishash);
2134                 lastresid = uio->uio_resid;
2135 #endif /* BITS_PER_LONG == 32 */
2136
2137                 /*
2138                  * Put the current entry into the outgoing buffer.  If we fail
2139                  * then restore the UIO to the first entry in the current
2140                  * run of equal-hashval entries (probably one 1 entry long).
2141                  */
2142                 p.ino = XFS_GET_DIR_INO8(namest->inumber);
2143 #if XFS_BIG_INUMS
2144                 p.ino += mp->m_inoadd;
2145 #endif
2146                 p.name = (char *)namest->name;
2147                 p.namelen = entry->namelen;
2148
2149                 retval = p.put(&p);
2150
2151                 if (!p.done) {
2152                         uio->uio_offset = lastoffset.o;
2153                         uio->uio_resid = lastresid;
2154
2155                         *eobp = 1;
2156
2157                         xfs_dir_trace_g_du("leaf: E-O-B", dp, uio);
2158
2159                         return retval;
2160                 }
2161         }
2162
2163         uio->uio_offset = p.cook.o;
2164
2165         *eobp = 0;
2166
2167         xfs_dir_trace_g_du("leaf: E-O-F", dp, uio);
2168
2169         return 0;
2170 }
2171
2172 /*
2173  * Format a dirent64 structure and copy it out the the user's buffer.
2174  */
2175 int
2176 xfs_dir_put_dirent64_direct(xfs_dir_put_args_t *pa)
2177 {
2178         iovec_t *iovp;
2179         int reclen, namelen;
2180         xfs_dirent_t *idbp;
2181         uio_t *uio;
2182
2183         namelen = pa->namelen;
2184         reclen = DIRENTSIZE(namelen);
2185         uio = pa->uio;
2186         if (reclen > uio->uio_resid) {
2187                 pa->done = 0;
2188                 return 0;
2189         }
2190         iovp = uio->uio_iov;
2191         idbp = (xfs_dirent_t *)iovp->iov_base;
2192         iovp->iov_base = (char *)idbp + reclen;
2193         iovp->iov_len -= reclen;
2194         uio->uio_resid -= reclen;
2195         idbp->d_reclen = reclen;
2196         idbp->d_ino = pa->ino;
2197         idbp->d_off = pa->cook.o;
2198         idbp->d_name[namelen] = '\0';
2199         pa->done = 1;
2200         memcpy(idbp->d_name, pa->name, namelen);
2201         return 0;
2202 }
2203
2204 /*
2205  * Format a dirent64 structure and copy it out the the user's buffer.
2206  */
2207 int
2208 xfs_dir_put_dirent64_uio(xfs_dir_put_args_t *pa)
2209 {
2210         int             retval, reclen, namelen;
2211         xfs_dirent_t    *idbp;
2212         uio_t           *uio;
2213
2214         namelen = pa->namelen;
2215         reclen = DIRENTSIZE(namelen);
2216         uio = pa->uio;
2217         if (reclen > uio->uio_resid) {
2218                 pa->done = 0;
2219                 return 0;
2220         }
2221         idbp = pa->dbp;
2222         idbp->d_reclen = reclen;
2223         idbp->d_ino = pa->ino;
2224         idbp->d_off = pa->cook.o;
2225         idbp->d_name[namelen] = '\0';
2226         memcpy(idbp->d_name, pa->name, namelen);
2227         retval = uio_read((caddr_t)idbp, reclen, uio);
2228         pa->done = (retval == 0);
2229         return retval;
2230 }