Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
[linux-2.6] / fs / xfs / xfs_itable.c
1 /*
2  * Copyright (c) 2000-2002 Silicon Graphics, Inc.  All Rights Reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms of version 2 of the GNU General Public License as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it would be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11  *
12  * Further, this software is distributed without any warranty that it is
13  * free of the rightful claim of any third person regarding infringement
14  * or the like.  Any license provided herein, whether implied or
15  * otherwise, applies only to this software file.  Patent licenses, if
16  * any, provided herein do not apply to combinations of this program with
17  * other software, or any other product whatsoever.
18  *
19  * You should have received a copy of the GNU General Public License along
20  * with this program; if not, write the Free Software Foundation, Inc., 59
21  * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22  *
23  * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24  * Mountain View, CA  94043, or:
25  *
26  * http://www.sgi.com
27  *
28  * For further information regarding this notice, see:
29  *
30  * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31  */
32
33 #include "xfs.h"
34 #include "xfs_macros.h"
35 #include "xfs_types.h"
36 #include "xfs_inum.h"
37 #include "xfs_log.h"
38 #include "xfs_trans.h"
39 #include "xfs_sb.h"
40 #include "xfs_dir.h"
41 #include "xfs_dir2.h"
42 #include "xfs_dmapi.h"
43 #include "xfs_mount.h"
44 #include "xfs_ag.h"
45 #include "xfs_alloc_btree.h"
46 #include "xfs_bmap_btree.h"
47 #include "xfs_ialloc_btree.h"
48 #include "xfs_btree.h"
49 #include "xfs_attr_sf.h"
50 #include "xfs_dir_sf.h"
51 #include "xfs_dir2_sf.h"
52 #include "xfs_dinode.h"
53 #include "xfs_inode.h"
54 #include "xfs_ialloc.h"
55 #include "xfs_itable.h"
56 #include "xfs_error.h"
57
/*
 * Fallbacks for builds that do not define HAVE_USERACC:
 * useracc() always evaluates to 0, which its caller in this file treats
 * as "no error", and unuseracc() expands to nothing at all.
 */
#ifndef HAVE_USERACC
#define useracc(addr, len, mode, arg)   (0)
#define unuseracc(addr, len, mode)
#endif
62
63 STATIC int
64 xfs_bulkstat_one_iget(
65         xfs_mount_t     *mp,            /* mount point for filesystem */
66         xfs_ino_t       ino,            /* inode number to get data for */
67         xfs_daddr_t     bno,            /* starting bno of inode cluster */
68         xfs_bstat_t     *buf,           /* return buffer */
69         int             *stat)          /* BULKSTAT_RV_... */
70 {
71         xfs_dinode_core_t *dic;         /* dinode core info pointer */
72         xfs_inode_t     *ip;            /* incore inode pointer */
73         int             error;
74
75         error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_SHARED, &ip, bno);
76         if (error) {
77                 *stat = BULKSTAT_RV_NOTHING;
78                 return error;
79         }
80
81         ASSERT(ip != NULL);
82         ASSERT(ip->i_blkno != (xfs_daddr_t)0);
83         if (ip->i_d.di_mode == 0) {
84                 *stat = BULKSTAT_RV_NOTHING;
85                 error = XFS_ERROR(ENOENT);
86                 goto out_iput;
87         }
88
89         dic = &ip->i_d;
90
91         /* xfs_iget returns the following without needing
92          * further change.
93          */
94         buf->bs_nlink = dic->di_nlink;
95         buf->bs_projid = dic->di_projid;
96         buf->bs_ino = ino;
97         buf->bs_mode = dic->di_mode;
98         buf->bs_uid = dic->di_uid;
99         buf->bs_gid = dic->di_gid;
100         buf->bs_size = dic->di_size;
101         buf->bs_atime.tv_sec = dic->di_atime.t_sec;
102         buf->bs_atime.tv_nsec = dic->di_atime.t_nsec;
103         buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
104         buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
105         buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
106         buf->bs_ctime.tv_nsec = dic->di_ctime.t_nsec;
107         buf->bs_xflags = xfs_ip2xflags(ip);
108         buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog;
109         buf->bs_extents = dic->di_nextents;
110         buf->bs_gen = dic->di_gen;
111         memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
112         buf->bs_dmevmask = dic->di_dmevmask;
113         buf->bs_dmstate = dic->di_dmstate;
114         buf->bs_aextents = dic->di_anextents;
115
116         switch (dic->di_format) {
117         case XFS_DINODE_FMT_DEV:
118                 buf->bs_rdev = ip->i_df.if_u2.if_rdev;
119                 buf->bs_blksize = BLKDEV_IOSIZE;
120                 buf->bs_blocks = 0;
121                 break;
122         case XFS_DINODE_FMT_LOCAL:
123         case XFS_DINODE_FMT_UUID:
124                 buf->bs_rdev = 0;
125                 buf->bs_blksize = mp->m_sb.sb_blocksize;
126                 buf->bs_blocks = 0;
127                 break;
128         case XFS_DINODE_FMT_EXTENTS:
129         case XFS_DINODE_FMT_BTREE:
130                 buf->bs_rdev = 0;
131                 buf->bs_blksize = mp->m_sb.sb_blocksize;
132                 buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks;
133                 break;
134         }
135
136  out_iput:
137         xfs_iput(ip, XFS_ILOCK_SHARED);
138         return error;
139 }
140
141 STATIC int
142 xfs_bulkstat_one_dinode(
143         xfs_mount_t     *mp,            /* mount point for filesystem */
144         xfs_ino_t       ino,            /* inode number to get data for */
145         xfs_dinode_t    *dip,           /* dinode inode pointer */
146         xfs_bstat_t     *buf)           /* return buffer */
147 {
148         xfs_dinode_core_t *dic;         /* dinode core info pointer */
149
150         dic = &dip->di_core;
151
152         /*
153          * The inode format changed when we moved the link count and
154          * made it 32 bits long.  If this is an old format inode,
155          * convert it in memory to look like a new one.  If it gets
156          * flushed to disk we will convert back before flushing or
157          * logging it.  We zero out the new projid field and the old link
158          * count field.  We'll handle clearing the pad field (the remains
159          * of the old uuid field) when we actually convert the inode to
160          * the new format. We don't change the version number so that we
161          * can distinguish this from a real new format inode.
162          */
163         if (INT_GET(dic->di_version, ARCH_CONVERT) == XFS_DINODE_VERSION_1) {
164                 buf->bs_nlink = INT_GET(dic->di_onlink, ARCH_CONVERT);
165                 buf->bs_projid = 0;
166         } else {
167                 buf->bs_nlink = INT_GET(dic->di_nlink, ARCH_CONVERT);
168                 buf->bs_projid = INT_GET(dic->di_projid, ARCH_CONVERT);
169         }
170
171         buf->bs_ino = ino;
172         buf->bs_mode = INT_GET(dic->di_mode, ARCH_CONVERT);
173         buf->bs_uid = INT_GET(dic->di_uid, ARCH_CONVERT);
174         buf->bs_gid = INT_GET(dic->di_gid, ARCH_CONVERT);
175         buf->bs_size = INT_GET(dic->di_size, ARCH_CONVERT);
176         buf->bs_atime.tv_sec = INT_GET(dic->di_atime.t_sec, ARCH_CONVERT);
177         buf->bs_atime.tv_nsec = INT_GET(dic->di_atime.t_nsec, ARCH_CONVERT);
178         buf->bs_mtime.tv_sec = INT_GET(dic->di_mtime.t_sec, ARCH_CONVERT);
179         buf->bs_mtime.tv_nsec = INT_GET(dic->di_mtime.t_nsec, ARCH_CONVERT);
180         buf->bs_ctime.tv_sec = INT_GET(dic->di_ctime.t_sec, ARCH_CONVERT);
181         buf->bs_ctime.tv_nsec = INT_GET(dic->di_ctime.t_nsec, ARCH_CONVERT);
182         buf->bs_xflags = xfs_dic2xflags(dic);
183         buf->bs_extsize = INT_GET(dic->di_extsize, ARCH_CONVERT) << mp->m_sb.sb_blocklog;
184         buf->bs_extents = INT_GET(dic->di_nextents, ARCH_CONVERT);
185         buf->bs_gen = INT_GET(dic->di_gen, ARCH_CONVERT);
186         memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
187         buf->bs_dmevmask = INT_GET(dic->di_dmevmask, ARCH_CONVERT);
188         buf->bs_dmstate = INT_GET(dic->di_dmstate, ARCH_CONVERT);
189         buf->bs_aextents = INT_GET(dic->di_anextents, ARCH_CONVERT);
190
191         switch (INT_GET(dic->di_format, ARCH_CONVERT)) {
192         case XFS_DINODE_FMT_DEV:
193                 buf->bs_rdev = INT_GET(dip->di_u.di_dev, ARCH_CONVERT);
194                 buf->bs_blksize = BLKDEV_IOSIZE;
195                 buf->bs_blocks = 0;
196                 break;
197         case XFS_DINODE_FMT_LOCAL:
198         case XFS_DINODE_FMT_UUID:
199                 buf->bs_rdev = 0;
200                 buf->bs_blksize = mp->m_sb.sb_blocksize;
201                 buf->bs_blocks = 0;
202                 break;
203         case XFS_DINODE_FMT_EXTENTS:
204         case XFS_DINODE_FMT_BTREE:
205                 buf->bs_rdev = 0;
206                 buf->bs_blksize = mp->m_sb.sb_blocksize;
207                 buf->bs_blocks = INT_GET(dic->di_nblocks, ARCH_CONVERT);
208                 break;
209         }
210
211         return 0;
212 }
213
214 /*
215  * Return stat information for one inode.
216  * Return 0 if ok, else errno.
217  */
218 int                             /* error status */
219 xfs_bulkstat_one(
220         xfs_mount_t     *mp,            /* mount point for filesystem */
221         xfs_ino_t       ino,            /* inode number to get data for */
222         void            __user *buffer, /* buffer to place output in */
223         int             ubsize,         /* size of buffer */
224         void            *private_data,  /* my private data */
225         xfs_daddr_t     bno,            /* starting bno of inode cluster */
226         int             *ubused,        /* bytes used by me */
227         void            *dibuff,        /* on-disk inode buffer */
228         int             *stat)          /* BULKSTAT_RV_... */
229 {
230         xfs_bstat_t     *buf;           /* return buffer */
231         int             error = 0;      /* error value */
232         xfs_dinode_t    *dip;           /* dinode inode pointer */
233
234         dip = (xfs_dinode_t *)dibuff;
235
236         if (!buffer || ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino ||
237             (XFS_SB_VERSION_HASQUOTA(&mp->m_sb) &&
238              (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino))) {
239                 *stat = BULKSTAT_RV_NOTHING;
240                 return XFS_ERROR(EINVAL);
241         }
242         if (ubsize < sizeof(*buf)) {
243                 *stat = BULKSTAT_RV_NOTHING;
244                 return XFS_ERROR(ENOMEM);
245         }
246
247         buf = kmem_alloc(sizeof(*buf), KM_SLEEP);
248
249         if (dip == NULL) {
250                 /* We're not being passed a pointer to a dinode.  This happens
251                  * if BULKSTAT_FG_IGET is selected.  Do the iget.
252                  */
253                 error = xfs_bulkstat_one_iget(mp, ino, bno, buf, stat);
254                 if (error)
255                         goto out_free;
256         } else {
257                 xfs_bulkstat_one_dinode(mp, ino, dip, buf);
258         }
259
260         if (copy_to_user(buffer, buf, sizeof(*buf)))  {
261                 *stat = BULKSTAT_RV_NOTHING;
262                 error =  EFAULT;
263                 goto out_free;
264         }
265
266         *stat = BULKSTAT_RV_DIDONE;
267         if (ubused)
268                 *ubused = sizeof(*buf);
269
270  out_free:
271         kmem_free(buf, sizeof(*buf));
272         return error;
273 }
274
275 /*
276  * Return stat information in bulk (by-inode) for the filesystem.
277  */
278 int                                     /* error status */
279 xfs_bulkstat(
280         xfs_mount_t             *mp,    /* mount point for filesystem */
281         xfs_ino_t               *lastinop, /* last inode returned */
282         int                     *ubcountp, /* size of buffer/count returned */
283         bulkstat_one_pf         formatter, /* func that'd fill a single buf */
284         void                    *private_data,/* private data for formatter */
285         size_t                  statstruct_size, /* sizeof struct filling */
286         char                    __user *ubuffer, /* buffer with inode stats */
287         int                     flags,  /* defined in xfs_itable.h */
288         int                     *done)  /* 1 if there're more stats to get */
289 {
290         xfs_agblock_t           agbno=0;/* allocation group block number */
291         xfs_buf_t               *agbp;  /* agi header buffer */
292         xfs_agi_t               *agi;   /* agi header data */
293         xfs_agino_t             agino;  /* inode # in allocation group */
294         xfs_agnumber_t          agno;   /* allocation group number */
295         xfs_daddr_t             bno;    /* inode cluster start daddr */
296         int                     chunkidx; /* current index into inode chunk */
297         int                     clustidx; /* current index into inode cluster */
298         xfs_btree_cur_t         *cur;   /* btree cursor for ialloc btree */
299         int                     end_of_ag; /* set if we've seen the ag end */
300         int                     error;  /* error code */
301         int                     fmterror;/* bulkstat formatter result */
302         __int32_t               gcnt;   /* current btree rec's count */
303         xfs_inofree_t           gfree;  /* current btree rec's free mask */
304         xfs_agino_t             gino;   /* current btree rec's start inode */
305         int                     i;      /* loop index */
306         int                     icount; /* count of inodes good in irbuf */
307         xfs_ino_t               ino;    /* inode number (filesystem) */
308         xfs_inobt_rec_t         *irbp;  /* current irec buffer pointer */
309         xfs_inobt_rec_t         *irbuf; /* start of irec buffer */
310         xfs_inobt_rec_t         *irbufend; /* end of good irec buffer entries */
311         xfs_ino_t               lastino=0; /* last inode number returned */
312         int                     nbcluster; /* # of blocks in a cluster */
313         int                     nicluster; /* # of inodes in a cluster */
314         int                     nimask; /* mask for inode clusters */
315         int                     nirbuf; /* size of irbuf */
316         int                     rval;   /* return value error code */
317         int                     tmp;    /* result value from btree calls */
318         int                     ubcount; /* size of user's buffer */
319         int                     ubleft; /* bytes left in user's buffer */
320         char                    __user *ubufp;  /* pointer into user's buffer */
321         int                     ubelem; /* spaces used in user's buffer */
322         int                     ubused; /* bytes used by formatter */
323         xfs_buf_t               *bp;    /* ptr to on-disk inode cluster buf */
324         xfs_dinode_t            *dip;   /* ptr into bp for specific inode */
325         xfs_inode_t             *ip;    /* ptr to in-core inode struct */
326
327         /*
328          * Get the last inode value, see if there's nothing to do.
329          */
330         ino = (xfs_ino_t)*lastinop;
331         dip = NULL;
332         agno = XFS_INO_TO_AGNO(mp, ino);
333         agino = XFS_INO_TO_AGINO(mp, ino);
334         if (agno >= mp->m_sb.sb_agcount ||
335             ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
336                 *done = 1;
337                 *ubcountp = 0;
338                 return 0;
339         }
340         ubcount = *ubcountp; /* statstruct's */
341         ubleft = ubcount * statstruct_size; /* bytes */
342         *ubcountp = ubelem = 0;
343         *done = 0;
344         fmterror = 0;
345         ubufp = ubuffer;
346         nicluster = mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp) ?
347                 mp->m_sb.sb_inopblock :
348                 (XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog);
349         nimask = ~(nicluster - 1);
350         nbcluster = nicluster >> mp->m_sb.sb_inopblog;
351         /*
352          * Lock down the user's buffer. If a buffer was not sent, as in the case
353          * disk quota code calls here, we skip this.
354          */
355         if (ubuffer &&
356             (error = useracc(ubuffer, ubcount * statstruct_size,
357                         (B_READ|B_PHYS), NULL))) {
358                 return error;
359         }
360         /*
361          * Allocate a page-sized buffer for inode btree records.
362          * We could try allocating something smaller, but for normal
363          * calls we'll always (potentially) need the whole page.
364          */
365         irbuf = kmem_alloc(NBPC, KM_SLEEP);
366         nirbuf = NBPC / sizeof(*irbuf);
367         /*
368          * Loop over the allocation groups, starting from the last
369          * inode returned; 0 means start of the allocation group.
370          */
371         rval = 0;
372         while (ubleft >= statstruct_size && agno < mp->m_sb.sb_agcount) {
373                 bp = NULL;
374                 down_read(&mp->m_peraglock);
375                 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
376                 up_read(&mp->m_peraglock);
377                 if (error) {
378                         /*
379                          * Skip this allocation group and go to the next one.
380                          */
381                         agno++;
382                         agino = 0;
383                         continue;
384                 }
385                 agi = XFS_BUF_TO_AGI(agbp);
386                 /*
387                  * Allocate and initialize a btree cursor for ialloc btree.
388                  */
389                 cur = xfs_btree_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_INO,
390                         (xfs_inode_t *)0, 0);
391                 irbp = irbuf;
392                 irbufend = irbuf + nirbuf;
393                 end_of_ag = 0;
394                 /*
395                  * If we're returning in the middle of an allocation group,
396                  * we need to get the remainder of the chunk we're in.
397                  */
398                 if (agino > 0) {
399                         /*
400                          * Lookup the inode chunk that this inode lives in.
401                          */
402                         error = xfs_inobt_lookup_le(cur, agino, 0, 0, &tmp);
403                         if (!error &&   /* no I/O error */
404                             tmp &&      /* lookup succeeded */
405                                         /* got the record, should always work */
406                             !(error = xfs_inobt_get_rec(cur, &gino, &gcnt,
407                                     &gfree, &i)) &&
408                             i == 1 &&
409                                         /* this is the right chunk */
410                             agino < gino + XFS_INODES_PER_CHUNK &&
411                                         /* lastino was not last in chunk */
412                             (chunkidx = agino - gino + 1) <
413                                     XFS_INODES_PER_CHUNK &&
414                                         /* there are some left allocated */
415                             XFS_INOBT_MASKN(chunkidx,
416                                     XFS_INODES_PER_CHUNK - chunkidx) & ~gfree) {
417                                 /*
418                                  * Grab the chunk record.  Mark all the
419                                  * uninteresting inodes (because they're
420                                  * before our start point) free.
421                                  */
422                                 for (i = 0; i < chunkidx; i++) {
423                                         if (XFS_INOBT_MASK(i) & ~gfree)
424                                                 gcnt++;
425                                 }
426                                 gfree |= XFS_INOBT_MASKN(0, chunkidx);
427                                 INT_SET(irbp->ir_startino, ARCH_CONVERT, gino);
428                                 INT_SET(irbp->ir_freecount, ARCH_CONVERT, gcnt);
429                                 INT_SET(irbp->ir_free, ARCH_CONVERT, gfree);
430                                 irbp++;
431                                 agino = gino + XFS_INODES_PER_CHUNK;
432                                 icount = XFS_INODES_PER_CHUNK - gcnt;
433                         } else {
434                                 /*
435                                  * If any of those tests failed, bump the
436                                  * inode number (just in case).
437                                  */
438                                 agino++;
439                                 icount = 0;
440                         }
441                         /*
442                          * In any case, increment to the next record.
443                          */
444                         if (!error)
445                                 error = xfs_inobt_increment(cur, 0, &tmp);
446                 } else {
447                         /*
448                          * Start of ag.  Lookup the first inode chunk.
449                          */
450                         error = xfs_inobt_lookup_ge(cur, 0, 0, 0, &tmp);
451                         icount = 0;
452                 }
453                 /*
454                  * Loop through inode btree records in this ag,
455                  * until we run out of inodes or space in the buffer.
456                  */
457                 while (irbp < irbufend && icount < ubcount) {
458                         /*
459                          * Loop as long as we're unable to read the
460                          * inode btree.
461                          */
462                         while (error) {
463                                 agino += XFS_INODES_PER_CHUNK;
464                                 if (XFS_AGINO_TO_AGBNO(mp, agino) >=
465                                                 INT_GET(agi->agi_length, ARCH_CONVERT))
466                                         break;
467                                 error = xfs_inobt_lookup_ge(cur, agino, 0, 0,
468                                                             &tmp);
469                         }
470                         /*
471                          * If ran off the end of the ag either with an error,
472                          * or the normal way, set end and stop collecting.
473                          */
474                         if (error ||
475                             (error = xfs_inobt_get_rec(cur, &gino, &gcnt,
476                                     &gfree, &i)) ||
477                             i == 0) {
478                                 end_of_ag = 1;
479                                 break;
480                         }
481                         /*
482                          * If this chunk has any allocated inodes, save it.
483                          */
484                         if (gcnt < XFS_INODES_PER_CHUNK) {
485                                 INT_SET(irbp->ir_startino, ARCH_CONVERT, gino);
486                                 INT_SET(irbp->ir_freecount, ARCH_CONVERT, gcnt);
487                                 INT_SET(irbp->ir_free, ARCH_CONVERT, gfree);
488                                 irbp++;
489                                 icount += XFS_INODES_PER_CHUNK - gcnt;
490                         }
491                         /*
492                          * Set agino to after this chunk and bump the cursor.
493                          */
494                         agino = gino + XFS_INODES_PER_CHUNK;
495                         error = xfs_inobt_increment(cur, 0, &tmp);
496                 }
497                 /*
498                  * Drop the btree buffers and the agi buffer.
499                  * We can't hold any of the locks these represent
500                  * when calling iget.
501                  */
502                 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
503                 xfs_buf_relse(agbp);
504                 /*
505                  * Now format all the good inodes into the user's buffer.
506                  */
507                 irbufend = irbp;
508                 for (irbp = irbuf;
509                      irbp < irbufend && ubleft >= statstruct_size; irbp++) {
510                         /*
511                          * Read-ahead the next chunk's worth of inodes.
512                          */
513                         if (&irbp[1] < irbufend) {
514                                 /*
515                                  * Loop over all clusters in the next chunk.
516                                  * Do a readahead if there are any allocated
517                                  * inodes in that cluster.
518                                  */
519                                 for (agbno = XFS_AGINO_TO_AGBNO(mp,
520                                                         INT_GET(irbp[1].ir_startino, ARCH_CONVERT)),
521                                      chunkidx = 0;
522                                      chunkidx < XFS_INODES_PER_CHUNK;
523                                      chunkidx += nicluster,
524                                      agbno += nbcluster) {
525                                         if (XFS_INOBT_MASKN(chunkidx,
526                                                             nicluster) &
527                                             ~(INT_GET(irbp[1].ir_free, ARCH_CONVERT)))
528                                                 xfs_btree_reada_bufs(mp, agno,
529                                                         agbno, nbcluster);
530                                 }
531                         }
532                         /*
533                          * Now process this chunk of inodes.
534                          */
535                         for (agino = INT_GET(irbp->ir_startino, ARCH_CONVERT), chunkidx = 0, clustidx = 0;
536                              ubleft > 0 &&
537                                 INT_GET(irbp->ir_freecount, ARCH_CONVERT) < XFS_INODES_PER_CHUNK;
538                              chunkidx++, clustidx++, agino++) {
539                                 ASSERT(chunkidx < XFS_INODES_PER_CHUNK);
540                                 /*
541                                  * Recompute agbno if this is the
542                                  * first inode of the cluster.
543                                  *
544                                  * Careful with clustidx.   There can be
545                                  * multple clusters per chunk, a single
546                                  * cluster per chunk or a cluster that has
547                                  * inodes represented from several different
548                                  * chunks (if blocksize is large).
549                                  *
550                                  * Because of this, the starting clustidx is
551                                  * initialized to zero in this loop but must
552                                  * later be reset after reading in the cluster
553                                  * buffer.
554                                  */
555                                 if ((chunkidx & (nicluster - 1)) == 0) {
556                                         agbno = XFS_AGINO_TO_AGBNO(mp,
557                                                         INT_GET(irbp->ir_startino, ARCH_CONVERT)) +
558                                                 ((chunkidx & nimask) >>
559                                                  mp->m_sb.sb_inopblog);
560
561                                         if (flags & BULKSTAT_FG_QUICK) {
562                                                 ino = XFS_AGINO_TO_INO(mp, agno,
563                                                                        agino);
564                                                 bno = XFS_AGB_TO_DADDR(mp, agno,
565                                                                        agbno);
566
567                                                 /*
568                                                  * Get the inode cluster buffer
569                                                  */
570                                                 ASSERT(xfs_inode_zone != NULL);
571                                                 ip = kmem_zone_zalloc(xfs_inode_zone,
572                                                                       KM_SLEEP);
573                                                 ip->i_ino = ino;
574                                                 ip->i_mount = mp;
575                                                 if (bp)
576                                                         xfs_buf_relse(bp);
577                                                 error = xfs_itobp(mp, NULL, ip,
578                                                                   &dip, &bp, bno);
579                                                 if (!error)
580                                                         clustidx = ip->i_boffset / mp->m_sb.sb_inodesize;
581                                                 kmem_zone_free(xfs_inode_zone, ip);
582                                                 if (XFS_TEST_ERROR(error != 0,
583                                                                    mp, XFS_ERRTAG_BULKSTAT_READ_CHUNK,
584                                                                    XFS_RANDOM_BULKSTAT_READ_CHUNK)) {
585                                                         bp = NULL;
586                                                         break;
587                                                 }
588                                         }
589                                 }
590                                 /*
591                                  * Skip if this inode is free.
592                                  */
593                                 if (XFS_INOBT_MASK(chunkidx) & INT_GET(irbp->ir_free, ARCH_CONVERT))
594                                         continue;
595                                 /*
596                                  * Count used inodes as free so we can tell
597                                  * when the chunk is used up.
598                                  */
599                                 INT_MOD(irbp->ir_freecount, ARCH_CONVERT, +1);
600                                 ino = XFS_AGINO_TO_INO(mp, agno, agino);
601                                 bno = XFS_AGB_TO_DADDR(mp, agno, agbno);
602                                 if (flags & BULKSTAT_FG_QUICK) {
603                                         dip = (xfs_dinode_t *)xfs_buf_offset(bp,
604                                               (clustidx << mp->m_sb.sb_inodelog));
605
606                                         if (INT_GET(dip->di_core.di_magic, ARCH_CONVERT)
607                                                     != XFS_DINODE_MAGIC
608                                             || !XFS_DINODE_GOOD_VERSION(
609                                                     INT_GET(dip->di_core.di_version, ARCH_CONVERT)))
610                                                 continue;
611                                 }
612
613                                 /*
614                                  * Get the inode and fill in a single buffer.
615                                  * BULKSTAT_FG_QUICK uses dip to fill it in.
616                                  * BULKSTAT_FG_IGET uses igets.
617                                  * See: xfs_bulkstat_one & xfs_dm_bulkstat_one.
618                                  * This is also used to count inodes/blks, etc
619                                  * in xfs_qm_quotacheck.
620                                  */
621                                 ubused = statstruct_size;
622                                 error = formatter(mp, ino, ubufp,
623                                                 ubleft, private_data,
624                                                 bno, &ubused, dip, &fmterror);
625                                 if (fmterror == BULKSTAT_RV_NOTHING) {
626                                         if (error == ENOMEM)
627                                                 ubleft = 0;
628                                         continue;
629                                 }
630                                 if (fmterror == BULKSTAT_RV_GIVEUP) {
631                                         ubleft = 0;
632                                         ASSERT(error);
633                                         rval = error;
634                                         break;
635                                 }
636                                 if (ubufp)
637                                         ubufp += ubused;
638                                 ubleft -= ubused;
639                                 ubelem++;
640                                 lastino = ino;
641                         }
642                 }
643
644                 if (bp)
645                         xfs_buf_relse(bp);
646
647                 /*
648                  * Set up for the next loop iteration.
649                  */
650                 if (ubleft > 0) {
651                         if (end_of_ag) {
652                                 agno++;
653                                 agino = 0;
654                         } else
655                                 agino = XFS_INO_TO_AGINO(mp, lastino);
656                 } else
657                         break;
658         }
659         /*
660          * Done, we're either out of filesystem or space to put the data.
661          */
662         kmem_free(irbuf, NBPC);
663         if (ubuffer)
664                 unuseracc(ubuffer, ubcount * statstruct_size, (B_READ|B_PHYS));
665         *ubcountp = ubelem;
666         if (agno >= mp->m_sb.sb_agcount) {
667                 /*
668                  * If we ran out of filesystem, mark lastino as off
669                  * the end of the filesystem, so the next call
670                  * will return immediately.
671                  */
672                 *lastinop = (xfs_ino_t)XFS_AGINO_TO_INO(mp, agno, 0);
673                 *done = 1;
674         } else
675                 *lastinop = (xfs_ino_t)lastino;
676
677         return rval;
678 }
679
680 /*
681  * Return stat information in bulk (by-inode) for the filesystem.
682  * Special case for non-sequential one inode bulkstat.
683  */
684 int                                     /* error status */
685 xfs_bulkstat_single(
686         xfs_mount_t             *mp,    /* mount point for filesystem */
687         xfs_ino_t               *lastinop, /* inode to return */
688         char                    __user *buffer, /* buffer with inode stats */
689         int                     *done)  /* 1 if there're more stats to get */
690 {
691         int                     count;  /* count value for bulkstat call */
692         int                     error;  /* return value */
693         xfs_ino_t               ino;    /* filesystem inode number */
694         int                     res;    /* result from bs1 */
695
696         /*
697          * note that requesting valid inode numbers which are not allocated
698          * to inodes will most likely cause xfs_itobp to generate warning
699          * messages about bad magic numbers. This is ok. The fact that
700          * the inode isn't actually an inode is handled by the
701          * error check below. Done this way to make the usual case faster
702          * at the expense of the error case.
703          */
704
705         ino = (xfs_ino_t)*lastinop;
706         error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t),
707                                  NULL, 0, NULL, NULL, &res);
708         if (error) {
709                 /*
710                  * Special case way failed, do it the "long" way
711                  * to see if that works.
712                  */
713                 (*lastinop)--;
714                 count = 1;
715                 if (xfs_bulkstat(mp, lastinop, &count, xfs_bulkstat_one,
716                                 NULL, sizeof(xfs_bstat_t), buffer,
717                                 BULKSTAT_FG_IGET, done))
718                         return error;
719                 if (count == 0 || (xfs_ino_t)*lastinop != ino)
720                         return error == EFSCORRUPTED ?
721                                 XFS_ERROR(EINVAL) : error;
722                 else
723                         return 0;
724         }
725         *done = 0;
726         return 0;
727 }
728
729 /*
730  * Return inode number table for the filesystem.
731  */
732 int                                     /* error status */
733 xfs_inumbers(
734         xfs_mount_t     *mp,            /* mount point for filesystem */
735         xfs_ino_t       *lastino,       /* last inode returned */
736         int             *count,         /* size of buffer/count returned */
737         xfs_inogrp_t    __user *ubuffer)/* buffer with inode descriptions */
738 {
739         xfs_buf_t       *agbp;
740         xfs_agino_t     agino;
741         xfs_agnumber_t  agno;
742         int             bcount;
743         xfs_inogrp_t    *buffer;
744         int             bufidx;
745         xfs_btree_cur_t *cur;
746         int             error;
747         __int32_t       gcnt;
748         xfs_inofree_t   gfree;
749         xfs_agino_t     gino;
750         int             i;
751         xfs_ino_t       ino;
752         int             left;
753         int             tmp;
754
755         ino = (xfs_ino_t)*lastino;
756         agno = XFS_INO_TO_AGNO(mp, ino);
757         agino = XFS_INO_TO_AGINO(mp, ino);
758         left = *count;
759         *count = 0;
760         bcount = MIN(left, (int)(NBPP / sizeof(*buffer)));
761         buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP);
762         error = bufidx = 0;
763         cur = NULL;
764         agbp = NULL;
765         while (left > 0 && agno < mp->m_sb.sb_agcount) {
766                 if (agbp == NULL) {
767                         down_read(&mp->m_peraglock);
768                         error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
769                         up_read(&mp->m_peraglock);
770                         if (error) {
771                                 /*
772                                  * If we can't read the AGI of this ag,
773                                  * then just skip to the next one.
774                                  */
775                                 ASSERT(cur == NULL);
776                                 agbp = NULL;
777                                 agno++;
778                                 agino = 0;
779                                 continue;
780                         }
781                         cur = xfs_btree_init_cursor(mp, NULL, agbp, agno,
782                                 XFS_BTNUM_INO, (xfs_inode_t *)0, 0);
783                         error = xfs_inobt_lookup_ge(cur, agino, 0, 0, &tmp);
784                         if (error) {
785                                 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
786                                 cur = NULL;
787                                 xfs_buf_relse(agbp);
788                                 agbp = NULL;
789                                 /*
790                                  * Move up the the last inode in the current
791                                  * chunk.  The lookup_ge will always get
792                                  * us the first inode in the next chunk.
793                                  */
794                                 agino += XFS_INODES_PER_CHUNK - 1;
795                                 continue;
796                         }
797                 }
798                 if ((error = xfs_inobt_get_rec(cur, &gino, &gcnt, &gfree,
799                         &i)) ||
800                     i == 0) {
801                         xfs_buf_relse(agbp);
802                         agbp = NULL;
803                         xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
804                         cur = NULL;
805                         agno++;
806                         agino = 0;
807                         continue;
808                 }
809                 agino = gino + XFS_INODES_PER_CHUNK - 1;
810                 buffer[bufidx].xi_startino = XFS_AGINO_TO_INO(mp, agno, gino);
811                 buffer[bufidx].xi_alloccount = XFS_INODES_PER_CHUNK - gcnt;
812                 buffer[bufidx].xi_allocmask = ~gfree;
813                 bufidx++;
814                 left--;
815                 if (bufidx == bcount) {
816                         if (copy_to_user(ubuffer, buffer,
817                                         bufidx * sizeof(*buffer))) {
818                                 error = XFS_ERROR(EFAULT);
819                                 break;
820                         }
821                         ubuffer += bufidx;
822                         *count += bufidx;
823                         bufidx = 0;
824                 }
825                 if (left) {
826                         error = xfs_inobt_increment(cur, 0, &tmp);
827                         if (error) {
828                                 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
829                                 cur = NULL;
830                                 xfs_buf_relse(agbp);
831                                 agbp = NULL;
832                                 /*
833                                  * The agino value has already been bumped.
834                                  * Just try to skip up to it.
835                                  */
836                                 agino += XFS_INODES_PER_CHUNK;
837                                 continue;
838                         }
839                 }
840         }
841         if (!error) {
842                 if (bufidx) {
843                         if (copy_to_user(ubuffer, buffer,
844                                         bufidx * sizeof(*buffer)))
845                                 error = XFS_ERROR(EFAULT);
846                         else
847                                 *count += bufidx;
848                 }
849                 *lastino = XFS_AGINO_TO_INO(mp, agno, agino);
850         }
851         kmem_free(buffer, bcount * sizeof(*buffer));
852         if (cur)
853                 xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR :
854                                            XFS_BTREE_NOERROR));
855         if (agbp)
856                 xfs_buf_relse(agbp);
857         return error;
858 }