Pull sim-fixes into release branch
[linux-2.6] / fs / xfs / quota / xfs_qm.c
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.  All Rights Reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms of version 2 of the GNU General Public License as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it would be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11  *
12  * Further, this software is distributed without any warranty that it is
13  * free of the rightful claim of any third person regarding infringement
14  * or the like.  Any license provided herein, whether implied or
15  * otherwise, applies only to this software file.  Patent licenses, if
16  * any, provided herein do not apply to combinations of this program with
17  * other software, or any other product whatsoever.
18  *
19  * You should have received a copy of the GNU General Public License along
20  * with this program; if not, write the Free Software Foundation, Inc., 59
21  * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22  *
23  * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24  * Mountain View, CA  94043, or:
25  *
26  * http://www.sgi.com
27  *
28  * For further information regarding this notice, see:
29  *
30  * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31  */
32
33 #include "xfs.h"
34 #include "xfs_fs.h"
35 #include "xfs_inum.h"
36 #include "xfs_log.h"
37 #include "xfs_clnt.h"
38 #include "xfs_trans.h"
39 #include "xfs_sb.h"
40 #include "xfs_ag.h"
41 #include "xfs_dir.h"
42 #include "xfs_dir2.h"
43 #include "xfs_alloc.h"
44 #include "xfs_dmapi.h"
45 #include "xfs_quota.h"
46 #include "xfs_mount.h"
47 #include "xfs_alloc_btree.h"
48 #include "xfs_bmap_btree.h"
49 #include "xfs_ialloc_btree.h"
50 #include "xfs_btree.h"
51 #include "xfs_ialloc.h"
52 #include "xfs_attr_sf.h"
53 #include "xfs_dir_sf.h"
54 #include "xfs_dir2_sf.h"
55 #include "xfs_dinode.h"
56 #include "xfs_inode.h"
57 #include "xfs_bmap.h"
58 #include "xfs_bit.h"
59 #include "xfs_rtalloc.h"
60 #include "xfs_error.h"
61 #include "xfs_itable.h"
62 #include "xfs_rw.h"
63 #include "xfs_acl.h"
64 #include "xfs_cap.h"
65 #include "xfs_mac.h"
66 #include "xfs_attr.h"
67 #include "xfs_buf_item.h"
68 #include "xfs_trans_space.h"
69 #include "xfs_utils.h"
70
71 #include "xfs_qm.h"
72
73 /*
74  * The global quota manager. There is only one of these for the entire
75  * system, _not_ one per file system. XQM keeps track of the overall
76  * quota functionality, including maintaining the freelist and hash
77  * tables of dquots.
78  */
79 mutex_t xfs_Gqm_lock;
80 struct xfs_qm   *xfs_Gqm;
81
82 kmem_zone_t     *qm_dqzone;
83 kmem_zone_t     *qm_dqtrxzone;
84 STATIC kmem_shaker_t    xfs_qm_shaker;
85
86 STATIC void     xfs_qm_list_init(xfs_dqlist_t *, char *, int);
87 STATIC void     xfs_qm_list_destroy(xfs_dqlist_t *);
88
89 STATIC void     xfs_qm_freelist_init(xfs_frlist_t *);
90 STATIC void     xfs_qm_freelist_destroy(xfs_frlist_t *);
91 STATIC int      xfs_qm_mplist_nowait(xfs_mount_t *);
92 STATIC int      xfs_qm_dqhashlock_nowait(xfs_dquot_t *);
93
94 STATIC int      xfs_qm_init_quotainos(xfs_mount_t *);
95 STATIC int      xfs_qm_init_quotainfo(xfs_mount_t *);
96 STATIC int      xfs_qm_shake(int, unsigned int);
97
98 #ifdef DEBUG
99 extern mutex_t  qcheck_lock;
100 #endif
101
/*
 * XQM_LIST_PRINT(l, NXT, title)
 *
 * Debugging aid.  With QUOTADEBUG defined it prints 'title' together with
 * the element count of list 'l', then walks the list from (l)->qh_next,
 * following the link member named by NXT until it reaches NULL, and prints
 * the id, type string, block count, inode count and reference count of
 * every dquot visited.  Without QUOTADEBUG it expands to an empty statement.
 *
 * Both variants are wrapped in do { } while (0) so the macro behaves as a
 * single statement and can be used safely in an unbraced if/else.
 */
#ifdef QUOTADEBUG
#define XQM_LIST_PRINT(l, NXT, title) \
do { \
	xfs_dquot_t	*dqp; int i = 0; \
	cmn_err(CE_DEBUG, "%s (#%d)", (title), (int) (l)->qh_nelems); \
	for (dqp = (l)->qh_next; dqp != NULL; dqp = dqp->NXT) { \
		cmn_err(CE_DEBUG, "   %d.  \"%d (%s)\"   " \
				  "bcnt = %d, icnt = %d, refs = %d", \
			++i, (int) INT_GET(dqp->q_core.d_id, ARCH_CONVERT), \
			DQFLAGTO_TYPESTR(dqp),       \
			(int) INT_GET(dqp->q_core.d_bcount, ARCH_CONVERT), \
			(int) INT_GET(dqp->q_core.d_icount, ARCH_CONVERT), \
			(int) dqp->q_nrefs); \
	} \
} while (0)
#else
#define XQM_LIST_PRINT(l, NXT, title) do { } while (0)
#endif
119
120 /*
121  * Initialize the XQM structure.
122  * Note that there is not one quota manager per file system.
123  */
124 STATIC struct xfs_qm *
125 xfs_Gqm_init(void)
126 {
127         xfs_qm_t                *xqm;
128         int                     hsize, i;
129
130         xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
131         ASSERT(xqm);
132
133         /*
134          * Initialize the dquot hash tables.
135          */
136         hsize = (DQUOT_HASH_HEURISTIC < XFS_QM_NCSIZE_THRESHOLD) ?
137                 XFS_QM_HASHSIZE_LOW : XFS_QM_HASHSIZE_HIGH;
138         xqm->qm_dqhashmask = hsize - 1;
139
140         xqm->qm_usr_dqhtable = (xfs_dqhash_t *)kmem_zalloc(hsize *
141                                                       sizeof(xfs_dqhash_t),
142                                                       KM_SLEEP);
143         xqm->qm_grp_dqhtable = (xfs_dqhash_t *)kmem_zalloc(hsize *
144                                                       sizeof(xfs_dqhash_t),
145                                                       KM_SLEEP);
146         ASSERT(xqm->qm_usr_dqhtable != NULL);
147         ASSERT(xqm->qm_grp_dqhtable != NULL);
148
149         for (i = 0; i < hsize; i++) {
150                 xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
151                 xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
152         }
153
154         /*
155          * Freelist of all dquots of all file systems
156          */
157         xfs_qm_freelist_init(&(xqm->qm_dqfreelist));
158
159         /*
160          * dquot zone. we register our own low-memory callback.
161          */
162         if (!qm_dqzone) {
163                 xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
164                                                 "xfs_dquots");
165                 qm_dqzone = xqm->qm_dqzone;
166         } else
167                 xqm->qm_dqzone = qm_dqzone;
168
169         xfs_qm_shaker = kmem_shake_register(xfs_qm_shake);
170
171         /*
172          * The t_dqinfo portion of transactions.
173          */
174         if (!qm_dqtrxzone) {
175                 xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
176                                                    "xfs_dqtrx");
177                 qm_dqtrxzone = xqm->qm_dqtrxzone;
178         } else
179                 xqm->qm_dqtrxzone = qm_dqtrxzone;
180
181         atomic_set(&xqm->qm_totaldquots, 0);
182         xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
183         xqm->qm_nrefs = 0;
184 #ifdef DEBUG
185         mutex_init(&qcheck_lock, MUTEX_DEFAULT, "qchk");
186 #endif
187         return xqm;
188 }
189
190 /*
191  * Destroy the global quota manager when its reference count goes to zero.
192  */
193 STATIC void
194 xfs_qm_destroy(
195         struct xfs_qm   *xqm)
196 {
197         int             hsize, i;
198
199         ASSERT(xqm != NULL);
200         ASSERT(xqm->qm_nrefs == 0);
201         kmem_shake_deregister(xfs_qm_shaker);
202         hsize = xqm->qm_dqhashmask + 1;
203         for (i = 0; i < hsize; i++) {
204                 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
205                 xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
206         }
207         kmem_free(xqm->qm_usr_dqhtable, hsize * sizeof(xfs_dqhash_t));
208         kmem_free(xqm->qm_grp_dqhtable, hsize * sizeof(xfs_dqhash_t));
209         xqm->qm_usr_dqhtable = NULL;
210         xqm->qm_grp_dqhtable = NULL;
211         xqm->qm_dqhashmask = 0;
212         xfs_qm_freelist_destroy(&(xqm->qm_dqfreelist));
213 #ifdef DEBUG
214         mutex_destroy(&qcheck_lock);
215 #endif
216         kmem_free(xqm, sizeof(xfs_qm_t));
217 }
218
219 /*
220  * Called at mount time to let XQM know that another file system is
221  * starting quotas. This isn't crucial information as the individual mount
222  * structures are pretty independent, but it helps the XQM keep a
223  * global view of what's going on.
224  */
225 /* ARGSUSED */
226 STATIC int
227 xfs_qm_hold_quotafs_ref(
228         struct xfs_mount *mp)
229 {
230         /*
231          * Need to lock the xfs_Gqm structure for things like this. For example,
232          * the structure could disappear between the entry to this routine and
233          * a HOLD operation if not locked.
234          */
235         XFS_QM_LOCK(xfs_Gqm);
236
237         if (xfs_Gqm == NULL)
238                 xfs_Gqm = xfs_Gqm_init();
239         /*
240          * We can keep a list of all filesystems with quotas mounted for
241          * debugging and statistical purposes, but ...
242          * Just take a reference and get out.
243          */
244         XFS_QM_HOLD(xfs_Gqm);
245         XFS_QM_UNLOCK(xfs_Gqm);
246
247         return 0;
248 }
249
250
251 /*
252  * Release the reference that a filesystem took at mount time,
253  * so that we know when we need to destroy the entire quota manager.
254  */
255 /* ARGSUSED */
256 STATIC void
257 xfs_qm_rele_quotafs_ref(
258         struct xfs_mount *mp)
259 {
260         xfs_dquot_t     *dqp, *nextdqp;
261
262         ASSERT(xfs_Gqm);
263         ASSERT(xfs_Gqm->qm_nrefs > 0);
264
265         /*
266          * Go thru the freelist and destroy all inactive dquots.
267          */
268         xfs_qm_freelist_lock(xfs_Gqm);
269
270         for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
271              dqp != (xfs_dquot_t *)&(xfs_Gqm->qm_dqfreelist); ) {
272                 xfs_dqlock(dqp);
273                 nextdqp = dqp->dq_flnext;
274                 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
275                         ASSERT(dqp->q_mount == NULL);
276                         ASSERT(! XFS_DQ_IS_DIRTY(dqp));
277                         ASSERT(dqp->HL_PREVP == NULL);
278                         ASSERT(dqp->MPL_PREVP == NULL);
279                         XQM_FREELIST_REMOVE(dqp);
280                         xfs_dqunlock(dqp);
281                         xfs_qm_dqdestroy(dqp);
282                 } else {
283                         xfs_dqunlock(dqp);
284                 }
285                 dqp = nextdqp;
286         }
287         xfs_qm_freelist_unlock(xfs_Gqm);
288
289         /*
290          * Destroy the entire XQM. If somebody mounts with quotaon, this'll
291          * be restarted.
292          */
293         XFS_QM_LOCK(xfs_Gqm);
294         XFS_QM_RELE(xfs_Gqm);
295         if (xfs_Gqm->qm_nrefs == 0) {
296                 xfs_qm_destroy(xfs_Gqm);
297                 xfs_Gqm = NULL;
298         }
299         XFS_QM_UNLOCK(xfs_Gqm);
300 }
301
302 /*
303  * This is called at mount time from xfs_mountfs to initialize the quotainfo
304  * structure and start the global quotamanager (xfs_Gqm) if it hasn't done
305  * so already.  Note that the superblock has not been read in yet.
306  */
307 void
308 xfs_qm_mount_quotainit(
309         xfs_mount_t     *mp,
310         uint            flags)
311 {
312         /*
313          * User, projects or group quotas has to be on.
314          */
315         ASSERT(flags & (XFSMNT_UQUOTA | XFSMNT_PQUOTA | XFSMNT_GQUOTA));
316
317         /*
318          * Initialize the flags in the mount structure. From this point
319          * onwards we look at m_qflags to figure out if quotas's ON/OFF, etc.
320          * Note that we enforce nothing if accounting is off.
321          * ie.  XFSMNT_*QUOTA must be ON for XFSMNT_*QUOTAENF.
322          * It isn't necessary to take the quotaoff lock to do this; this is
323          * called from mount.
324          */
325         if (flags & XFSMNT_UQUOTA) {
326                 mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
327                 if (flags & XFSMNT_UQUOTAENF)
328                         mp->m_qflags |= XFS_UQUOTA_ENFD;
329         }
330         if (flags & XFSMNT_GQUOTA) {
331                 mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
332                 if (flags & XFSMNT_GQUOTAENF)
333                         mp->m_qflags |= XFS_OQUOTA_ENFD;
334         } else if (flags & XFSMNT_PQUOTA) {
335                 mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
336                 if (flags & XFSMNT_PQUOTAENF)
337                         mp->m_qflags |= XFS_OQUOTA_ENFD;
338         }
339 }
340
341 /*
342  * Just destroy the quotainfo structure.
343  */
344 void
345 xfs_qm_unmount_quotadestroy(
346         xfs_mount_t     *mp)
347 {
348         if (mp->m_quotainfo)
349                 xfs_qm_destroy_quotainfo(mp);
350 }
351
352
353 /*
354  * This is called from xfs_mountfs to start quotas and initialize all
355  * necessary data structures like quotainfo.  This is also responsible for
356  * running a quotacheck as necessary.  We are guaranteed that the superblock
357  * is consistently read in at this point.
358  */
359 int
360 xfs_qm_mount_quotas(
361         xfs_mount_t     *mp,
362         int             mfsi_flags)
363 {
364         unsigned long   s;
365         int             error = 0;
366         uint            sbf;
367
368
369         /*
370          * If quotas on realtime volumes is not supported, we disable
371          * quotas immediately.
372          */
373         if (mp->m_sb.sb_rextents) {
374                 cmn_err(CE_NOTE,
375                         "Cannot turn on quotas for realtime filesystem %s",
376                         mp->m_fsname);
377                 mp->m_qflags = 0;
378                 goto write_changes;
379         }
380
381         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
382
383         /*
384          * Allocate the quotainfo structure inside the mount struct, and
385          * create quotainode(s), and change/rev superblock if necessary.
386          */
387         if ((error = xfs_qm_init_quotainfo(mp))) {
388                 /*
389                  * We must turn off quotas.
390                  */
391                 ASSERT(mp->m_quotainfo == NULL);
392                 mp->m_qflags = 0;
393                 goto write_changes;
394         }
395         /*
396          * If any of the quotas are not consistent, do a quotacheck.
397          */
398         if (XFS_QM_NEED_QUOTACHECK(mp) &&
399                 !(mfsi_flags & XFS_MFSI_NO_QUOTACHECK)) {
400                 if ((error = xfs_qm_quotacheck(mp))) {
401                         /* Quotacheck has failed and quotas have
402                          * been disabled.
403                          */
404                         return XFS_ERROR(error);
405                 }
406         }
407
408  write_changes:
409         /*
410          * We actually don't have to acquire the SB_LOCK at all.
411          * This can only be called from mount, and that's single threaded. XXX
412          */
413         s = XFS_SB_LOCK(mp);
414         sbf = mp->m_sb.sb_qflags;
415         mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
416         XFS_SB_UNLOCK(mp, s);
417
418         if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
419                 if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
420                         /*
421                          * We could only have been turning quotas off.
422                          * We aren't in very good shape actually because
423                          * the incore structures are convinced that quotas are
424                          * off, but the on disk superblock doesn't know that !
425                          */
426                         ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
427                         xfs_fs_cmn_err(CE_ALERT, mp,
428                                 "XFS mount_quotas: Superblock update failed!");
429                 }
430         }
431
432         if (error) {
433                 xfs_fs_cmn_err(CE_WARN, mp,
434                         "Failed to initialize disk quotas.");
435         }
436         return XFS_ERROR(error);
437 }
438
439 /*
440  * Called from the vfsops layer.
441  */
442 int
443 xfs_qm_unmount_quotas(
444         xfs_mount_t     *mp)
445 {
446         xfs_inode_t     *uqp, *gqp;
447         int             error = 0;
448
449         /*
450          * Release the dquots that root inode, et al might be holding,
451          * before we flush quotas and blow away the quotainfo structure.
452          */
453         ASSERT(mp->m_rootip);
454         xfs_qm_dqdetach(mp->m_rootip);
455         if (mp->m_rbmip)
456                 xfs_qm_dqdetach(mp->m_rbmip);
457         if (mp->m_rsumip)
458                 xfs_qm_dqdetach(mp->m_rsumip);
459
460         /*
461          * Flush out the quota inodes.
462          */
463         uqp = gqp = NULL;
464         if (mp->m_quotainfo) {
465                 if ((uqp = mp->m_quotainfo->qi_uquotaip) != NULL) {
466                         xfs_ilock(uqp, XFS_ILOCK_EXCL);
467                         xfs_iflock(uqp);
468                         error = xfs_iflush(uqp, XFS_IFLUSH_SYNC);
469                         xfs_iunlock(uqp, XFS_ILOCK_EXCL);
470                         if (unlikely(error == EFSCORRUPTED)) {
471                                 XFS_ERROR_REPORT("xfs_qm_unmount_quotas(1)",
472                                                  XFS_ERRLEVEL_LOW, mp);
473                                 goto out;
474                         }
475                 }
476                 if ((gqp = mp->m_quotainfo->qi_gquotaip) != NULL) {
477                         xfs_ilock(gqp, XFS_ILOCK_EXCL);
478                         xfs_iflock(gqp);
479                         error = xfs_iflush(gqp, XFS_IFLUSH_SYNC);
480                         xfs_iunlock(gqp, XFS_ILOCK_EXCL);
481                         if (unlikely(error == EFSCORRUPTED)) {
482                                 XFS_ERROR_REPORT("xfs_qm_unmount_quotas(2)",
483                                                  XFS_ERRLEVEL_LOW, mp);
484                                 goto out;
485                         }
486                 }
487         }
488         if (uqp) {
489                  XFS_PURGE_INODE(uqp);
490                  mp->m_quotainfo->qi_uquotaip = NULL;
491         }
492         if (gqp) {
493                 XFS_PURGE_INODE(gqp);
494                 mp->m_quotainfo->qi_gquotaip = NULL;
495         }
496 out:
497         return XFS_ERROR(error);
498 }
499
500 /*
501  * Flush all dquots of the given file system to disk. The dquots are
502  * _not_ purged from memory here, just their data written to disk.
503  */
504 STATIC int
505 xfs_qm_dqflush_all(
506         xfs_mount_t     *mp,
507         int             flags)
508 {
509         int             recl;
510         xfs_dquot_t     *dqp;
511         int             niters;
512         int             error;
513
514         if (mp->m_quotainfo == NULL)
515                 return (0);
516         niters = 0;
517 again:
518         xfs_qm_mplist_lock(mp);
519         FOREACH_DQUOT_IN_MP(dqp, mp) {
520                 xfs_dqlock(dqp);
521                 if (! XFS_DQ_IS_DIRTY(dqp)) {
522                         xfs_dqunlock(dqp);
523                         continue;
524                 }
525                 xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY");
526                 /* XXX a sentinel would be better */
527                 recl = XFS_QI_MPLRECLAIMS(mp);
528                 if (! xfs_qm_dqflock_nowait(dqp)) {
529                         /*
530                          * If we can't grab the flush lock then check
531                          * to see if the dquot has been flushed delayed
532                          * write.  If so, grab its buffer and send it
533                          * out immediately.  We'll be able to acquire
534                          * the flush lock when the I/O completes.
535                          */
536                         xfs_qm_dqflock_pushbuf_wait(dqp);
537                 }
538                 /*
539                  * Let go of the mplist lock. We don't want to hold it
540                  * across a disk write.
541                  */
542                 xfs_qm_mplist_unlock(mp);
543                 error = xfs_qm_dqflush(dqp, flags);
544                 xfs_dqunlock(dqp);
545                 if (error)
546                         return (error);
547
548                 xfs_qm_mplist_lock(mp);
549                 if (recl != XFS_QI_MPLRECLAIMS(mp)) {
550                         xfs_qm_mplist_unlock(mp);
551                         /* XXX restart limit */
552                         goto again;
553                 }
554         }
555
556         xfs_qm_mplist_unlock(mp);
557         /* return ! busy */
558         return (0);
559 }
560 /*
561  * Release the group dquot pointers the user dquots may be
562  * carrying around as a hint. mplist is locked on entry and exit.
563  */
564 STATIC void
565 xfs_qm_detach_gdquots(
566         xfs_mount_t     *mp)
567 {
568         xfs_dquot_t     *dqp, *gdqp;
569         int             nrecl;
570
571  again:
572         ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
573         dqp = XFS_QI_MPLNEXT(mp);
574         while (dqp) {
575                 xfs_dqlock(dqp);
576                 if ((gdqp = dqp->q_gdquot)) {
577                         xfs_dqlock(gdqp);
578                         dqp->q_gdquot = NULL;
579                 }
580                 xfs_dqunlock(dqp);
581
582                 if (gdqp) {
583                         /*
584                          * Can't hold the mplist lock across a dqput.
585                          * XXXmust convert to marker based iterations here.
586                          */
587                         nrecl = XFS_QI_MPLRECLAIMS(mp);
588                         xfs_qm_mplist_unlock(mp);
589                         xfs_qm_dqput(gdqp);
590
591                         xfs_qm_mplist_lock(mp);
592                         if (nrecl != XFS_QI_MPLRECLAIMS(mp))
593                                 goto again;
594                 }
595                 dqp = dqp->MPL_NEXT;
596         }
597 }
598
599 /*
600  * Go through all the incore dquots of this file system and take them
601  * off the mplist and hashlist, if the dquot type matches the dqtype
602  * parameter. This is used when turning off quota accounting for
603  * users and/or groups, as well as when the filesystem is unmounting.
604  */
605 STATIC int
606 xfs_qm_dqpurge_int(
607         xfs_mount_t     *mp,
608         uint            flags) /* QUOTAOFF/UMOUNTING/UQUOTA/PQUOTA/GQUOTA */
609 {
610         xfs_dquot_t     *dqp;
611         uint            dqtype;
612         int             nrecl;
613         xfs_dquot_t     *nextdqp;
614         int             nmisses;
615
616         if (mp->m_quotainfo == NULL)
617                 return (0);
618
619         dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
620         dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
621         dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
622
623         xfs_qm_mplist_lock(mp);
624
625         /*
626          * In the first pass through all incore dquots of this filesystem,
627          * we release the group dquot pointers the user dquots may be
628          * carrying around as a hint. We need to do this irrespective of
629          * what's being turned off.
630          */
631         xfs_qm_detach_gdquots(mp);
632
633       again:
634         nmisses = 0;
635         ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
636         /*
637          * Try to get rid of all of the unwanted dquots. The idea is to
638          * get them off mplist and hashlist, but leave them on freelist.
639          */
640         dqp = XFS_QI_MPLNEXT(mp);
641         while (dqp) {
642                 /*
643                  * It's OK to look at the type without taking dqlock here.
644                  * We're holding the mplist lock here, and that's needed for
645                  * a dqreclaim.
646                  */
647                 if ((dqp->dq_flags & dqtype) == 0) {
648                         dqp = dqp->MPL_NEXT;
649                         continue;
650                 }
651
652                 if (! xfs_qm_dqhashlock_nowait(dqp)) {
653                         nrecl = XFS_QI_MPLRECLAIMS(mp);
654                         xfs_qm_mplist_unlock(mp);
655                         XFS_DQ_HASH_LOCK(dqp->q_hash);
656                         xfs_qm_mplist_lock(mp);
657
658                         /*
659                          * XXXTheoretically, we can get into a very long
660                          * ping pong game here.
661                          * No one can be adding dquots to the mplist at
662                          * this point, but somebody might be taking things off.
663                          */
664                         if (nrecl != XFS_QI_MPLRECLAIMS(mp)) {
665                                 XFS_DQ_HASH_UNLOCK(dqp->q_hash);
666                                 goto again;
667                         }
668                 }
669
670                 /*
671                  * Take the dquot off the mplist and hashlist. It may remain on
672                  * freelist in INACTIVE state.
673                  */
674                 nextdqp = dqp->MPL_NEXT;
675                 nmisses += xfs_qm_dqpurge(dqp, flags);
676                 dqp = nextdqp;
677         }
678         xfs_qm_mplist_unlock(mp);
679         return nmisses;
680 }
681
682 int
683 xfs_qm_dqpurge_all(
684         xfs_mount_t     *mp,
685         uint            flags)
686 {
687         int             ndquots;
688
689         /*
690          * Purge the dquot cache.
691          * None of the dquots should really be busy at this point.
692          */
693         if (mp->m_quotainfo) {
694                 while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
695                         delay(ndquots * 10);
696                 }
697         }
698         return 0;
699 }
700
701 STATIC int
702 xfs_qm_dqattach_one(
703         xfs_inode_t     *ip,
704         xfs_dqid_t      id,
705         uint            type,
706         uint            doalloc,
707         uint            dolock,
708         xfs_dquot_t     *udqhint, /* hint */
709         xfs_dquot_t     **IO_idqpp)
710 {
711         xfs_dquot_t     *dqp;
712         int             error;
713
714         ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
715         error = 0;
716         /*
717          * See if we already have it in the inode itself. IO_idqpp is
718          * &i_udquot or &i_gdquot. This made the code look weird, but
719          * made the logic a lot simpler.
720          */
721         if ((dqp = *IO_idqpp)) {
722                 if (dolock)
723                         xfs_dqlock(dqp);
724                 xfs_dqtrace_entry(dqp, "DQATTACH: found in ip");
725                 goto done;
726         }
727
728         /*
729          * udqhint is the i_udquot field in inode, and is non-NULL only
730          * when the type arg is group/project. Its purpose is to save a
731          * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
732          * the user dquot.
733          */
734         ASSERT(!udqhint || type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
735         if (udqhint && !dolock)
736                 xfs_dqlock(udqhint);
737
738         /*
739          * No need to take dqlock to look at the id.
740          * The ID can't change until it gets reclaimed, and it won't
741          * be reclaimed as long as we have a ref from inode and we hold
742          * the ilock.
743          */
744         if (udqhint &&
745             (dqp = udqhint->q_gdquot) &&
746             (INT_GET(dqp->q_core.d_id, ARCH_CONVERT) == id)) {
747                 ASSERT(XFS_DQ_IS_LOCKED(udqhint));
748                 xfs_dqlock(dqp);
749                 XFS_DQHOLD(dqp);
750                 ASSERT(*IO_idqpp == NULL);
751                 *IO_idqpp = dqp;
752                 if (!dolock) {
753                         xfs_dqunlock(dqp);
754                         xfs_dqunlock(udqhint);
755                 }
756                 goto done;
757         }
758         /*
759          * We can't hold a dquot lock when we call the dqget code.
760          * We'll deadlock in no time, because of (not conforming to)
761          * lock ordering - the inodelock comes before any dquot lock,
762          * and we may drop and reacquire the ilock in xfs_qm_dqget().
763          */
764         if (udqhint)
765                 xfs_dqunlock(udqhint);
766         /*
767          * Find the dquot from somewhere. This bumps the
768          * reference count of dquot and returns it locked.
769          * This can return ENOENT if dquot didn't exist on
770          * disk and we didn't ask it to allocate;
771          * ESRCH if quotas got turned off suddenly.
772          */
773         if ((error = xfs_qm_dqget(ip->i_mount, ip, id, type,
774                                  doalloc|XFS_QMOPT_DOWARN, &dqp))) {
775                 if (udqhint && dolock)
776                         xfs_dqlock(udqhint);
777                 goto done;
778         }
779
780         xfs_dqtrace_entry(dqp, "DQATTACH: found by dqget");
781         /*
782          * dqget may have dropped and re-acquired the ilock, but it guarantees
783          * that the dquot returned is the one that should go in the inode.
784          */
785         *IO_idqpp = dqp;
786         ASSERT(dqp);
787         ASSERT(XFS_DQ_IS_LOCKED(dqp));
788         if (! dolock) {
789                 xfs_dqunlock(dqp);
790                 goto done;
791         }
792         if (! udqhint)
793                 goto done;
794
795         ASSERT(udqhint);
796         ASSERT(dolock);
797         ASSERT(XFS_DQ_IS_LOCKED(dqp));
798         if (! xfs_qm_dqlock_nowait(udqhint)) {
799                 xfs_dqunlock(dqp);
800                 xfs_dqlock(udqhint);
801                 xfs_dqlock(dqp);
802         }
803       done:
804 #ifdef QUOTADEBUG
805         if (udqhint) {
806                 if (dolock)
807                         ASSERT(XFS_DQ_IS_LOCKED(udqhint));
808         }
809         if (! error) {
810                 if (dolock)
811                         ASSERT(XFS_DQ_IS_LOCKED(dqp));
812         }
813 #endif
814         return (error);
815 }
816
817
818 /*
819  * Given a udquot and gdquot, attach a ptr to the group dquot in the
820  * udquot as a hint for future lookups. The idea sounds simple, but the
821  * execution isn't, because the udquot might have a group dquot attached
822  * already and getting rid of that gets us into lock ordering constraints.
823  * If 'locked' is non-zero both dquots are locked on entry and on return;
824  * otherwise they are locked here and unlocked again before returning.
825  */
826 STATIC void
827 xfs_qm_dqattach_grouphint(
828         xfs_dquot_t     *udq,
829         xfs_dquot_t     *gdq,
830         uint            locked)
831 {
832         xfs_dquot_t     *tmp;
833
834 #ifdef QUOTADEBUG
835         if (locked) {
836                 ASSERT(XFS_DQ_IS_LOCKED(udq));
837                 ASSERT(XFS_DQ_IS_LOCKED(gdq));
838         }
839 #endif
840         if (! locked)
841                 xfs_dqlock(udq);
842
843         if ((tmp = udq->q_gdquot)) {
844                 if (tmp == gdq) {       /* the hint is already in place */
845                         if (! locked)
846                                 xfs_dqunlock(udq);
847                         return;
848                 }
849
850                 udq->q_gdquot = NULL;   /* detach the old hint before releasing it */
851                 /*
852                  * We can't keep any dqlocks when calling dqrele,
853                  * because the freelist lock comes before dqlocks.
854                  */
855                 xfs_dqunlock(udq);
856                 if (locked)
857                         xfs_dqunlock(gdq);
858                 /*
859                  * We took a hard reference once upon a time in dqget,
860                  * so give it back now that the udquot no longer points at it.
861                  * dqput() does the unlocking of the dquot.
862                  */
863                 xfs_qm_dqrele(tmp);
864
865                 xfs_dqlock(udq);        /* retake both locks, udquot first */
866                 xfs_dqlock(gdq);
867
868         } else {
869                 ASSERT(XFS_DQ_IS_LOCKED(udq));
870                 if (! locked) {
871                         xfs_dqlock(gdq);
872                 }
873         }
874
875         ASSERT(XFS_DQ_IS_LOCKED(udq));
876         ASSERT(XFS_DQ_IS_LOCKED(gdq));
877         /*
878          * Somebody could have attached a gdquot here,
879          * when we dropped the udquot lock. If so, just do nothing.
880          */
881         if (udq->q_gdquot == NULL) {
882                 XFS_DQHOLD(gdq);        /* presumably the hint's own reference - TODO confirm */
883                 udq->q_gdquot = gdq;
884         }
885         if (! locked) {
886                 xfs_dqunlock(gdq);
887                 xfs_dqunlock(udq);
888         }
889 }
890
891
892 /*
893  * Attach dquot(s) to an inode, taking U/G/P-QUOTAON into account.
894  * Returns 0, or the error from xfs_qm_dqattach_one() for the failing quota.
895  * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
896  * If XFS_QMOPT_DQLOCK, the dquot(s) will be returned locked. This option pretty
897  * much made this code a complete mess, but it has been pretty useful.
898  * If XFS_QMOPT_ILOCKED, the inode is already locked EXCL by the caller;
899  * otherwise it is locked EXCL here and unlocked again before returning.
900  * Inode may get unlocked and relocked in here; the caller must cope with that.
901  */
902 int
903 xfs_qm_dqattach(
904         xfs_inode_t     *ip,
905         uint            flags)
906 {
907         xfs_mount_t     *mp = ip->i_mount;
908         uint            nquotas = 0;
909         int             error = 0;
910
911         if ((! XFS_IS_QUOTA_ON(mp)) ||
912             (! XFS_NOT_DQATTACHED(mp, ip)) ||
913             (ip->i_ino == mp->m_sb.sb_uquotino) ||
914             (ip->i_ino == mp->m_sb.sb_gquotino))
915                 return (0);     /* nothing to attach; the quota inodes are skipped too */
916
917         ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 ||
918                XFS_ISLOCKED_INODE_EXCL(ip));
919
920         if (! (flags & XFS_QMOPT_ILOCKED))
921                 xfs_ilock(ip, XFS_ILOCK_EXCL);
922
923         if (XFS_IS_UQUOTA_ON(mp)) {
924                 error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
925                                                 flags & XFS_QMOPT_DQALLOC,
926                                                 flags & XFS_QMOPT_DQLOCK,
927                                                 NULL, &ip->i_udquot);
928                 if (error)
929                         goto done;
930                 nquotas++;
931         }
932         ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
933         if (XFS_IS_OQUOTA_ON(mp)) {
934                 error = XFS_IS_GQUOTA_ON(mp) ?  /* group and project dquots share i_gdquot */
935                         xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
936                                                 flags & XFS_QMOPT_DQALLOC,
937                                                 flags & XFS_QMOPT_DQLOCK,
938                                                 ip->i_udquot, &ip->i_gdquot) :
939                         xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ,
940                                                 flags & XFS_QMOPT_DQALLOC,
941                                                 flags & XFS_QMOPT_DQLOCK,
942                                                 ip->i_udquot, &ip->i_gdquot);
943                 /*
944                  * Don't worry about the udquot that we may have
945                  * attached above. It'll get detached, if not already.
946                  */
947                 if (error)
948                         goto done;
949                 nquotas++;
950         }
951
952         /*
953          * Attach this group quota to the user quota as a hint.
954          * This WON'T, in general, result in a thrash.
955          */
956         if (nquotas == 2) {
957                 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
958                 ASSERT(ip->i_udquot);
959                 ASSERT(ip->i_gdquot);
960
961                 /*
962                  * We may or may not have the i_udquot locked at this point,
963                  * but this check is OK since we don't depend on the q_gdquot
964                  * hint to be accurate 100% of the time. It is just a hint,
965                  * and this will succeed in general.
966                  */
967                 if (ip->i_udquot->q_gdquot == ip->i_gdquot)
968                         goto done;
969                 /*
970                  * Attach i_gdquot to the gdquot hint inside the i_udquot.
971                  */
972                 xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot,
973                                          flags & XFS_QMOPT_DQLOCK);
974         }
975
976       done:
977
978 #ifdef QUOTADEBUG
979         if (! error) {
980                 if (ip->i_udquot) {
981                         if (flags & XFS_QMOPT_DQLOCK)
982                                 ASSERT(XFS_DQ_IS_LOCKED(ip->i_udquot));
983                 }
984                 if (ip->i_gdquot) {
985                         if (flags & XFS_QMOPT_DQLOCK)
986                                 ASSERT(XFS_DQ_IS_LOCKED(ip->i_gdquot));
987                 }
988                 if (XFS_IS_UQUOTA_ON(mp))
989                         ASSERT(ip->i_udquot);
990                 if (XFS_IS_OQUOTA_ON(mp))
991                         ASSERT(ip->i_gdquot);
992         }
993 #endif
994
995         if (! (flags & XFS_QMOPT_ILOCKED))      /* only drop the lock we took ourselves */
996                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
997
998 #ifdef QUOTADEBUG
999         else
1000                 ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
1001 #endif
1002         return (error);
1003 }
1004
1005 /*
1006  * Release the dquots attached to the inode (and their references), if any,
1007  * and clear i_udquot/i_gdquot. The inode should be locked EXCL except when
1008  * this is called by xfs_ireclaim.
1009  */
1010 void
1011 xfs_qm_dqdetach(
1012         xfs_inode_t     *ip)
1013 {
1014         if (!(ip->i_udquot || ip->i_gdquot))    /* nothing attached */
1015                 return;
1016
1017         ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
1018         ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
1019         if (ip->i_udquot) {
1020                 xfs_dqtrace_entry_ino(ip->i_udquot, "DQDETTACH", ip);
1021                 xfs_qm_dqrele(ip->i_udquot);
1022                 ip->i_udquot = NULL;
1023         }
1024         if (ip->i_gdquot) {
1025                 xfs_dqtrace_entry_ino(ip->i_gdquot, "DQDETTACH", ip);
1026                 xfs_qm_dqrele(ip->i_gdquot);
1027                 ip->i_gdquot = NULL;
1028         }
1029 }
1030
1031 /*
1032  * Flush the dirty dquots of this mount. Called by VFS_SYNC; the flags arg
1033  * identifies the caller and its motives, as done in xfs_sync.
1034  * Returns 0, or the xfs_qm_dqflush() error unless the fs was force-shut down.
1035  * vfs_sync: SYNC_FSDATA|SYNC_ATTR|SYNC_BDFLUSH 0x31
1036  * syscall sync: SYNC_FSDATA|SYNC_ATTR|SYNC_DELWRI 0x25
1037  * umountroot : SYNC_WAIT | SYNC_CLOSE | SYNC_ATTR | SYNC_FSDATA
1038  */
1039
1040 int
1041 xfs_qm_sync(
1042         xfs_mount_t     *mp,
1043         short           flags)
1044 {
1045         int             recl, restarts;
1046         xfs_dquot_t     *dqp;
1047         uint            flush_flags;
1048         boolean_t       nowait;
1049         int             error;
1050
1051         restarts = 0;
1052         /*
1053          * We won't block unless asked to: SYNC_WAIT set and SYNC_BDFLUSH clear.
1054          */
1055         nowait = (boolean_t)(flags & SYNC_BDFLUSH || (flags & SYNC_WAIT) == 0);
1056
1057   again:
1058         xfs_qm_mplist_lock(mp);
1059         /*
1060          * dqpurge_all() also takes the mplist lock and iterates through all
1061          * dquots in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
1062          * when we have the mplist lock, we know that dquots will be consistent
1063          * as long as we have it locked.
1064          */
1065         if (! XFS_IS_QUOTA_ON(mp)) {
1066                 xfs_qm_mplist_unlock(mp);
1067                 return (0);
1068         }
1069         FOREACH_DQUOT_IN_MP(dqp, mp) {
1070                 /*
1071                  * If this is vfs_sync calling, then skip the dquots that
1072                  * don't 'seem' to be dirty, i.e. don't acquire the dqlock.
1073                  * This is very similar to what xfs_sync does with inodes.
1074                  */
1075                 if (flags & SYNC_BDFLUSH) {
1076                         if (! XFS_DQ_IS_DIRTY(dqp))
1077                                 continue;
1078                 }
1079
1080                 if (nowait) {
1081                         /*
1082                          * Try to acquire the dquot lock. We are NOT out of
1083                          * lock order, but we just don't want to wait for this
1084                          * lock, unless somebody wanted us to.
1085                          */
1086                         if (! xfs_qm_dqlock_nowait(dqp))
1087                                 continue;
1088                 } else {
1089                         xfs_dqlock(dqp);
1090                 }
1091
1092                 /*
1093                  * Now, find out for sure if this dquot is dirty or not.
1094                  */
1095                 if (! XFS_DQ_IS_DIRTY(dqp)) {
1096                         xfs_dqunlock(dqp);
1097                         continue;
1098                 }
1099
1100                 /* XXX a sentinel would be better */
1101                 recl = XFS_QI_MPLRECLAIMS(mp);  /* sampled while the mplist lock is held */
1102                 if (! xfs_qm_dqflock_nowait(dqp)) {
1103                         if (nowait) {
1104                                 xfs_dqunlock(dqp);
1105                                 continue;
1106                         }
1107                         /*
1108                          * If we can't grab the flush lock then if the caller
1109                          * really wanted us to give this our best shot,
1110                          * see if we can give a push to the buffer before we wait
1111                          * on the flush lock. At this point, we know that
1112                          * even though the dquot is being flushed,
1113                          * it has (new) dirty data.
1114                          */
1115                         xfs_qm_dqflock_pushbuf_wait(dqp);
1116                 }
1117                 /*
1118                  * Let go of the mplist lock. We don't want to hold it
1119                  * across a disk write.
1120                  */
1121                 flush_flags = (nowait) ? XFS_QMOPT_DELWRI : XFS_QMOPT_SYNC;
1122                 xfs_qm_mplist_unlock(mp);
1123                 xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH");
1124                 error = xfs_qm_dqflush(dqp, flush_flags);
1125                 xfs_dqunlock(dqp);
1126                 if (error && XFS_FORCED_SHUTDOWN(mp))
1127                         return(0);      /* Need to prevent umount failure */
1128                 else if (error)
1129                         return (error);
1130
1131                 xfs_qm_mplist_lock(mp);
1132                 if (recl != XFS_QI_MPLRECLAIMS(mp)) {   /* value moved while unlocked */
1133                         if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
1134                                 break;  /* give up; the mplist lock is dropped below */
1135
1136                         xfs_qm_mplist_unlock(mp);
1137                         goto again;
1138                 }
1139         }
1140
1141         xfs_qm_mplist_unlock(mp);
1142         return (0);
1143 }
1144
1145
1146 /*
1147  * Allocate mp->m_quotainfo and initialize the quota information kept in it.
1148  * Returns 0, or the error from the XQM hold or the quota inode setup.
1149  */
1150 STATIC int
1151 xfs_qm_init_quotainfo(
1152         xfs_mount_t     *mp)
1153 {
1154         xfs_quotainfo_t *qinf;
1155         int             error;
1156         xfs_dquot_t     *dqp;
1157
1158         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1159
1160         /*
1161          * Tell XQM that we exist as soon as possible.
1162          */
1163         if ((error = xfs_qm_hold_quotafs_ref(mp))) {
1164                 return (error);
1165         }
1166
1167         qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
1168
1169         /*
1170          * See if quotainodes are setup; if not, allocate them and update the sb.
1171          */
1172         if ((error = xfs_qm_init_quotainos(mp))) {
1173                 kmem_free(qinf, sizeof(xfs_quotainfo_t));
1174                 mp->m_quotainfo = NULL;
1175                 xfs_qm_rele_quotafs_ref(mp);    /* undo the hold taken above */
1176                 return (error);
1177         }
1178
1179         spinlock_init(&qinf->qi_pinlock, "xfs_qinf_pin");
1180         xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0);
1181         qinf->qi_dqreclaims = 0;
1182
1183         /* mutex used to serialize quotaoffs */
1184         mutex_init(&qinf->qi_quotaofflock, MUTEX_DEFAULT, "qoff");
1185
1186         /* Precalc some constants */
1187         qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
1188         ASSERT(qinf->qi_dqchunklen);
1189         qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
1190         do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
1191
1192         mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
1193
1194         /*
1195          * We try to get the limits from the superuser's limits fields.
1196          * This is quite hacky, but it is standard quota practice.
1197          * We look at the USR dquot with id == 0 first, but if user quotas
1198          * are not enabled we go to the GRP (or else the PRJ) dquot with id == 0.
1199          * We don't really care to keep separate default limits for user
1200          * and group quotas, at least not at this point.
1201          */
1202         error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
1203                              XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
1204                              (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
1205                                 XFS_DQ_PROJ),
1206                              XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
1207                              &dqp);
1208         if (! error) {
1209                 xfs_disk_dquot_t        *ddqp = &dqp->q_core;
1210
1211                 /*
1212                  * The warnings and timers set the grace period given to
1213                  * a user or group before he or she can not perform any
1214                  * more writing. If it is zero, a default is used.
1215                  */
1216                 qinf->qi_btimelimit =
1217                                 INT_GET(ddqp->d_btimer, ARCH_CONVERT) ?
1218                                 INT_GET(ddqp->d_btimer, ARCH_CONVERT) :
1219                                 XFS_QM_BTIMELIMIT;
1220                 qinf->qi_itimelimit =
1221                                 INT_GET(ddqp->d_itimer, ARCH_CONVERT) ?
1222                                 INT_GET(ddqp->d_itimer, ARCH_CONVERT) :
1223                                 XFS_QM_ITIMELIMIT;
1224                 qinf->qi_rtbtimelimit =
1225                                 INT_GET(ddqp->d_rtbtimer, ARCH_CONVERT) ?
1226                                 INT_GET(ddqp->d_rtbtimer, ARCH_CONVERT) :
1227                                 XFS_QM_RTBTIMELIMIT;
1228                 qinf->qi_bwarnlimit =
1229                                 INT_GET(ddqp->d_bwarns, ARCH_CONVERT) ?
1230                                 INT_GET(ddqp->d_bwarns, ARCH_CONVERT) :
1231                                 XFS_QM_BWARNLIMIT;
1232                 qinf->qi_iwarnlimit =
1233                                 INT_GET(ddqp->d_iwarns, ARCH_CONVERT) ?
1234                                 INT_GET(ddqp->d_iwarns, ARCH_CONVERT) :
1235                                 XFS_QM_IWARNLIMIT;
1236                 qinf->qi_rtbwarnlimit =
1237                                 INT_GET(ddqp->d_rtbwarns, ARCH_CONVERT) ?
1238                                 INT_GET(ddqp->d_rtbwarns, ARCH_CONVERT) :
1239                                 XFS_QM_RTBWARNLIMIT;
1240                 qinf->qi_bhardlimit =
1241                                 INT_GET(ddqp->d_blk_hardlimit, ARCH_CONVERT);
1242                 qinf->qi_bsoftlimit =
1243                                 INT_GET(ddqp->d_blk_softlimit, ARCH_CONVERT);
1244                 qinf->qi_ihardlimit =
1245                                 INT_GET(ddqp->d_ino_hardlimit, ARCH_CONVERT);
1246                 qinf->qi_isoftlimit =
1247                                 INT_GET(ddqp->d_ino_softlimit, ARCH_CONVERT);
1248                 qinf->qi_rtbhardlimit =
1249                                 INT_GET(ddqp->d_rtb_hardlimit, ARCH_CONVERT);
1250                 qinf->qi_rtbsoftlimit =
1251                                 INT_GET(ddqp->d_rtb_softlimit, ARCH_CONVERT);
1252
1253                 /*
1254                  * We sent the XFS_QMOPT_DQSUSER flag to dqget because
1255                  * we don't want this dquot cached. We haven't done a
1256                  * quotacheck yet, and quotacheck doesn't like incore dquots.
1257                  */
1258                 xfs_qm_dqdestroy(dqp);
1259         } else {        /* dqget failure is not fatal: fall back to the defaults */
1260                 qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
1261                 qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
1262                 qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
1263                 qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
1264                 qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
1265                 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
1266         }
1267
1268         return (0);
1269 }
1270
1271
1272 /*
1273  * Gets called when unmounting a filesystem or when all quotas get turned off.
1274  * Drops the XQM reference, destroys the locks, purges the quota inodes and
1275  * frees the quotainfo, leaving mp->m_quotainfo NULL.
1276  */
1277 void
1278 xfs_qm_destroy_quotainfo(
1279         xfs_mount_t     *mp)
1280 {
1281         xfs_quotainfo_t *qi;
1282
1283         qi = mp->m_quotainfo;
1284         ASSERT(qi != NULL);
1285         ASSERT(xfs_Gqm != NULL);
1286
1287         /*
1288          * Release the reference that XQM kept, so that we know
1289          * when the XQM structure should be freed. We cannot assume
1290          * that xfs_Gqm is non-null after this point.
1291          */
1292         xfs_qm_rele_quotafs_ref(mp);
1293
1294         spinlock_destroy(&qi->qi_pinlock);
1295         xfs_qm_list_destroy(&qi->qi_dqlist);
1296
1297         if (qi->qi_uquotaip) {
1298                 XFS_PURGE_INODE(qi->qi_uquotaip);
1299                 qi->qi_uquotaip = NULL; /* paranoia */
1300         }
1301         if (qi->qi_gquotaip) {
1302                 XFS_PURGE_INODE(qi->qi_gquotaip);
1303                 qi->qi_gquotaip = NULL; /* paranoia, as above */
1304         }
1305         mutex_destroy(&qi->qi_quotaofflock);
1306         kmem_free(qi, sizeof(xfs_quotainfo_t));
1307         mp->m_quotainfo = NULL;
1308 }
1309
1310
1311
1312 /* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
1313 /* Set up an empty dquot list head; 'str' is handed to mutex_init, 'n' is unused. */
1314 /* ARGSUSED */
1315 STATIC void
1316 xfs_qm_list_init(
1317         xfs_dqlist_t    *list,
1318         char            *str,
1319         int             n)
1320 {
1321         mutex_init(&list->qh_lock, MUTEX_DEFAULT, str);
1322         list->qh_next = NULL;           /* no elements yet */
1323         list->qh_version = 0;
1324         list->qh_nelems = 0;
1325 }
1326 /* Tear down a dquot list head; only its mutex is destroyed here. */
1327 STATIC void
1328 xfs_qm_list_destroy(
1329         xfs_dqlist_t    *list)
1330 {
1331         mutex_destroy(&(list->qh_lock));
1332 }
1333
1334
1335 /*
1336  * Stripped down version of dqattach: gets (allocating if needed) the inode's
1337  * dquots into O_udqpp and O_gdqpp without attaching them to the inode, since
1338  * none are attached when quotacheck calls this. Returns 0 or the dqget error.
1339  */
1340 STATIC int
1341 xfs_qm_dqget_noattach(
1342         xfs_inode_t     *ip,
1343         xfs_dquot_t     **O_udqpp,
1344         xfs_dquot_t     **O_gdqpp)
1345 {
1346         int             error;
1347         xfs_mount_t     *mp;
1348         xfs_dquot_t     *udqp, *gdqp;
1349
1350         ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
1351         mp = ip->i_mount;
1352         udqp = NULL;
1353         gdqp = NULL;
1354
1355         if (XFS_IS_UQUOTA_ON(mp)) {
1356                 ASSERT(ip->i_udquot == NULL);
1357                 /*
1358                  * We want the dquot allocated if it doesn't exist.
1359                  */
1360                 if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_uid, XFS_DQ_USER,
1361                                          XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN,
1362                                          &udqp))) {
1363                         /*
1364                          * Shouldn't be able to turn off quotas here.
1365                          */
1366                         ASSERT(error != ESRCH);
1367                         ASSERT(error != ENOENT);
1368                         return (error);
1369                 }
1370                 ASSERT(udqp);
1371         }
1372
1373         if (XFS_IS_OQUOTA_ON(mp)) {
1374                 ASSERT(ip->i_gdquot == NULL);
1375                 if (udqp)
1376                         xfs_dqunlock(udqp);     /* not held across the second dqget */
1377                 error = XFS_IS_GQUOTA_ON(mp) ?
1378                                 xfs_qm_dqget(mp, ip,
1379                                              ip->i_d.di_gid, XFS_DQ_GROUP,
1380                                              XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
1381                                              &gdqp) :
1382                                 xfs_qm_dqget(mp, ip,
1383                                              ip->i_d.di_projid, XFS_DQ_PROJ,
1384                                              XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
1385                                              &gdqp);
1386                 if (error) {
1387                         if (udqp)
1388                                 xfs_qm_dqrele(udqp);    /* give back the user dquot */
1389                         ASSERT(error != ESRCH);
1390                         ASSERT(error != ENOENT);
1391                         return (error);
1392                 }
1393                 ASSERT(gdqp);
1394
1395                 /* Reacquire the locks in the right order: udquot before gdquot */
1396                 if (udqp) {
1397                         if (! xfs_qm_dqlock_nowait(udqp)) {
1398                                 xfs_dqunlock(gdqp);
1399                                 xfs_dqlock(udqp);
1400                                 xfs_dqlock(gdqp);
1401                         }
1402                 }
1403         }
1404
1405         *O_udqpp = udqp;        /* stays NULL when that quota type is not on */
1406         *O_gdqpp = gdqp;
1407
1408 #ifdef QUOTADEBUG
1409         if (udqp) ASSERT(XFS_DQ_IS_LOCKED(udqp));
1410         if (gdqp) ASSERT(XFS_DQ_IS_LOCKED(gdqp));
1411 #endif
1412         return (0);
1413 }
1414
1415 /*
1416  * Create a quota inode, record it in the superblock (as sb_uquotino if
1417  * XFS_QMOPT_UQUOTA, else sb_gquotino) and return it referenced but unlocked.
1418  */
1419 STATIC int
1420 xfs_qm_qino_alloc(
1421         xfs_mount_t     *mp,
1422         xfs_inode_t     **ip,
1423         __int64_t       sbfields,
1424         uint            flags)
1425 {
1426         xfs_trans_t     *tp;
1427         int             error;
1428         unsigned long s;
1429         cred_t          zerocr;
1430         int             committed;
1431
1432         tp = xfs_trans_alloc(mp,XFS_TRANS_QM_QINOCREATE);
1433         if ((error = xfs_trans_reserve(tp,
1434                                       XFS_QM_QINOCREATE_SPACE_RES(mp),
1435                                       XFS_CREATE_LOG_RES(mp), 0,
1436                                       XFS_TRANS_PERM_LOG_RES,
1437                                       XFS_CREATE_LOG_COUNT))) {
1438                 xfs_trans_cancel(tp, 0);
1439                 return (error);
1440         }
1441         memset(&zerocr, 0, sizeof(zerocr));     /* all-zero credentials for the new inode */
1442
1443         if ((error = xfs_dir_ialloc(&tp, mp->m_rootip, S_IFREG, 1, 0,
1444                                    &zerocr, 0, 1, ip, &committed))) {
1445                 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
1446                                  XFS_TRANS_ABORT);
1447                 return (error);
1448         }
1449
1450         /*
1451          * Keep an extra reference to this quota inode. This inode is
1452          * locked exclusively and joined to the transaction already.
1453          */
1454         ASSERT(XFS_ISLOCKED_INODE_EXCL(*ip));
1455         VN_HOLD(XFS_ITOV((*ip)));
1456
1457         /*
1458          * Make the changes in the superblock, and log those too.
1459          * sbfields arg may contain fields other than *QUOTINO;
1460          * VERSIONNUM for example.
1461          */
1462         s = XFS_SB_LOCK(mp);
1463         if (flags & XFS_QMOPT_SBVERSION) {
1464 #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
1465                 unsigned oldv = mp->m_sb.sb_versionnum;
1466 #endif
1467                 ASSERT(!XFS_SB_VERSION_HASQUOTA(&mp->m_sb));
1468                 ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1469                                    XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
1470                        (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1471                         XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
1472
1473                 XFS_SB_VERSION_ADDQUOTA(&mp->m_sb);
1474                 mp->m_sb.sb_uquotino = NULLFSINO;
1475                 mp->m_sb.sb_gquotino = NULLFSINO;
1476
1477                 /* qflags will get updated _after_ quotacheck */
1478                 mp->m_sb.sb_qflags = 0;
1479 #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
1480                 cmn_err(CE_NOTE,
1481                         "Old superblock version %x, converting to %x.",
1482                         oldv, mp->m_sb.sb_versionnum);
1483 #endif
1484         }
1485         if (flags & XFS_QMOPT_UQUOTA)
1486                 mp->m_sb.sb_uquotino = (*ip)->i_ino;
1487         else
1488                 mp->m_sb.sb_gquotino = (*ip)->i_ino;
1489         XFS_SB_UNLOCK(mp, s);
1490         xfs_mod_sb(tp, sbfields);       /* done after the superblock lock is dropped */
1491
1492         if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES,
1493                                      NULL))) {
1494                 xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!");
1495                 return (error);
1496         }
1497         return (0);
1498 }
1499
1500 /* Zero the usage counts, timers and warning counts of each dquot in bp (ids id, id+1, ...). */
1501 STATIC int
1502 xfs_qm_reset_dqcounts(
1503         xfs_mount_t     *mp,
1504         xfs_buf_t       *bp,
1505         xfs_dqid_t      id,
1506         uint            type)
1507 {
1508         xfs_disk_dquot_t        *ddq;
1509         int                     j;
1510
1511         xfs_buftrace("RESET DQUOTS", bp);
1512         /*
1513          * Reset all counters and timers. They'll be
1514          * started afresh by xfs_qm_quotacheck.
1515          */
1516 #ifdef DEBUG
1517         j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
1518         do_div(j, sizeof(xfs_dqblk_t));
1519         ASSERT(XFS_QM_DQPERBLK(mp) == j);
1520 #endif
1521         ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp);
1522         for (j = 0; j < XFS_QM_DQPERBLK(mp); j++) {
1523                 /*
1524                  * Do a sanity check, and if needed, repair the dqblk. Don't
1525                  * output any warnings because it's perfectly possible to
1526                  * find uninitialized dquot blks. See comment in xfs_qm_dqcheck.
1527                  */
1528                 (void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR,
1529                                       "xfs_quotacheck");
1530                 INT_SET(ddq->d_bcount, ARCH_CONVERT, 0ULL);
1531                 INT_SET(ddq->d_icount, ARCH_CONVERT, 0ULL);
1532                 INT_SET(ddq->d_rtbcount, ARCH_CONVERT, 0ULL);
1533                 INT_SET(ddq->d_btimer, ARCH_CONVERT, (time_t)0);
1534                 INT_SET(ddq->d_itimer, ARCH_CONVERT, (time_t)0);
1535                 INT_SET(ddq->d_rtbtimer, ARCH_CONVERT, (time_t)0);
1536                 INT_SET(ddq->d_bwarns, ARCH_CONVERT, 0UL);
1537                 INT_SET(ddq->d_iwarns, ARCH_CONVERT, 0UL);
1538                 INT_SET(ddq->d_rtbwarns, ARCH_CONVERT, 0UL);
1539                 ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);    /* step one xfs_dqblk_t */
1540         }
1541
1542         return (0);
1543 }
1544 /*
1545  * Reset the counters of the dquots in 'blkcnt' consecutive filesystem blocks
1546  * starting at 'bno'; the first dquot in the first block has id 'firstid'.
1547  * 'flags' selects the dquot type (XFS_QMOPT_UQUOTA / XFS_QMOPT_PQUOTA, else
1548  * group). Returns 0, or the error from the first buffer read that fails.
1549  */
1550 STATIC int
1551 xfs_qm_dqiter_bufs(
1552         xfs_mount_t     *mp,
1553         xfs_dqid_t      firstid,
1554         xfs_fsblock_t   bno,
1555         xfs_filblks_t   blkcnt,
1556         uint            flags)
1557 {
1558         xfs_buf_t       *bp;
1559         int             error;
1560         int             type;
1561
1562         ASSERT(blkcnt > 0);
1563         type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
1564                 (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
1565         error = 0;
1566
1567         /*
1568          * Blkcnt arg can be a very big number, and might even be
1569          * larger than the log itself, so no transaction is used here:
1570          * each buffer is read with a NULL transaction pointer, reset
1571          * in place and handed to xfs_bdwrite(). We don't really care
1572          * to log any of this either, because we just discard
1573          * everything if we were to crash in the middle of this loop.
1574          * A failed read ends the loop and its error is returned.
1575          */
1576         while (blkcnt--) {
1577                 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
1578                               XFS_FSB_TO_DADDR(mp, bno),
1579                               (int)XFS_QI_DQCHUNKLEN(mp), 0, &bp);
1580                 if (error)
1581                         break;
1582
1583                 (void) xfs_qm_reset_dqcounts(mp, bp, firstid, type);
1584                 xfs_bdwrite(mp, bp);
1585                 /*
1586                  * goto the next block.
1587                  */
1588                 bno++;
1589                 firstid += XFS_QM_DQPERBLK(mp);
1590         }
1591         return (error);
1592 }
1593
1594 /*
1595  * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a
1596  * caller supplied function for every chunk of dquots that we find.
1597  */
1598 STATIC int
1599 xfs_qm_dqiterate(
1600         xfs_mount_t     *mp,
1601         xfs_inode_t     *qip,
1602         uint            flags)
1603 {
1604         xfs_bmbt_irec_t         *map;
1605         int                     i, nmaps;       /* number of map entries */
1606         int                     error;          /* return value */
1607         xfs_fileoff_t           lblkno;
1608         xfs_filblks_t           maxlblkcnt;
1609         xfs_dqid_t              firstid;
1610         xfs_fsblock_t           rablkno;
1611         xfs_filblks_t           rablkcnt;
1612
1613         error = 0;
1614         /*
1615          * This looks racey, but we can't keep an inode lock across a
1616          * trans_reserve. But, this gets called during quotacheck, and that
1617          * happens only at mount time which is single threaded.
1618          */
1619         if (qip->i_d.di_nblocks == 0)
1620                 return (0);
1621
1622         map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
1623
1624         lblkno = 0;
1625         maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
1626         do {
1627                 nmaps = XFS_DQITER_MAP_SIZE;
1628                 /*
1629                  * We aren't changing the inode itself. Just changing
1630                  * some of its data. No new blocks are added here, and
1631                  * the inode is never added to the transaction.
1632                  */
1633                 xfs_ilock(qip, XFS_ILOCK_SHARED);
1634                 error = xfs_bmapi(NULL, qip, lblkno,
1635                                   maxlblkcnt - lblkno,
1636                                   XFS_BMAPI_METADATA,
1637                                   NULL,
1638                                   0, map, &nmaps, NULL);
1639                 xfs_iunlock(qip, XFS_ILOCK_SHARED);
1640                 if (error)
1641                         break;
1642
1643                 ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
1644                 for (i = 0; i < nmaps; i++) {
1645                         ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
1646                         ASSERT(map[i].br_blockcount);
1647
1648
1649                         lblkno += map[i].br_blockcount;
1650
1651                         if (map[i].br_startblock == HOLESTARTBLOCK)
1652                                 continue;
1653
1654                         firstid = (xfs_dqid_t) map[i].br_startoff *
1655                                 XFS_QM_DQPERBLK(mp);
1656                         /*
1657                          * Do a read-ahead on the next extent.
1658                          */
1659                         if ((i+1 < nmaps) &&
1660                             (map[i+1].br_startblock != HOLESTARTBLOCK)) {
1661                                 rablkcnt =  map[i+1].br_blockcount;
1662                                 rablkno = map[i+1].br_startblock;
1663                                 while (rablkcnt--) {
1664                                         xfs_baread(mp->m_ddev_targp,
1665                                                XFS_FSB_TO_DADDR(mp, rablkno),
1666                                                (int)XFS_QI_DQCHUNKLEN(mp));
1667                                         rablkno++;
1668                                 }
1669                         }
1670                         /*
1671                          * Iterate thru all the blks in the extent and
1672                          * reset the counters of all the dquots inside them.
1673                          */
1674                         if ((error = xfs_qm_dqiter_bufs(mp,
1675                                                        firstid,
1676                                                        map[i].br_startblock,
1677                                                        map[i].br_blockcount,
1678                                                        flags))) {
1679                                 break;
1680                         }
1681                 }
1682
1683                 if (error)
1684                         break;
1685         } while (nmaps > 0);
1686
1687         kmem_free(map, XFS_DQITER_MAP_SIZE * sizeof(*map));
1688
1689         return (error);
1690 }
1691
1692 /*
1693  * Called by dqusage_adjust in doing a quotacheck.
1694  * Given the inode, and a dquot (either USR or GRP, doesn't matter),
1695  * this updates its incore copy as well as the buffer copy. This is
1696  * so that once the quotacheck is done, we can just log all the buffers,
1697  * as opposed to logging numerous updates to individual dquots.
1698  */
1699 STATIC void
1700 xfs_qm_quotacheck_dqadjust(
1701         xfs_dquot_t             *dqp,
1702         xfs_qcnt_t              nblks,
1703         xfs_qcnt_t              rtblks)
1704 {
1705         ASSERT(XFS_DQ_IS_LOCKED(dqp));
1706         xfs_dqtrace_entry(dqp, "QCHECK DQADJUST");
1707         /*
1708          * Adjust the inode count and the block count to reflect this inode's
1709          * resource usage.
1710          */
1711         INT_MOD(dqp->q_core.d_icount, ARCH_CONVERT, +1);
1712         dqp->q_res_icount++;
1713         if (nblks) {
1714                 INT_MOD(dqp->q_core.d_bcount, ARCH_CONVERT, nblks);
1715                 dqp->q_res_bcount += nblks;
1716         }
1717         if (rtblks) {
1718                 INT_MOD(dqp->q_core.d_rtbcount, ARCH_CONVERT, rtblks);
1719                 dqp->q_res_rtbcount += rtblks;
1720         }
1721
1722         /*
1723          * Set default limits, adjust timers (since we changed usages)
1724          */
1725         if (! XFS_IS_SUSER_DQUOT(dqp)) {
1726                 xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core);
1727                 xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core);
1728         }
1729
1730         dqp->dq_flags |= XFS_DQ_DIRTY;
1731 }
1732
1733 STATIC int
1734 xfs_qm_get_rtblks(
1735         xfs_inode_t     *ip,
1736         xfs_qcnt_t      *O_rtblks)
1737 {
1738         xfs_filblks_t   rtblks;                 /* total rt blks */
1739         xfs_ifork_t     *ifp;                   /* inode fork pointer */
1740         xfs_extnum_t    nextents;               /* number of extent entries */
1741         xfs_bmbt_rec_t  *base;                  /* base of extent array */
1742         xfs_bmbt_rec_t  *ep;                    /* pointer to an extent entry */
1743         int             error;
1744
1745         ASSERT(XFS_IS_REALTIME_INODE(ip));
1746         ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1747         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1748                 if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
1749                         return (error);
1750         }
1751         rtblks = 0;
1752         nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
1753         base = &ifp->if_u1.if_extents[0];
1754         for (ep = base; ep < &base[nextents]; ep++)
1755                 rtblks += xfs_bmbt_get_blockcount(ep);
1756         *O_rtblks = (xfs_qcnt_t)rtblks;
1757         return (0);
1758 }
1759
1760 /*
1761  * callback routine supplied to bulkstat(). Given an inumber, find its
1762  * dquots and update them to account for resources taken by that inode.
1763  */
1764 /* ARGSUSED */
1765 STATIC int
1766 xfs_qm_dqusage_adjust(
1767         xfs_mount_t     *mp,            /* mount point for filesystem */
1768         xfs_ino_t       ino,            /* inode number to get data for */
1769         void            __user *buffer, /* not used */
1770         int             ubsize,         /* not used */
1771         void            *private_data,  /* not used */
1772         xfs_daddr_t     bno,            /* starting block of inode cluster */
1773         int             *ubused,        /* not used */
1774         void            *dip,           /* on-disk inode pointer (not used) */
1775         int             *res)           /* result code value */
1776 {
1777         xfs_inode_t     *ip;
1778         xfs_dquot_t     *udqp, *gdqp;
1779         xfs_qcnt_t      nblks, rtblks;
1780         int             error;
1781
1782         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1783
1784         /*
1785          * rootino must have its resources accounted for, not so with the quota
1786          * inodes.
1787          */
1788         if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
1789                 *res = BULKSTAT_RV_NOTHING;
1790                 return XFS_ERROR(EINVAL);
1791         }
1792
1793         /*
1794          * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
1795          * interface expects the inode to be exclusively locked because that's
1796          * the case in all other instances. It's OK that we do this because
1797          * quotacheck is done only at mount time.
1798          */
1799         if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) {
1800                 *res = BULKSTAT_RV_NOTHING;
1801                 return (error);
1802         }
1803
1804         if (ip->i_d.di_mode == 0) {
1805                 xfs_iput_new(ip, XFS_ILOCK_EXCL);
1806                 *res = BULKSTAT_RV_NOTHING;
1807                 return XFS_ERROR(ENOENT);
1808         }
1809
1810         /*
1811          * Obtain the locked dquots. In case of an error (eg. allocation
1812          * fails for ENOSPC), we return the negative of the error number
1813          * to bulkstat, so that it can get propagated to quotacheck() and
1814          * making us disable quotas for the file system.
1815          */
1816         if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) {
1817                 xfs_iput(ip, XFS_ILOCK_EXCL);
1818                 *res = BULKSTAT_RV_GIVEUP;
1819                 return (error);
1820         }
1821
1822         rtblks = 0;
1823         if (! XFS_IS_REALTIME_INODE(ip)) {
1824                 nblks = (xfs_qcnt_t)ip->i_d.di_nblocks;
1825         } else {
1826                 /*
1827                  * Walk thru the extent list and count the realtime blocks.
1828                  */
1829                 if ((error = xfs_qm_get_rtblks(ip, &rtblks))) {
1830                         xfs_iput(ip, XFS_ILOCK_EXCL);
1831                         if (udqp)
1832                                 xfs_qm_dqput(udqp);
1833                         if (gdqp)
1834                                 xfs_qm_dqput(gdqp);
1835                         *res = BULKSTAT_RV_GIVEUP;
1836                         return (error);
1837                 }
1838                 nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
1839         }
1840         ASSERT(ip->i_delayed_blks == 0);
1841
1842         /*
1843          * We can't release the inode while holding its dquot locks.
1844          * The inode can go into inactive and might try to acquire the dquotlocks.
1845          * So, just unlock here and do a vn_rele at the end.
1846          */
1847         xfs_iunlock(ip, XFS_ILOCK_EXCL);
1848
1849         /*
1850          * Add the (disk blocks and inode) resources occupied by this
1851          * inode to its dquots. We do this adjustment in the incore dquot,
1852          * and also copy the changes to its buffer.
1853          * We don't care about putting these changes in a transaction
1854          * envelope because if we crash in the middle of a 'quotacheck'
1855          * we have to start from the beginning anyway.
1856          * Once we're done, we'll log all the dquot bufs.
1857          *
1858          * The *QUOTA_ON checks below may look pretty racey, but quotachecks
1859          * and quotaoffs don't race. (Quotachecks happen at mount time only).
1860          */
1861         if (XFS_IS_UQUOTA_ON(mp)) {
1862                 ASSERT(udqp);
1863                 xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks);
1864                 xfs_qm_dqput(udqp);
1865         }
1866         if (XFS_IS_OQUOTA_ON(mp)) {
1867                 ASSERT(gdqp);
1868                 xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks);
1869                 xfs_qm_dqput(gdqp);
1870         }
1871         /*
1872          * Now release the inode. This will send it to 'inactive', and
1873          * possibly even free blocks.
1874          */
1875         VN_RELE(XFS_ITOV(ip));
1876
1877         /*
1878          * Goto next inode.
1879          */
1880         *res = BULKSTAT_RV_DIDONE;
1881         return (0);
1882 }
1883
1884 /*
1885  * Walk thru all the filesystem inodes and construct a consistent view
1886  * of the disk quota world. If the quotacheck fails, disable quotas.
1887  */
1888 int
1889 xfs_qm_quotacheck(
1890         xfs_mount_t     *mp)
1891 {
1892         int             done, count, error;
1893         xfs_ino_t       lastino;
1894         size_t          structsz;
1895         xfs_inode_t     *uip, *gip;
1896         uint            flags;
1897
1898         count = INT_MAX;
1899         structsz = 1;
1900         lastino = 0;
1901         flags = 0;
1902
1903         ASSERT(XFS_QI_UQIP(mp) || XFS_QI_GQIP(mp));
1904         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1905
1906         /*
1907          * There should be no cached dquots. The (simplistic) quotacheck
1908          * algorithm doesn't like that.
1909          */
1910         ASSERT(XFS_QI_MPLNDQUOTS(mp) == 0);
1911
1912         cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname);
1913
1914         /*
1915          * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
1916          * their counters to zero. We need a clean slate.
1917          * We don't log our changes till later.
1918          */
1919         if ((uip = XFS_QI_UQIP(mp))) {
1920                 if ((error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA)))
1921                         goto error_return;
1922                 flags |= XFS_UQUOTA_CHKD;
1923         }
1924
1925         if ((gip = XFS_QI_GQIP(mp))) {
1926                 if ((error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
1927                                         XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA)))
1928                         goto error_return;
1929                 flags |= XFS_OQUOTA_CHKD;
1930         }
1931
1932         do {
1933                 /*
1934                  * Iterate thru all the inodes in the file system,
1935                  * adjusting the corresponding dquot counters in core.
1936                  */
1937                 if ((error = xfs_bulkstat(mp, &lastino, &count,
1938                                      xfs_qm_dqusage_adjust, NULL,
1939                                      structsz, NULL,
1940                                      BULKSTAT_FG_IGET|BULKSTAT_FG_VFSLOCKED,
1941                                      &done)))
1942                         break;
1943
1944         } while (! done);
1945
1946         /*
1947          * We can get this error if we couldn't do a dquot allocation inside
1948          * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
1949          * dirty dquots that might be cached, we just want to get rid of them
1950          * and turn quotaoff. The dquots won't be attached to any of the inodes
1951          * at this point (because we intentionally didn't in dqget_noattach).
1952          */
1953         if (error) {
1954                 xfs_qm_dqpurge_all(mp,
1955                                    XFS_QMOPT_UQUOTA|XFS_QMOPT_GQUOTA|
1956                                    XFS_QMOPT_PQUOTA|XFS_QMOPT_QUOTAOFF);
1957                 goto error_return;
1958         }
1959         /*
1960          * We've made all the changes that we need to make incore.
1961          * Now flush_them down to disk buffers.
1962          */
1963         xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI);
1964
1965         /*
1966          * We didn't log anything, because if we crashed, we'll have to
1967          * start the quotacheck from scratch anyway. However, we must make
1968          * sure that our dquot changes are secure before we put the
1969          * quotacheck'd stamp on the superblock. So, here we do a synchronous
1970          * flush.
1971          */
1972         XFS_bflush(mp->m_ddev_targp);
1973
1974         /*
1975          * If one type of quotas is off, then it will lose its
1976          * quotachecked status, since we won't be doing accounting for
1977          * that type anymore.
1978          */
1979         mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
1980         mp->m_qflags |= flags;
1981
1982         XQM_LIST_PRINT(&(XFS_QI_MPL_LIST(mp)), MPL_NEXT, "++++ Mp list +++");
1983
1984  error_return:
1985         if (error) {
1986                 cmn_err(CE_WARN, "XFS quotacheck %s: Unsuccessful (Error %d): "
1987                         "Disabling quotas.",
1988                         mp->m_fsname, error);
1989                 /*
1990                  * We must turn off quotas.
1991                  */
1992                 ASSERT(mp->m_quotainfo != NULL);
1993                 ASSERT(xfs_Gqm != NULL);
1994                 xfs_qm_destroy_quotainfo(mp);
1995                 (void)xfs_mount_reset_sbqflags(mp);
1996         } else {
1997                 cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname);
1998         }
1999         return (error);
2000 }
2001
2002 /*
2003  * This is called after the superblock has been read in and we're ready to
2004  * iget the quota inodes.
2005  */
2006 STATIC int
2007 xfs_qm_init_quotainos(
2008         xfs_mount_t     *mp)
2009 {
2010         xfs_inode_t     *uip, *gip;
2011         int             error;
2012         __int64_t       sbflags;
2013         uint            flags;
2014
2015         ASSERT(mp->m_quotainfo);
2016         uip = gip = NULL;
2017         sbflags = 0;
2018         flags = 0;
2019
2020         /*
2021          * Get the uquota and gquota inodes
2022          */
2023         if (XFS_SB_VERSION_HASQUOTA(&mp->m_sb)) {
2024                 if (XFS_IS_UQUOTA_ON(mp) &&
2025                     mp->m_sb.sb_uquotino != NULLFSINO) {
2026                         ASSERT(mp->m_sb.sb_uquotino > 0);
2027                         if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
2028                                              0, 0, &uip, 0)))
2029                                 return XFS_ERROR(error);
2030                 }
2031                 if (XFS_IS_OQUOTA_ON(mp) &&
2032                     mp->m_sb.sb_gquotino != NULLFSINO) {
2033                         ASSERT(mp->m_sb.sb_gquotino > 0);
2034                         if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
2035                                              0, 0, &gip, 0))) {
2036                                 if (uip)
2037                                         VN_RELE(XFS_ITOV(uip));
2038                                 return XFS_ERROR(error);
2039                         }
2040                 }
2041         } else {
2042                 flags |= XFS_QMOPT_SBVERSION;
2043                 sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
2044                             XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
2045         }
2046
2047         /*
2048          * Create the two inodes, if they don't exist already. The changes
2049          * made above will get added to a transaction and logged in one of
2050          * the qino_alloc calls below.  If the device is readonly,
2051          * temporarily switch to read-write to do this.
2052          */
2053         if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
2054                 if ((error = xfs_qm_qino_alloc(mp, &uip,
2055                                               sbflags | XFS_SB_UQUOTINO,
2056                                               flags | XFS_QMOPT_UQUOTA)))
2057                         return XFS_ERROR(error);
2058
2059                 flags &= ~XFS_QMOPT_SBVERSION;
2060         }
2061         if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) {
2062                 flags |= (XFS_IS_GQUOTA_ON(mp) ?
2063                                 XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
2064                 error = xfs_qm_qino_alloc(mp, &gip,
2065                                           sbflags | XFS_SB_GQUOTINO, flags);
2066                 if (error) {
2067                         if (uip)
2068                                 VN_RELE(XFS_ITOV(uip));
2069
2070                         return XFS_ERROR(error);
2071                 }
2072         }
2073
2074         XFS_QI_UQIP(mp) = uip;
2075         XFS_QI_GQIP(mp) = gip;
2076
2077         return (0);
2078 }
2079
2080
/*
 * Traverse the freelist of dquots and attempt to reclaim a maximum of
 * 'howmany' dquots. This operation races with dqlookup(), and attempts to
 * favor the lookup function ...
 * XXXsup merge this with qm_reclaim_one().
 *
 * Returns the number of dquots actually reclaimed (destroyed), which is
 * 0 when howmany <= 0. The walk gives up early once it has had to
 * restart XFS_QM_RECLAIM_MAX_RESTARTS times.
 */
STATIC int
xfs_qm_shake_freelist(
        int howmany)
{
        int             nreclaimed;
        xfs_dqhash_t    *hash;
        xfs_dquot_t     *dqp, *nextdqp;
        int             restarts;
        int             nflushes;       /* set below but never read here */

        if (howmany <= 0)
                return (0);

        nreclaimed = 0;
        restarts = 0;
        nflushes = 0;

#ifdef QUOTADEBUG
        cmn_err(CE_DEBUG, "Shake free 0x%x", howmany);
#endif
        /* lock order is : hashchainlock, freelistlock, mplistlock */
 tryagain:
        xfs_qm_freelist_lock(xfs_Gqm);

        /*
         * Walk from the head of the freelist until we wrap back to the
         * list head or have reclaimed enough. There is no increment
         * clause: every path in the body advances dqp itself.
         */
        for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
             ((dqp != (xfs_dquot_t *) &xfs_Gqm->qm_dqfreelist) &&
              nreclaimed < howmany); ) {
                xfs_dqlock(dqp);

                /*
                 * We are racing with dqlookup here. Naturally we don't
                 * want to reclaim a dquot that lookup wants.
                 * Drop both locks and restart the walk from the head,
                 * unless the restart budget is used up.
                 */
                if (dqp->dq_flags & XFS_DQ_WANT) {
                        xfs_dqunlock(dqp);
                        xfs_qm_freelist_unlock(xfs_Gqm);
                        if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
                                return (nreclaimed);
                        XQM_STATS_INC(xqmstats.xs_qm_dqwants);
                        goto tryagain;
                }

                /*
                 * If the dquot is inactive, we are assured that it is
                 * not on the mplist or the hashlist, and that makes our
                 * life easier.
                 */
                if (dqp->dq_flags & XFS_DQ_INACTIVE) {
                        ASSERT(dqp->q_mount == NULL);
                        ASSERT(! XFS_DQ_IS_DIRTY(dqp));
                        ASSERT(dqp->HL_PREVP == NULL);
                        ASSERT(dqp->MPL_PREVP == NULL);
                        XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
                        /* remember the successor before dqp is destroyed */
                        nextdqp = dqp->dq_flnext;
                        goto off_freelist;
                }

                ASSERT(dqp->MPL_PREVP);
                /*
                 * Try to grab the flush lock. If this dquot is in the process of
                 * getting flushed to disk, we don't want to reclaim it.
                 */
                if (! xfs_qm_dqflock_nowait(dqp)) {
                        xfs_dqunlock(dqp);
                        dqp = dqp->dq_flnext;
                        continue;
                }

                /*
                 * We have the flush lock so we know that this is not in the
                 * process of being flushed. So, if this is dirty, flush it
                 * DELWRI so that we don't get a freelist infested with
                 * dirty dquots.
                 */
                if (XFS_DQ_IS_DIRTY(dqp)) {
                        xfs_dqtrace_entry(dqp, "DQSHAKE: DQDIRTY");
                        /*
                         * We flush it delayed write, so don't bother
                         * releasing the mplock.
                         * The flush result is deliberately ignored; the
                         * dquot is skipped this time around either way.
                         */
                        (void) xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
                        xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
                        dqp = dqp->dq_flnext;
                        continue;
                }
                /*
                 * We're trying to get the hashlock out of order. This races
                 * with dqlookup; so, we giveup and goto the next dquot if
                 * we couldn't get the hashlock. This way, we won't starve
                 * a dqlookup process that holds the hashlock that is
                 * waiting for the freelist lock.
                 */
                if (! xfs_qm_dqhashlock_nowait(dqp)) {
                        xfs_dqfunlock(dqp);
                        xfs_dqunlock(dqp);
                        dqp = dqp->dq_flnext;
                        continue;
                }
                /*
                 * This races with dquot allocation code as well as dqflush_all
                 * and reclaim code. So, if we failed to grab the mplist lock,
                 * giveup everything and start over.
                 */
                hash = dqp->q_hash;
                ASSERT(hash);
                if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
                        /* XXX put a sentinel so that we can come back here */
                        xfs_dqfunlock(dqp);
                        xfs_dqunlock(dqp);
                        XFS_DQ_HASH_UNLOCK(hash);
                        xfs_qm_freelist_unlock(xfs_Gqm);
                        if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
                                return (nreclaimed);
                        goto tryagain;
                }
                xfs_dqtrace_entry(dqp, "DQSHAKE: UNLINKING");
#ifdef QUOTADEBUG
                cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n",
                        dqp, INT_GET(dqp->q_core.d_id, ARCH_CONVERT));
#endif
                /*
                 * All of the dquot, flush, hash chain and mplist locks are
                 * held here: take the dquot off the mount list and the hash
                 * chain, then drop the flush, mplist and hash locks.
                 */
                ASSERT(dqp->q_nrefs == 0);
                nextdqp = dqp->dq_flnext;
                XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
                XQM_HASHLIST_REMOVE(hash, dqp);
                xfs_dqfunlock(dqp);
                xfs_qm_mplist_unlock(dqp->q_mount);
                XFS_DQ_HASH_UNLOCK(hash);

 off_freelist:
                /*
                 * Common tail for inactive and unlinked dquots: take it
                 * off the freelist, unlock and destroy it, then move on
                 * to the successor saved in nextdqp.
                 */
                XQM_FREELIST_REMOVE(dqp);
                xfs_dqunlock(dqp);
                nreclaimed++;
                XQM_STATS_INC(xqmstats.xs_qm_dqshake_reclaims);
                xfs_qm_dqdestroy(dqp);
                dqp = nextdqp;
        }
        xfs_qm_freelist_unlock(xfs_Gqm);
        return (nreclaimed);
}
2226
2227
2228 /*
2229  * The kmem_shake interface is invoked when memory is running low.
2230  */
2231 /* ARGSUSED */
2232 STATIC int
2233 xfs_qm_shake(int nr_to_scan, unsigned int gfp_mask)
2234 {
2235         int     ndqused, nfree, n;
2236
2237         if (!kmem_shake_allow(gfp_mask))
2238                 return (0);
2239         if (!xfs_Gqm)
2240                 return (0);
2241
2242         nfree = xfs_Gqm->qm_dqfreelist.qh_nelems; /* free dquots */
2243         /* incore dquots in all f/s's */
2244         ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
2245
2246         ASSERT(ndqused >= 0);
2247
2248         if (nfree <= ndqused && nfree < ndquot)
2249                 return (0);
2250
2251         ndqused *= xfs_Gqm->qm_dqfree_ratio;    /* target # of free dquots */
2252         n = nfree - ndqused - ndquot;           /* # over target */
2253
2254         return xfs_qm_shake_freelist(MAX(nfree, n));
2255 }
2256
2257
/*
 * Just pop the least recently used dquot off the freelist and
 * recycle it.
 *
 * Returns the dquot taken off the freelist (and, unless it was inactive,
 * off the mount list and hash chain as well), or NULL if the whole list
 * was walked without success or XFS_QM_RECLAIM_MAX_RESTARTS restarts
 * were used up.
 *
 * NOTE(review): this comment used to say the returned dquot is locked,
 * but both successful paths below call xfs_dqunlock() on it before
 * returning - callers should not rely on it being locked.
 */
STATIC xfs_dquot_t *
xfs_qm_dqreclaim_one(void)
{
        xfs_dquot_t     *dqpout;
        xfs_dquot_t     *dqp;
        int             restarts;
        int             nflushes;       /* set below but never read here */

        restarts = 0;
        dqpout = NULL;
        nflushes = 0;

        /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
 startagain:
        xfs_qm_freelist_lock(xfs_Gqm);

        FOREACH_DQUOT_IN_FREELIST(dqp, &(xfs_Gqm->qm_dqfreelist)) {
                xfs_dqlock(dqp);

                /*
                 * We are racing with dqlookup here. Naturally we don't
                 * want to reclaim a dquot that lookup wants. We release the
                 * freelist lock and start over, so that lookup will grab
                 * both the dquot and the freelistlock.
                 */
                if (dqp->dq_flags & XFS_DQ_WANT) {
                        ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
                        xfs_dqtrace_entry(dqp, "DQRECLAIM: DQWANT");
                        xfs_dqunlock(dqp);
                        xfs_qm_freelist_unlock(xfs_Gqm);
                        if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
                                return (NULL);
                        XQM_STATS_INC(xqmstats.xs_qm_dqwants);
                        goto startagain;
                }

                /*
                 * If the dquot is inactive, we are assured that it is
                 * not on the mplist or the hashlist, and that makes our
                 * life easier.
                 * It only has to come off the freelist before being
                 * handed back.
                 */
                if (dqp->dq_flags & XFS_DQ_INACTIVE) {
                        ASSERT(dqp->q_mount == NULL);
                        ASSERT(! XFS_DQ_IS_DIRTY(dqp));
                        ASSERT(dqp->HL_PREVP == NULL);
                        ASSERT(dqp->MPL_PREVP == NULL);
                        XQM_FREELIST_REMOVE(dqp);
                        xfs_dqunlock(dqp);
                        dqpout = dqp;
                        XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
                        break;
                }

                ASSERT(dqp->q_hash);
                ASSERT(dqp->MPL_PREVP);

                /*
                 * Try to grab the flush lock. If this dquot is in the process of
                 * getting flushed to disk, we don't want to reclaim it.
                 */
                if (! xfs_qm_dqflock_nowait(dqp)) {
                        xfs_dqunlock(dqp);
                        continue;
                }

                /*
                 * We have the flush lock so we know that this is not in the
                 * process of being flushed. So, if this is dirty, flush it
                 * DELWRI so that we don't get a freelist infested with
                 * dirty dquots.
                 */
                if (XFS_DQ_IS_DIRTY(dqp)) {
                        xfs_dqtrace_entry(dqp, "DQRECLAIM: DQDIRTY");
                        /*
                         * We flush it delayed write, so don't bother
                         * releasing the freelist lock.
                         * The flush result is deliberately ignored; the
                         * dquot is skipped this time around either way.
                         */
                        (void) xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
                        xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
                        continue;
                }

                /*
                 * Both remaining locks are only tried, never waited for.
                 * If the mplist lock is busy, skip this dquot.
                 */
                if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
                        xfs_dqfunlock(dqp);
                        xfs_dqunlock(dqp);
                        continue;
                }

                /*
                 * If the hash chain lock is busy, undo the locks taken so
                 * far via the shared unlock tail; dqpout is still NULL
                 * there, so the walk continues with the next dquot.
                 */
                if (! xfs_qm_dqhashlock_nowait(dqp))
                        goto mplistunlock;

                /*
                 * Everything is locked: unlink the dquot from the mount
                 * list, the hash chain and the freelist.
                 */
                ASSERT(dqp->q_nrefs == 0);
                xfs_dqtrace_entry(dqp, "DQRECLAIM: UNLINKING");
                XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
                XQM_HASHLIST_REMOVE(dqp->q_hash, dqp);
                XQM_FREELIST_REMOVE(dqp);
                dqpout = dqp;
                XFS_DQ_HASH_UNLOCK(dqp->q_hash);
 mplistunlock:
                xfs_qm_mplist_unlock(dqp->q_mount);
                xfs_dqfunlock(dqp);
                xfs_dqunlock(dqp);
                if (dqpout)
                        break;
        }

        xfs_qm_freelist_unlock(xfs_Gqm);
        return (dqpout);
}
2371
2372
2373 /*------------------------------------------------------------------*/
2374
2375 /*
2376  * Return a new incore dquot. Depending on the number of
2377  * dquots in the system, we either allocate a new one on the kernel heap,
2378  * or reclaim a free one.
2379  * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
2380  * to reclaim an existing one from the freelist.
2381  */
2382 boolean_t
2383 xfs_qm_dqalloc_incore(
2384         xfs_dquot_t **O_dqpp)
2385 {
2386         xfs_dquot_t     *dqp;
2387
2388         /*
2389          * Check against high water mark to see if we want to pop
2390          * a nincompoop dquot off the freelist.
2391          */
2392         if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
2393                 /*
2394                  * Try to recycle a dquot from the freelist.
2395                  */
2396                 if ((dqp = xfs_qm_dqreclaim_one())) {
2397                         XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
2398                         /*
2399                          * Just zero the core here. The rest will get
2400                          * reinitialized by caller. XXX we shouldn't even
2401                          * do this zero ...
2402                          */
2403                         memset(&dqp->q_core, 0, sizeof(dqp->q_core));
2404                         *O_dqpp = dqp;
2405                         return (B_FALSE);
2406                 }
2407                 XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
2408         }
2409
2410         /*
2411          * Allocate a brand new dquot on the kernel heap and return it
2412          * to the caller to initialize.
2413          */
2414         ASSERT(xfs_Gqm->qm_dqzone != NULL);
2415         *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
2416         atomic_inc(&xfs_Gqm->qm_totaldquots);
2417
2418         return (B_TRUE);
2419 }
2420
2421
2422 /*
2423  * Start a transaction and write the incore superblock changes to
2424  * disk. flags parameter indicates which fields have changed.
2425  */
2426 int
2427 xfs_qm_write_sb_changes(
2428         xfs_mount_t     *mp,
2429         __int64_t       flags)
2430 {
2431         xfs_trans_t     *tp;
2432         int             error;
2433
2434 #ifdef QUOTADEBUG
2435         cmn_err(CE_NOTE, "Writing superblock quota changes :%s", mp->m_fsname);
2436 #endif
2437         tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
2438         if ((error = xfs_trans_reserve(tp, 0,
2439                                       mp->m_sb.sb_sectsize + 128, 0,
2440                                       0,
2441                                       XFS_DEFAULT_LOG_COUNT))) {
2442                 xfs_trans_cancel(tp, 0);
2443                 return (error);
2444         }
2445
2446         xfs_mod_sb(tp, flags);
2447         (void) xfs_trans_commit(tp, 0, NULL);
2448
2449         return (0);
2450 }
2451
2452
2453 /* --------------- utility functions for vnodeops ---------------- */
2454
2455
2456 /*
2457  * Given an inode, a uid and gid (from cred_t) make sure that we have
2458  * allocated relevant dquot(s) on disk, and that we won't exceed inode
2459  * quotas by creating this file.
2460  * This also attaches dquot(s) to the given inode after locking it,
2461  * and returns the dquots corresponding to the uid and/or gid.
2462  *
2463  * in   : inode (unlocked)
2464  * out  : udquot, gdquot with references taken and unlocked
2465  */
2466 int
2467 xfs_qm_vop_dqalloc(
2468         xfs_mount_t     *mp,
2469         xfs_inode_t     *ip,
2470         uid_t           uid,
2471         gid_t           gid,
2472         prid_t          prid,
2473         uint            flags,
2474         xfs_dquot_t     **O_udqpp,
2475         xfs_dquot_t     **O_gdqpp)
2476 {
2477         int             error;
2478         xfs_dquot_t     *uq, *gq;
2479         uint            lockflags;
2480
2481         if (!XFS_IS_QUOTA_ON(mp))
2482                 return 0;
2483
2484         lockflags = XFS_ILOCK_EXCL;
2485         xfs_ilock(ip, lockflags);
2486
2487         if ((flags & XFS_QMOPT_INHERIT) &&
2488             XFS_INHERIT_GID(ip, XFS_MTOVFS(mp)))
2489                 gid = ip->i_d.di_gid;
2490
2491         /*
2492          * Attach the dquot(s) to this inode, doing a dquot allocation
2493          * if necessary. The dquot(s) will not be locked.
2494          */
2495         if (XFS_NOT_DQATTACHED(mp, ip)) {
2496                 if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_DQALLOC |
2497                                             XFS_QMOPT_ILOCKED))) {
2498                         xfs_iunlock(ip, lockflags);
2499                         return (error);
2500                 }
2501         }
2502
2503         uq = gq = NULL;
2504         if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
2505                 if (ip->i_d.di_uid != uid) {
2506                         /*
2507                          * What we need is the dquot that has this uid, and
2508                          * if we send the inode to dqget, the uid of the inode
2509                          * takes priority over what's sent in the uid argument.
2510                          * We must unlock inode here before calling dqget if
2511                          * we're not sending the inode, because otherwise
2512                          * we'll deadlock by doing trans_reserve while
2513                          * holding ilock.
2514                          */
2515                         xfs_iunlock(ip, lockflags);
2516                         if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
2517                                                  XFS_DQ_USER,
2518                                                  XFS_QMOPT_DQALLOC |
2519                                                  XFS_QMOPT_DOWARN,
2520                                                  &uq))) {
2521                                 ASSERT(error != ENOENT);
2522                                 return (error);
2523                         }
2524                         /*
2525                          * Get the ilock in the right order.
2526                          */
2527                         xfs_dqunlock(uq);
2528                         lockflags = XFS_ILOCK_SHARED;
2529                         xfs_ilock(ip, lockflags);
2530                 } else {
2531                         /*
2532                          * Take an extra reference, because we'll return
2533                          * this to caller
2534                          */
2535                         ASSERT(ip->i_udquot);
2536                         uq = ip->i_udquot;
2537                         xfs_dqlock(uq);
2538                         XFS_DQHOLD(uq);
2539                         xfs_dqunlock(uq);
2540                 }
2541         }
2542         if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
2543                 if (ip->i_d.di_gid != gid) {
2544                         xfs_iunlock(ip, lockflags);
2545                         if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
2546                                                  XFS_DQ_GROUP,
2547                                                  XFS_QMOPT_DQALLOC |
2548                                                  XFS_QMOPT_DOWARN,
2549                                                  &gq))) {
2550                                 if (uq)
2551                                         xfs_qm_dqrele(uq);
2552                                 ASSERT(error != ENOENT);
2553                                 return (error);
2554                         }
2555                         xfs_dqunlock(gq);
2556                         lockflags = XFS_ILOCK_SHARED;
2557                         xfs_ilock(ip, lockflags);
2558                 } else {
2559                         ASSERT(ip->i_gdquot);
2560                         gq = ip->i_gdquot;
2561                         xfs_dqlock(gq);
2562                         XFS_DQHOLD(gq);
2563                         xfs_dqunlock(gq);
2564                 }
2565         } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
2566                 if (ip->i_d.di_projid != prid) {
2567                         xfs_iunlock(ip, lockflags);
2568                         if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
2569                                                  XFS_DQ_PROJ,
2570                                                  XFS_QMOPT_DQALLOC |
2571                                                  XFS_QMOPT_DOWARN,
2572                                                  &gq))) {
2573                                 if (uq)
2574                                         xfs_qm_dqrele(uq);
2575                                 ASSERT(error != ENOENT);
2576                                 return (error);
2577                         }
2578                         xfs_dqunlock(gq);
2579                         lockflags = XFS_ILOCK_SHARED;
2580                         xfs_ilock(ip, lockflags);
2581                 } else {
2582                         ASSERT(ip->i_gdquot);
2583                         gq = ip->i_gdquot;
2584                         xfs_dqlock(gq);
2585                         XFS_DQHOLD(gq);
2586                         xfs_dqunlock(gq);
2587                 }
2588         }
2589         if (uq)
2590                 xfs_dqtrace_entry_ino(uq, "DQALLOC", ip);
2591
2592         xfs_iunlock(ip, lockflags);
2593         if (O_udqpp)
2594                 *O_udqpp = uq;
2595         else if (uq)
2596                 xfs_qm_dqrele(uq);
2597         if (O_gdqpp)
2598                 *O_gdqpp = gq;
2599         else if (gq)
2600                 xfs_qm_dqrele(gq);
2601         return (0);
2602 }
2603
2604 /*
2605  * Actually transfer ownership, and do dquot modifications.
2606  * These were already reserved.
2607  */
2608 xfs_dquot_t *
2609 xfs_qm_vop_chown(
2610         xfs_trans_t     *tp,
2611         xfs_inode_t     *ip,
2612         xfs_dquot_t     **IO_olddq,
2613         xfs_dquot_t     *newdq)
2614 {
2615         xfs_dquot_t     *prevdq;
2616         uint            bfield = XFS_IS_REALTIME_INODE(ip) ?
2617                                  XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
2618
2619         ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
2620         ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
2621
2622         /* old dquot */
2623         prevdq = *IO_olddq;
2624         ASSERT(prevdq);
2625         ASSERT(prevdq != newdq);
2626
2627         xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
2628         xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
2629
2630         /* the sparkling new dquot */
2631         xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
2632         xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
2633
2634         /*
2635          * Take an extra reference, because the inode
2636          * is going to keep this dquot pointer even
2637          * after the trans_commit.
2638          */
2639         xfs_dqlock(newdq);
2640         XFS_DQHOLD(newdq);
2641         xfs_dqunlock(newdq);
2642         *IO_olddq = newdq;
2643
2644         return (prevdq);
2645 }
2646
2647 /*
2648  * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
2649  */
2650 int
2651 xfs_qm_vop_chown_reserve(
2652         xfs_trans_t     *tp,
2653         xfs_inode_t     *ip,
2654         xfs_dquot_t     *udqp,
2655         xfs_dquot_t     *gdqp,
2656         uint            flags)
2657 {
2658         int             error;
2659         xfs_mount_t     *mp;
2660         uint            delblks, blkflags;
2661         xfs_dquot_t     *unresudq, *unresgdq, *delblksudq, *delblksgdq;
2662
2663         ASSERT(XFS_ISLOCKED_INODE(ip));
2664         mp = ip->i_mount;
2665         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2666
2667         delblks = ip->i_delayed_blks;
2668         delblksudq = delblksgdq = unresudq = unresgdq = NULL;
2669         blkflags = XFS_IS_REALTIME_INODE(ip) ?
2670                         XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
2671
2672         if (XFS_IS_UQUOTA_ON(mp) && udqp &&
2673             ip->i_d.di_uid != (uid_t)INT_GET(udqp->q_core.d_id, ARCH_CONVERT)) {
2674                 delblksudq = udqp;
2675                 /*
2676                  * If there are delayed allocation blocks, then we have to
2677                  * unreserve those from the old dquot, and add them to the
2678                  * new dquot.
2679                  */
2680                 if (delblks) {
2681                         ASSERT(ip->i_udquot);
2682                         unresudq = ip->i_udquot;
2683                 }
2684         }
2685         if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
2686                 if ((XFS_IS_GQUOTA_ON(ip->i_mount) && ip->i_d.di_gid !=
2687                                 INT_GET(gdqp->q_core.d_id, ARCH_CONVERT)) ||
2688                     (XFS_IS_PQUOTA_ON(ip->i_mount) && ip->i_d.di_projid !=
2689                                 INT_GET(gdqp->q_core.d_id, ARCH_CONVERT))) {
2690                         delblksgdq = gdqp;
2691                         if (delblks) {
2692                                 ASSERT(ip->i_gdquot);
2693                                 unresgdq = ip->i_gdquot;
2694                         }
2695                 }
2696         }
2697
2698         if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
2699                                 delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
2700                                 flags | blkflags)))
2701                 return (error);
2702
2703         /*
2704          * Do the delayed blks reservations/unreservations now. Since, these
2705          * are done without the help of a transaction, if a reservation fails
2706          * its previous reservations won't be automatically undone by trans
2707          * code. So, we have to do it manually here.
2708          */
2709         if (delblks) {
2710                 /*
2711                  * Do the reservations first. Unreservation can't fail.
2712                  */
2713                 ASSERT(delblksudq || delblksgdq);
2714                 ASSERT(unresudq || unresgdq);
2715                 if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
2716                                 delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
2717                                 flags | blkflags)))
2718                         return (error);
2719                 xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
2720                                 unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
2721                                 blkflags);
2722         }
2723
2724         return (0);
2725 }
2726
2727 int
2728 xfs_qm_vop_rename_dqattach(
2729         xfs_inode_t     **i_tab)
2730 {
2731         xfs_inode_t     *ip;
2732         int             i;
2733         int             error;
2734
2735         ip = i_tab[0];
2736
2737         if (! XFS_IS_QUOTA_ON(ip->i_mount))
2738                 return (0);
2739
2740         if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
2741                 error = xfs_qm_dqattach(ip, 0);
2742                 if (error)
2743                         return (error);
2744         }
2745         for (i = 1; (i < 4 && i_tab[i]); i++) {
2746                 /*
2747                  * Watch out for duplicate entries in the table.
2748                  */
2749                 if ((ip = i_tab[i]) != i_tab[i-1]) {
2750                         if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
2751                                 error = xfs_qm_dqattach(ip, 0);
2752                                 if (error)
2753                                         return (error);
2754                         }
2755                 }
2756         }
2757         return (0);
2758 }
2759
2760 void
2761 xfs_qm_vop_dqattach_and_dqmod_newinode(
2762         xfs_trans_t     *tp,
2763         xfs_inode_t     *ip,
2764         xfs_dquot_t     *udqp,
2765         xfs_dquot_t     *gdqp)
2766 {
2767         if (!XFS_IS_QUOTA_ON(tp->t_mountp))
2768                 return;
2769
2770         ASSERT(XFS_ISLOCKED_INODE_EXCL(ip));
2771         ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
2772
2773         if (udqp) {
2774                 xfs_dqlock(udqp);
2775                 XFS_DQHOLD(udqp);
2776                 xfs_dqunlock(udqp);
2777                 ASSERT(ip->i_udquot == NULL);
2778                 ip->i_udquot = udqp;
2779                 ASSERT(ip->i_d.di_uid == INT_GET(udqp->q_core.d_id, ARCH_CONVERT));
2780                 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
2781         }
2782         if (gdqp) {
2783                 xfs_dqlock(gdqp);
2784                 XFS_DQHOLD(gdqp);
2785                 xfs_dqunlock(gdqp);
2786                 ASSERT(ip->i_gdquot == NULL);
2787                 ip->i_gdquot = gdqp;
2788                 ASSERT(ip->i_d.di_gid == INT_GET(gdqp->q_core.d_id, ARCH_CONVERT));
2789                 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
2790         }
2791 }
2792
2793 /* ------------- list stuff -----------------*/
2794 STATIC void
2795 xfs_qm_freelist_init(xfs_frlist_t *ql)
2796 {
2797         ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql;
2798         mutex_init(&ql->qh_lock, MUTEX_DEFAULT, "dqf");
2799         ql->qh_version = 0;
2800         ql->qh_nelems = 0;
2801 }
2802
2803 STATIC void
2804 xfs_qm_freelist_destroy(xfs_frlist_t *ql)
2805 {
2806         xfs_dquot_t     *dqp, *nextdqp;
2807
2808         mutex_lock(&ql->qh_lock, PINOD);
2809         for (dqp = ql->qh_next;
2810              dqp != (xfs_dquot_t *)ql; ) {
2811                 xfs_dqlock(dqp);
2812                 nextdqp = dqp->dq_flnext;
2813 #ifdef QUOTADEBUG
2814                 cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp);
2815 #endif
2816                 XQM_FREELIST_REMOVE(dqp);
2817                 xfs_dqunlock(dqp);
2818                 xfs_qm_dqdestroy(dqp);
2819                 dqp = nextdqp;
2820         }
2821         /*
2822          * Don't bother about unlocking.
2823          */
2824         mutex_destroy(&ql->qh_lock);
2825
2826         ASSERT(ql->qh_nelems == 0);
2827 }
2828
2829 STATIC void
2830 xfs_qm_freelist_insert(xfs_frlist_t *ql, xfs_dquot_t *dq)
2831 {
2832         dq->dq_flnext = ql->qh_next;
2833         dq->dq_flprev = (xfs_dquot_t *)ql;
2834         ql->qh_next = dq;
2835         dq->dq_flnext->dq_flprev = dq;
2836         xfs_Gqm->qm_dqfreelist.qh_nelems++;
2837         xfs_Gqm->qm_dqfreelist.qh_version++;
2838 }
2839
2840 void
2841 xfs_qm_freelist_unlink(xfs_dquot_t *dq)
2842 {
2843         xfs_dquot_t *next = dq->dq_flnext;
2844         xfs_dquot_t *prev = dq->dq_flprev;
2845
2846         next->dq_flprev = prev;
2847         prev->dq_flnext = next;
2848         dq->dq_flnext = dq->dq_flprev = dq;
2849         xfs_Gqm->qm_dqfreelist.qh_nelems--;
2850         xfs_Gqm->qm_dqfreelist.qh_version++;
2851 }
2852
2853 void
2854 xfs_qm_freelist_append(xfs_frlist_t *ql, xfs_dquot_t *dq)
2855 {
2856         xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq);
2857 }
2858
2859 STATIC int
2860 xfs_qm_dqhashlock_nowait(
2861         xfs_dquot_t *dqp)
2862 {
2863         int locked;
2864
2865         locked = mutex_trylock(&((dqp)->q_hash->qh_lock));
2866         return (locked);
2867 }
2868
2869 int
2870 xfs_qm_freelist_lock_nowait(
2871         xfs_qm_t *xqm)
2872 {
2873         int locked;
2874
2875         locked = mutex_trylock(&(xqm->qm_dqfreelist.qh_lock));
2876         return (locked);
2877 }
2878
2879 STATIC int
2880 xfs_qm_mplist_nowait(
2881         xfs_mount_t     *mp)
2882 {
2883         int locked;
2884
2885         ASSERT(mp->m_quotainfo);
2886         locked = mutex_trylock(&(XFS_QI_MPLLOCK(mp)));
2887         return (locked);
2888 }