Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
[linux-2.6] / fs / xfs / quota / xfs_qm.c
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_bit.h"
21 #include "xfs_log.h"
22 #include "xfs_inum.h"
23 #include "xfs_trans.h"
24 #include "xfs_sb.h"
25 #include "xfs_ag.h"
26 #include "xfs_dir2.h"
27 #include "xfs_alloc.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_quota.h"
30 #include "xfs_mount.h"
31 #include "xfs_bmap_btree.h"
32 #include "xfs_alloc_btree.h"
33 #include "xfs_ialloc_btree.h"
34 #include "xfs_dir2_sf.h"
35 #include "xfs_attr_sf.h"
36 #include "xfs_dinode.h"
37 #include "xfs_inode.h"
38 #include "xfs_btree.h"
39 #include "xfs_ialloc.h"
40 #include "xfs_itable.h"
41 #include "xfs_rtalloc.h"
42 #include "xfs_error.h"
43 #include "xfs_bmap.h"
44 #include "xfs_rw.h"
45 #include "xfs_acl.h"
46 #include "xfs_attr.h"
47 #include "xfs_buf_item.h"
48 #include "xfs_trans_space.h"
49 #include "xfs_utils.h"
50 #include "xfs_qm.h"
51
52 /*
53  * The global quota manager. There is only one of these for the entire
54  * system, _not_ one per file system. XQM keeps track of the overall
55  * quota functionality, including maintaining the freelist and hash
56  * tables of dquots.
57  */
58 mutex_t         xfs_Gqm_lock;
59 struct xfs_qm   *xfs_Gqm;
60 uint            ndquot;
61
62 kmem_zone_t     *qm_dqzone;
63 kmem_zone_t     *qm_dqtrxzone;
64
65 static cred_t   xfs_zerocr;
66
67 STATIC void     xfs_qm_list_init(xfs_dqlist_t *, char *, int);
68 STATIC void     xfs_qm_list_destroy(xfs_dqlist_t *);
69
70 STATIC void     xfs_qm_freelist_init(xfs_frlist_t *);
71 STATIC void     xfs_qm_freelist_destroy(xfs_frlist_t *);
72 STATIC int      xfs_qm_mplist_nowait(xfs_mount_t *);
73 STATIC int      xfs_qm_dqhashlock_nowait(xfs_dquot_t *);
74
75 STATIC int      xfs_qm_init_quotainos(xfs_mount_t *);
76 STATIC int      xfs_qm_init_quotainfo(xfs_mount_t *);
77 STATIC int      xfs_qm_shake(int, gfp_t);
78
79 static struct shrinker xfs_qm_shaker = {
80         .shrink = xfs_qm_shake,
81         .seeks = DEFAULT_SEEKS,
82 };
83
84 #ifdef DEBUG
85 extern mutex_t  qcheck_lock;
86 #endif
87
#ifdef QUOTADEBUG
/*
 * Debug helper: dump every dquot on list 'l', following the link member
 * named by NXT, preceded by 'title' and the list's element count.
 *
 * Wrapped in do { } while (0) so that "XQM_LIST_PRINT(...);" is a single
 * statement in every context (e.g. an unbraced if/else), exactly like the
 * non-debug variant below.
 */
#define XQM_LIST_PRINT(l, NXT, title) \
do { \
	xfs_dquot_t	*dqp; int i = 0; \
	cmn_err(CE_DEBUG, "%s (#%d)", title, (int) (l)->qh_nelems); \
	for (dqp = (l)->qh_next; dqp != NULL; dqp = dqp->NXT) { \
		cmn_err(CE_DEBUG, "   %d.  \"%d (%s)\"   " \
				  "bcnt = %d, icnt = %d, refs = %d", \
			++i, (int) be32_to_cpu(dqp->q_core.d_id), \
			DQFLAGTO_TYPESTR(dqp),	     \
			(int) be64_to_cpu(dqp->q_core.d_bcount), \
			(int) be64_to_cpu(dqp->q_core.d_icount), \
			(int) dqp->q_nrefs);  } \
} while (0)
#else
#define XQM_LIST_PRINT(l, NXT, title) do { } while (0)
#endif
105
106 /*
107  * Initialize the XQM structure.
108  * Note that there is not one quota manager per file system.
109  */
110 STATIC struct xfs_qm *
111 xfs_Gqm_init(void)
112 {
113         xfs_dqhash_t    *udqhash, *gdqhash;
114         xfs_qm_t        *xqm;
115         size_t          hsize;
116         uint            i;
117
118         /*
119          * Initialize the dquot hash tables.
120          */
121         udqhash = kmem_zalloc_greedy(&hsize,
122                                      XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
123                                      XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t),
124                                      KM_SLEEP | KM_MAYFAIL | KM_LARGE);
125         gdqhash = kmem_zalloc(hsize, KM_SLEEP | KM_LARGE);
126         hsize /= sizeof(xfs_dqhash_t);
127         ndquot = hsize << 8;
128
129         xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
130         xqm->qm_dqhashmask = hsize - 1;
131         xqm->qm_usr_dqhtable = udqhash;
132         xqm->qm_grp_dqhtable = gdqhash;
133         ASSERT(xqm->qm_usr_dqhtable != NULL);
134         ASSERT(xqm->qm_grp_dqhtable != NULL);
135
136         for (i = 0; i < hsize; i++) {
137                 xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
138                 xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
139         }
140
141         /*
142          * Freelist of all dquots of all file systems
143          */
144         xfs_qm_freelist_init(&(xqm->qm_dqfreelist));
145
146         /*
147          * dquot zone. we register our own low-memory callback.
148          */
149         if (!qm_dqzone) {
150                 xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
151                                                 "xfs_dquots");
152                 qm_dqzone = xqm->qm_dqzone;
153         } else
154                 xqm->qm_dqzone = qm_dqzone;
155
156         register_shrinker(&xfs_qm_shaker);
157
158         /*
159          * The t_dqinfo portion of transactions.
160          */
161         if (!qm_dqtrxzone) {
162                 xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
163                                                    "xfs_dqtrx");
164                 qm_dqtrxzone = xqm->qm_dqtrxzone;
165         } else
166                 xqm->qm_dqtrxzone = qm_dqtrxzone;
167
168         atomic_set(&xqm->qm_totaldquots, 0);
169         xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
170         xqm->qm_nrefs = 0;
171 #ifdef DEBUG
172         mutex_init(&qcheck_lock);
173 #endif
174         return xqm;
175 }
176
177 /*
178  * Destroy the global quota manager when its reference count goes to zero.
179  */
180 STATIC void
181 xfs_qm_destroy(
182         struct xfs_qm   *xqm)
183 {
184         int             hsize, i;
185
186         ASSERT(xqm != NULL);
187         ASSERT(xqm->qm_nrefs == 0);
188         unregister_shrinker(&xfs_qm_shaker);
189         hsize = xqm->qm_dqhashmask + 1;
190         for (i = 0; i < hsize; i++) {
191                 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
192                 xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
193         }
194         kmem_free(xqm->qm_usr_dqhtable);
195         kmem_free(xqm->qm_grp_dqhtable);
196         xqm->qm_usr_dqhtable = NULL;
197         xqm->qm_grp_dqhtable = NULL;
198         xqm->qm_dqhashmask = 0;
199         xfs_qm_freelist_destroy(&(xqm->qm_dqfreelist));
200 #ifdef DEBUG
201         mutex_destroy(&qcheck_lock);
202 #endif
203         kmem_free(xqm);
204 }
205
206 /*
207  * Called at mount time to let XQM know that another file system is
208  * starting quotas. This isn't crucial information as the individual mount
209  * structures are pretty independent, but it helps the XQM keep a
210  * global view of what's going on.
211  */
212 /* ARGSUSED */
213 STATIC int
214 xfs_qm_hold_quotafs_ref(
215         struct xfs_mount *mp)
216 {
217         /*
218          * Need to lock the xfs_Gqm structure for things like this. For example,
219          * the structure could disappear between the entry to this routine and
220          * a HOLD operation if not locked.
221          */
222         XFS_QM_LOCK(xfs_Gqm);
223
224         if (xfs_Gqm == NULL)
225                 xfs_Gqm = xfs_Gqm_init();
226         /*
227          * We can keep a list of all filesystems with quotas mounted for
228          * debugging and statistical purposes, but ...
229          * Just take a reference and get out.
230          */
231         XFS_QM_HOLD(xfs_Gqm);
232         XFS_QM_UNLOCK(xfs_Gqm);
233
234         return 0;
235 }
236
237
238 /*
239  * Release the reference that a filesystem took at mount time,
240  * so that we know when we need to destroy the entire quota manager.
241  */
242 /* ARGSUSED */
243 STATIC void
244 xfs_qm_rele_quotafs_ref(
245         struct xfs_mount *mp)
246 {
247         xfs_dquot_t     *dqp, *nextdqp;
248
249         ASSERT(xfs_Gqm);
250         ASSERT(xfs_Gqm->qm_nrefs > 0);
251
252         /*
253          * Go thru the freelist and destroy all inactive dquots.
254          */
255         xfs_qm_freelist_lock(xfs_Gqm);
256
257         for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
258              dqp != (xfs_dquot_t *)&(xfs_Gqm->qm_dqfreelist); ) {
259                 xfs_dqlock(dqp);
260                 nextdqp = dqp->dq_flnext;
261                 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
262                         ASSERT(dqp->q_mount == NULL);
263                         ASSERT(! XFS_DQ_IS_DIRTY(dqp));
264                         ASSERT(dqp->HL_PREVP == NULL);
265                         ASSERT(dqp->MPL_PREVP == NULL);
266                         XQM_FREELIST_REMOVE(dqp);
267                         xfs_dqunlock(dqp);
268                         xfs_qm_dqdestroy(dqp);
269                 } else {
270                         xfs_dqunlock(dqp);
271                 }
272                 dqp = nextdqp;
273         }
274         xfs_qm_freelist_unlock(xfs_Gqm);
275
276         /*
277          * Destroy the entire XQM. If somebody mounts with quotaon, this'll
278          * be restarted.
279          */
280         XFS_QM_LOCK(xfs_Gqm);
281         XFS_QM_RELE(xfs_Gqm);
282         if (xfs_Gqm->qm_nrefs == 0) {
283                 xfs_qm_destroy(xfs_Gqm);
284                 xfs_Gqm = NULL;
285         }
286         XFS_QM_UNLOCK(xfs_Gqm);
287 }
288
289 /*
290  * Just destroy the quotainfo structure.
291  */
292 void
293 xfs_qm_unmount_quotadestroy(
294         xfs_mount_t     *mp)
295 {
296         if (mp->m_quotainfo)
297                 xfs_qm_destroy_quotainfo(mp);
298 }
299
300
301 /*
302  * This is called from xfs_mountfs to start quotas and initialize all
303  * necessary data structures like quotainfo.  This is also responsible for
304  * running a quotacheck as necessary.  We are guaranteed that the superblock
305  * is consistently read in at this point.
306  *
307  * If we fail here, the mount will continue with quota turned off. We don't
308  * need to inidicate success or failure at all.
309  */
310 void
311 xfs_qm_mount_quotas(
312         xfs_mount_t     *mp)
313 {
314         int             error = 0;
315         uint            sbf;
316
317         /*
318          * If quotas on realtime volumes is not supported, we disable
319          * quotas immediately.
320          */
321         if (mp->m_sb.sb_rextents) {
322                 cmn_err(CE_NOTE,
323                         "Cannot turn on quotas for realtime filesystem %s",
324                         mp->m_fsname);
325                 mp->m_qflags = 0;
326                 goto write_changes;
327         }
328
329         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
330
331         /*
332          * Allocate the quotainfo structure inside the mount struct, and
333          * create quotainode(s), and change/rev superblock if necessary.
334          */
335         error = xfs_qm_init_quotainfo(mp);
336         if (error) {
337                 /*
338                  * We must turn off quotas.
339                  */
340                 ASSERT(mp->m_quotainfo == NULL);
341                 mp->m_qflags = 0;
342                 goto write_changes;
343         }
344         /*
345          * If any of the quotas are not consistent, do a quotacheck.
346          */
347         if (XFS_QM_NEED_QUOTACHECK(mp)) {
348                 error = xfs_qm_quotacheck(mp);
349                 if (error) {
350                         /* Quotacheck failed and disabled quotas. */
351                         return;
352                 }
353         }
354         /* 
355          * If one type of quotas is off, then it will lose its
356          * quotachecked status, since we won't be doing accounting for
357          * that type anymore.
358          */
359         if (!XFS_IS_UQUOTA_ON(mp))
360                 mp->m_qflags &= ~XFS_UQUOTA_CHKD;
361         if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp)))
362                 mp->m_qflags &= ~XFS_OQUOTA_CHKD;
363
364  write_changes:
365         /*
366          * We actually don't have to acquire the m_sb_lock at all.
367          * This can only be called from mount, and that's single threaded. XXX
368          */
369         spin_lock(&mp->m_sb_lock);
370         sbf = mp->m_sb.sb_qflags;
371         mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
372         spin_unlock(&mp->m_sb_lock);
373
374         if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
375                 if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
376                         /*
377                          * We could only have been turning quotas off.
378                          * We aren't in very good shape actually because
379                          * the incore structures are convinced that quotas are
380                          * off, but the on disk superblock doesn't know that !
381                          */
382                         ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
383                         xfs_fs_cmn_err(CE_ALERT, mp,
384                                 "XFS mount_quotas: Superblock update failed!");
385                 }
386         }
387
388         if (error) {
389                 xfs_fs_cmn_err(CE_WARN, mp,
390                         "Failed to initialize disk quotas.");
391         }
392         return;
393 }
394
395 /*
396  * Called from the vfsops layer.
397  */
398 void
399 xfs_qm_unmount_quotas(
400         xfs_mount_t     *mp)
401 {
402         /*
403          * Release the dquots that root inode, et al might be holding,
404          * before we flush quotas and blow away the quotainfo structure.
405          */
406         ASSERT(mp->m_rootip);
407         xfs_qm_dqdetach(mp->m_rootip);
408         if (mp->m_rbmip)
409                 xfs_qm_dqdetach(mp->m_rbmip);
410         if (mp->m_rsumip)
411                 xfs_qm_dqdetach(mp->m_rsumip);
412
413         /*
414          * Release the quota inodes.
415          */
416         if (mp->m_quotainfo) {
417                 if (mp->m_quotainfo->qi_uquotaip) {
418                         IRELE(mp->m_quotainfo->qi_uquotaip);
419                         mp->m_quotainfo->qi_uquotaip = NULL;
420                 }
421                 if (mp->m_quotainfo->qi_gquotaip) {
422                         IRELE(mp->m_quotainfo->qi_gquotaip);
423                         mp->m_quotainfo->qi_gquotaip = NULL;
424                 }
425         }
426 }
427
428 /*
429  * Flush all dquots of the given file system to disk. The dquots are
430  * _not_ purged from memory here, just their data written to disk.
431  */
432 STATIC int
433 xfs_qm_dqflush_all(
434         xfs_mount_t     *mp,
435         int             flags)
436 {
437         int             recl;
438         xfs_dquot_t     *dqp;
439         int             niters;
440         int             error;
441
442         if (mp->m_quotainfo == NULL)
443                 return 0;
444         niters = 0;
445 again:
446         xfs_qm_mplist_lock(mp);
447         FOREACH_DQUOT_IN_MP(dqp, mp) {
448                 xfs_dqlock(dqp);
449                 if (! XFS_DQ_IS_DIRTY(dqp)) {
450                         xfs_dqunlock(dqp);
451                         continue;
452                 }
453                 xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY");
454                 /* XXX a sentinel would be better */
455                 recl = XFS_QI_MPLRECLAIMS(mp);
456                 if (!xfs_dqflock_nowait(dqp)) {
457                         /*
458                          * If we can't grab the flush lock then check
459                          * to see if the dquot has been flushed delayed
460                          * write.  If so, grab its buffer and send it
461                          * out immediately.  We'll be able to acquire
462                          * the flush lock when the I/O completes.
463                          */
464                         xfs_qm_dqflock_pushbuf_wait(dqp);
465                 }
466                 /*
467                  * Let go of the mplist lock. We don't want to hold it
468                  * across a disk write.
469                  */
470                 xfs_qm_mplist_unlock(mp);
471                 error = xfs_qm_dqflush(dqp, flags);
472                 xfs_dqunlock(dqp);
473                 if (error)
474                         return error;
475
476                 xfs_qm_mplist_lock(mp);
477                 if (recl != XFS_QI_MPLRECLAIMS(mp)) {
478                         xfs_qm_mplist_unlock(mp);
479                         /* XXX restart limit */
480                         goto again;
481                 }
482         }
483
484         xfs_qm_mplist_unlock(mp);
485         /* return ! busy */
486         return 0;
487 }
488 /*
489  * Release the group dquot pointers the user dquots may be
490  * carrying around as a hint. mplist is locked on entry and exit.
491  */
492 STATIC void
493 xfs_qm_detach_gdquots(
494         xfs_mount_t     *mp)
495 {
496         xfs_dquot_t     *dqp, *gdqp;
497         int             nrecl;
498
499  again:
500         ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
501         dqp = XFS_QI_MPLNEXT(mp);
502         while (dqp) {
503                 xfs_dqlock(dqp);
504                 if ((gdqp = dqp->q_gdquot)) {
505                         xfs_dqlock(gdqp);
506                         dqp->q_gdquot = NULL;
507                 }
508                 xfs_dqunlock(dqp);
509
510                 if (gdqp) {
511                         /*
512                          * Can't hold the mplist lock across a dqput.
513                          * XXXmust convert to marker based iterations here.
514                          */
515                         nrecl = XFS_QI_MPLRECLAIMS(mp);
516                         xfs_qm_mplist_unlock(mp);
517                         xfs_qm_dqput(gdqp);
518
519                         xfs_qm_mplist_lock(mp);
520                         if (nrecl != XFS_QI_MPLRECLAIMS(mp))
521                                 goto again;
522                 }
523                 dqp = dqp->MPL_NEXT;
524         }
525 }
526
527 /*
528  * Go through all the incore dquots of this file system and take them
529  * off the mplist and hashlist, if the dquot type matches the dqtype
530  * parameter. This is used when turning off quota accounting for
531  * users and/or groups, as well as when the filesystem is unmounting.
532  */
533 STATIC int
534 xfs_qm_dqpurge_int(
535         xfs_mount_t     *mp,
536         uint            flags) /* QUOTAOFF/UMOUNTING/UQUOTA/PQUOTA/GQUOTA */
537 {
538         xfs_dquot_t     *dqp;
539         uint            dqtype;
540         int             nrecl;
541         xfs_dquot_t     *nextdqp;
542         int             nmisses;
543
544         if (mp->m_quotainfo == NULL)
545                 return 0;
546
547         dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
548         dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
549         dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
550
551         xfs_qm_mplist_lock(mp);
552
553         /*
554          * In the first pass through all incore dquots of this filesystem,
555          * we release the group dquot pointers the user dquots may be
556          * carrying around as a hint. We need to do this irrespective of
557          * what's being turned off.
558          */
559         xfs_qm_detach_gdquots(mp);
560
561       again:
562         nmisses = 0;
563         ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
564         /*
565          * Try to get rid of all of the unwanted dquots. The idea is to
566          * get them off mplist and hashlist, but leave them on freelist.
567          */
568         dqp = XFS_QI_MPLNEXT(mp);
569         while (dqp) {
570                 /*
571                  * It's OK to look at the type without taking dqlock here.
572                  * We're holding the mplist lock here, and that's needed for
573                  * a dqreclaim.
574                  */
575                 if ((dqp->dq_flags & dqtype) == 0) {
576                         dqp = dqp->MPL_NEXT;
577                         continue;
578                 }
579
580                 if (! xfs_qm_dqhashlock_nowait(dqp)) {
581                         nrecl = XFS_QI_MPLRECLAIMS(mp);
582                         xfs_qm_mplist_unlock(mp);
583                         XFS_DQ_HASH_LOCK(dqp->q_hash);
584                         xfs_qm_mplist_lock(mp);
585
586                         /*
587                          * XXXTheoretically, we can get into a very long
588                          * ping pong game here.
589                          * No one can be adding dquots to the mplist at
590                          * this point, but somebody might be taking things off.
591                          */
592                         if (nrecl != XFS_QI_MPLRECLAIMS(mp)) {
593                                 XFS_DQ_HASH_UNLOCK(dqp->q_hash);
594                                 goto again;
595                         }
596                 }
597
598                 /*
599                  * Take the dquot off the mplist and hashlist. It may remain on
600                  * freelist in INACTIVE state.
601                  */
602                 nextdqp = dqp->MPL_NEXT;
603                 nmisses += xfs_qm_dqpurge(dqp);
604                 dqp = nextdqp;
605         }
606         xfs_qm_mplist_unlock(mp);
607         return nmisses;
608 }
609
610 int
611 xfs_qm_dqpurge_all(
612         xfs_mount_t     *mp,
613         uint            flags)
614 {
615         int             ndquots;
616
617         /*
618          * Purge the dquot cache.
619          * None of the dquots should really be busy at this point.
620          */
621         if (mp->m_quotainfo) {
622                 while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
623                         delay(ndquots * 10);
624                 }
625         }
626         return 0;
627 }
628
629 STATIC int
630 xfs_qm_dqattach_one(
631         xfs_inode_t     *ip,
632         xfs_dqid_t      id,
633         uint            type,
634         uint            doalloc,
635         uint            dolock,
636         xfs_dquot_t     *udqhint, /* hint */
637         xfs_dquot_t     **IO_idqpp)
638 {
639         xfs_dquot_t     *dqp;
640         int             error;
641
642         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
643         error = 0;
644         /*
645          * See if we already have it in the inode itself. IO_idqpp is
646          * &i_udquot or &i_gdquot. This made the code look weird, but
647          * made the logic a lot simpler.
648          */
649         if ((dqp = *IO_idqpp)) {
650                 if (dolock)
651                         xfs_dqlock(dqp);
652                 xfs_dqtrace_entry(dqp, "DQATTACH: found in ip");
653                 goto done;
654         }
655
656         /*
657          * udqhint is the i_udquot field in inode, and is non-NULL only
658          * when the type arg is group/project. Its purpose is to save a
659          * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
660          * the user dquot.
661          */
662         ASSERT(!udqhint || type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
663         if (udqhint && !dolock)
664                 xfs_dqlock(udqhint);
665
666         /*
667          * No need to take dqlock to look at the id.
668          * The ID can't change until it gets reclaimed, and it won't
669          * be reclaimed as long as we have a ref from inode and we hold
670          * the ilock.
671          */
672         if (udqhint &&
673             (dqp = udqhint->q_gdquot) &&
674             (be32_to_cpu(dqp->q_core.d_id) == id)) {
675                 ASSERT(XFS_DQ_IS_LOCKED(udqhint));
676                 xfs_dqlock(dqp);
677                 XFS_DQHOLD(dqp);
678                 ASSERT(*IO_idqpp == NULL);
679                 *IO_idqpp = dqp;
680                 if (!dolock) {
681                         xfs_dqunlock(dqp);
682                         xfs_dqunlock(udqhint);
683                 }
684                 goto done;
685         }
686         /*
687          * We can't hold a dquot lock when we call the dqget code.
688          * We'll deadlock in no time, because of (not conforming to)
689          * lock ordering - the inodelock comes before any dquot lock,
690          * and we may drop and reacquire the ilock in xfs_qm_dqget().
691          */
692         if (udqhint)
693                 xfs_dqunlock(udqhint);
694         /*
695          * Find the dquot from somewhere. This bumps the
696          * reference count of dquot and returns it locked.
697          * This can return ENOENT if dquot didn't exist on
698          * disk and we didn't ask it to allocate;
699          * ESRCH if quotas got turned off suddenly.
700          */
701         if ((error = xfs_qm_dqget(ip->i_mount, ip, id, type,
702                                  doalloc|XFS_QMOPT_DOWARN, &dqp))) {
703                 if (udqhint && dolock)
704                         xfs_dqlock(udqhint);
705                 goto done;
706         }
707
708         xfs_dqtrace_entry(dqp, "DQATTACH: found by dqget");
709         /*
710          * dqget may have dropped and re-acquired the ilock, but it guarantees
711          * that the dquot returned is the one that should go in the inode.
712          */
713         *IO_idqpp = dqp;
714         ASSERT(dqp);
715         ASSERT(XFS_DQ_IS_LOCKED(dqp));
716         if (! dolock) {
717                 xfs_dqunlock(dqp);
718                 goto done;
719         }
720         if (! udqhint)
721                 goto done;
722
723         ASSERT(udqhint);
724         ASSERT(dolock);
725         ASSERT(XFS_DQ_IS_LOCKED(dqp));
726         if (! xfs_qm_dqlock_nowait(udqhint)) {
727                 xfs_dqunlock(dqp);
728                 xfs_dqlock(udqhint);
729                 xfs_dqlock(dqp);
730         }
731       done:
732 #ifdef QUOTADEBUG
733         if (udqhint) {
734                 if (dolock)
735                         ASSERT(XFS_DQ_IS_LOCKED(udqhint));
736         }
737         if (! error) {
738                 if (dolock)
739                         ASSERT(XFS_DQ_IS_LOCKED(dqp));
740         }
741 #endif
742         return error;
743 }
744
745
746 /*
747  * Given a udquot and gdquot, attach a ptr to the group dquot in the
748  * udquot as a hint for future lookups. The idea sounds simple, but the
749  * execution isn't, because the udquot might have a group dquot attached
750  * already and getting rid of that gets us into lock ordering constraints.
751  * The process is complicated more by the fact that the dquots may or may not
752  * be locked on entry.
753  */
754 STATIC void
755 xfs_qm_dqattach_grouphint(
756         xfs_dquot_t     *udq,
757         xfs_dquot_t     *gdq,
758         uint            locked)
759 {
760         xfs_dquot_t     *tmp;
761
762 #ifdef QUOTADEBUG
763         if (locked) {
764                 ASSERT(XFS_DQ_IS_LOCKED(udq));
765                 ASSERT(XFS_DQ_IS_LOCKED(gdq));
766         }
767 #endif
768         if (! locked)
769                 xfs_dqlock(udq);
770
771         if ((tmp = udq->q_gdquot)) {
772                 if (tmp == gdq) {
773                         if (! locked)
774                                 xfs_dqunlock(udq);
775                         return;
776                 }
777
778                 udq->q_gdquot = NULL;
779                 /*
780                  * We can't keep any dqlocks when calling dqrele,
781                  * because the freelist lock comes before dqlocks.
782                  */
783                 xfs_dqunlock(udq);
784                 if (locked)
785                         xfs_dqunlock(gdq);
786                 /*
787                  * we took a hard reference once upon a time in dqget,
788                  * so give it back when the udquot no longer points at it
789                  * dqput() does the unlocking of the dquot.
790                  */
791                 xfs_qm_dqrele(tmp);
792
793                 xfs_dqlock(udq);
794                 xfs_dqlock(gdq);
795
796         } else {
797                 ASSERT(XFS_DQ_IS_LOCKED(udq));
798                 if (! locked) {
799                         xfs_dqlock(gdq);
800                 }
801         }
802
803         ASSERT(XFS_DQ_IS_LOCKED(udq));
804         ASSERT(XFS_DQ_IS_LOCKED(gdq));
805         /*
806          * Somebody could have attached a gdquot here,
807          * when we dropped the uqlock. If so, just do nothing.
808          */
809         if (udq->q_gdquot == NULL) {
810                 XFS_DQHOLD(gdq);
811                 udq->q_gdquot = gdq;
812         }
813         if (! locked) {
814                 xfs_dqunlock(gdq);
815                 xfs_dqunlock(udq);
816         }
817 }
818
819
820 /*
821  * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
822  * into account.
823  * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
824  * If XFS_QMOPT_DQLOCK, the dquot(s) will be returned locked. This option pretty
825  * much made this code a complete mess, but it has been pretty useful.
826  * If XFS_QMOPT_ILOCKED, then inode sent is already locked EXCL.
827  * Inode may get unlocked and relocked in here, and the caller must deal with
828  * the consequences.
829  */
830 int
831 xfs_qm_dqattach(
832         xfs_inode_t     *ip,
833         uint            flags)
834 {
835         xfs_mount_t     *mp = ip->i_mount;
836         uint            nquotas = 0;
837         int             error = 0;
838
839         if ((! XFS_IS_QUOTA_ON(mp)) ||
840             (! XFS_NOT_DQATTACHED(mp, ip)) ||
841             (ip->i_ino == mp->m_sb.sb_uquotino) ||
842             (ip->i_ino == mp->m_sb.sb_gquotino))
843                 return 0;
844
845         ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 ||
846                xfs_isilocked(ip, XFS_ILOCK_EXCL));
847
848         if (! (flags & XFS_QMOPT_ILOCKED))
849                 xfs_ilock(ip, XFS_ILOCK_EXCL);
850
851         if (XFS_IS_UQUOTA_ON(mp)) {
852                 error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
853                                                 flags & XFS_QMOPT_DQALLOC,
854                                                 flags & XFS_QMOPT_DQLOCK,
855                                                 NULL, &ip->i_udquot);
856                 if (error)
857                         goto done;
858                 nquotas++;
859         }
860
861         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
862         if (XFS_IS_OQUOTA_ON(mp)) {
863                 error = XFS_IS_GQUOTA_ON(mp) ?
864                         xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
865                                                 flags & XFS_QMOPT_DQALLOC,
866                                                 flags & XFS_QMOPT_DQLOCK,
867                                                 ip->i_udquot, &ip->i_gdquot) :
868                         xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ,
869                                                 flags & XFS_QMOPT_DQALLOC,
870                                                 flags & XFS_QMOPT_DQLOCK,
871                                                 ip->i_udquot, &ip->i_gdquot);
872                 /*
873                  * Don't worry about the udquot that we may have
874                  * attached above. It'll get detached, if not already.
875                  */
876                 if (error)
877                         goto done;
878                 nquotas++;
879         }
880
881         /*
882          * Attach this group quota to the user quota as a hint.
883          * This WON'T, in general, result in a thrash.
884          */
885         if (nquotas == 2) {
886                 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
887                 ASSERT(ip->i_udquot);
888                 ASSERT(ip->i_gdquot);
889
890                 /*
891                  * We may or may not have the i_udquot locked at this point,
892                  * but this check is OK since we don't depend on the i_gdquot to
893                  * be accurate 100% all the time. It is just a hint, and this
894                  * will succeed in general.
895                  */
896                 if (ip->i_udquot->q_gdquot == ip->i_gdquot)
897                         goto done;
898                 /*
899                  * Attach i_gdquot to the gdquot hint inside the i_udquot.
900                  */
901                 xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot,
902                                          flags & XFS_QMOPT_DQLOCK);
903         }
904
905       done:
906
907 #ifdef QUOTADEBUG
908         if (! error) {
909                 if (ip->i_udquot) {
910                         if (flags & XFS_QMOPT_DQLOCK)
911                                 ASSERT(XFS_DQ_IS_LOCKED(ip->i_udquot));
912                 }
913                 if (ip->i_gdquot) {
914                         if (flags & XFS_QMOPT_DQLOCK)
915                                 ASSERT(XFS_DQ_IS_LOCKED(ip->i_gdquot));
916                 }
917                 if (XFS_IS_UQUOTA_ON(mp))
918                         ASSERT(ip->i_udquot);
919                 if (XFS_IS_OQUOTA_ON(mp))
920                         ASSERT(ip->i_gdquot);
921         }
922 #endif
923
924         if (! (flags & XFS_QMOPT_ILOCKED))
925                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
926
927 #ifdef QUOTADEBUG
928         else
929                 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
930 #endif
931         return error;
932 }
933
934 /*
935  * Release dquots (and their references) if any.
936  * The inode should be locked EXCL except when this's called by
937  * xfs_ireclaim.
938  */
939 void
940 xfs_qm_dqdetach(
941         xfs_inode_t     *ip)
942 {
943         if (!(ip->i_udquot || ip->i_gdquot))
944                 return;
945
946         ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
947         ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
948         if (ip->i_udquot) {
949                 xfs_dqtrace_entry_ino(ip->i_udquot, "DQDETTACH", ip);
950                 xfs_qm_dqrele(ip->i_udquot);
951                 ip->i_udquot = NULL;
952         }
953         if (ip->i_gdquot) {
954                 xfs_dqtrace_entry_ino(ip->i_gdquot, "DQDETTACH", ip);
955                 xfs_qm_dqrele(ip->i_gdquot);
956                 ip->i_gdquot = NULL;
957         }
958 }
959
960 /*
961  * This is called to sync quotas. We can be told to use non-blocking
962  * semantics by either the SYNC_BDFLUSH flag or the absence of the
963  * SYNC_WAIT flag.
964  */
965 int
966 xfs_qm_sync(
967         xfs_mount_t     *mp,
968         int             flags)
969 {
970         int             recl, restarts;
971         xfs_dquot_t     *dqp;
972         uint            flush_flags;
973         boolean_t       nowait;
974         int             error;
975
976         if (! XFS_IS_QUOTA_ON(mp))
977                 return 0;
978
979         restarts = 0;
980         /*
981          * We won't block unless we are asked to.
982          */
983         nowait = (boolean_t)(flags & SYNC_BDFLUSH || (flags & SYNC_WAIT) == 0);
984
985   again:
986         xfs_qm_mplist_lock(mp);
987         /*
988          * dqpurge_all() also takes the mplist lock and iterate thru all dquots
989          * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
990          * when we have the mplist lock, we know that dquots will be consistent
991          * as long as we have it locked.
992          */
993         if (! XFS_IS_QUOTA_ON(mp)) {
994                 xfs_qm_mplist_unlock(mp);
995                 return 0;
996         }
997         FOREACH_DQUOT_IN_MP(dqp, mp) {
998                 /*
999                  * If this is vfs_sync calling, then skip the dquots that
1000                  * don't 'seem' to be dirty. ie. don't acquire dqlock.
1001                  * This is very similar to what xfs_sync does with inodes.
1002                  */
1003                 if (flags & SYNC_BDFLUSH) {
1004                         if (! XFS_DQ_IS_DIRTY(dqp))
1005                                 continue;
1006                 }
1007
1008                 if (nowait) {
1009                         /*
1010                          * Try to acquire the dquot lock. We are NOT out of
1011                          * lock order, but we just don't want to wait for this
1012                          * lock, unless somebody wanted us to.
1013                          */
1014                         if (! xfs_qm_dqlock_nowait(dqp))
1015                                 continue;
1016                 } else {
1017                         xfs_dqlock(dqp);
1018                 }
1019
1020                 /*
1021                  * Now, find out for sure if this dquot is dirty or not.
1022                  */
1023                 if (! XFS_DQ_IS_DIRTY(dqp)) {
1024                         xfs_dqunlock(dqp);
1025                         continue;
1026                 }
1027
1028                 /* XXX a sentinel would be better */
1029                 recl = XFS_QI_MPLRECLAIMS(mp);
1030                 if (!xfs_dqflock_nowait(dqp)) {
1031                         if (nowait) {
1032                                 xfs_dqunlock(dqp);
1033                                 continue;
1034                         }
1035                         /*
1036                          * If we can't grab the flush lock then if the caller
1037                          * really wanted us to give this our best shot, so
1038                          * see if we can give a push to the buffer before we wait
1039                          * on the flush lock. At this point, we know that
1040                          * even though the dquot is being flushed,
1041                          * it has (new) dirty data.
1042                          */
1043                         xfs_qm_dqflock_pushbuf_wait(dqp);
1044                 }
1045                 /*
1046                  * Let go of the mplist lock. We don't want to hold it
1047                  * across a disk write
1048                  */
1049                 flush_flags = (nowait) ? XFS_QMOPT_DELWRI : XFS_QMOPT_SYNC;
1050                 xfs_qm_mplist_unlock(mp);
1051                 xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH");
1052                 error = xfs_qm_dqflush(dqp, flush_flags);
1053                 xfs_dqunlock(dqp);
1054                 if (error && XFS_FORCED_SHUTDOWN(mp))
1055                         return 0;       /* Need to prevent umount failure */
1056                 else if (error)
1057                         return error;
1058
1059                 xfs_qm_mplist_lock(mp);
1060                 if (recl != XFS_QI_MPLRECLAIMS(mp)) {
1061                         if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
1062                                 break;
1063
1064                         xfs_qm_mplist_unlock(mp);
1065                         goto again;
1066                 }
1067         }
1068
1069         xfs_qm_mplist_unlock(mp);
1070         return 0;
1071 }
1072
1073
1074 /*
1075  * This initializes all the quota information that's kept in the
1076  * mount structure
1077  */
1078 STATIC int
1079 xfs_qm_init_quotainfo(
1080         xfs_mount_t     *mp)
1081 {
1082         xfs_quotainfo_t *qinf;
1083         int             error;
1084         xfs_dquot_t     *dqp;
1085
1086         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1087
1088         /*
1089          * Tell XQM that we exist as soon as possible.
1090          */
1091         if ((error = xfs_qm_hold_quotafs_ref(mp))) {
1092                 return error;
1093         }
1094
1095         qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
1096
1097         /*
1098          * See if quotainodes are setup, and if not, allocate them,
1099          * and change the superblock accordingly.
1100          */
1101         if ((error = xfs_qm_init_quotainos(mp))) {
1102                 kmem_free(qinf);
1103                 mp->m_quotainfo = NULL;
1104                 return error;
1105         }
1106
1107         xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0);
1108         qinf->qi_dqreclaims = 0;
1109
1110         /* mutex used to serialize quotaoffs */
1111         mutex_init(&qinf->qi_quotaofflock);
1112
1113         /* Precalc some constants */
1114         qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
1115         ASSERT(qinf->qi_dqchunklen);
1116         qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
1117         do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
1118
1119         mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
1120
1121         /*
1122          * We try to get the limits from the superuser's limits fields.
1123          * This is quite hacky, but it is standard quota practice.
1124          * We look at the USR dquot with id == 0 first, but if user quotas
1125          * are not enabled we goto the GRP dquot with id == 0.
1126          * We don't really care to keep separate default limits for user
1127          * and group quotas, at least not at this point.
1128          */
1129         error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
1130                              XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 
1131                              (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
1132                                 XFS_DQ_PROJ),
1133                              XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
1134                              &dqp);
1135         if (! error) {
1136                 xfs_disk_dquot_t        *ddqp = &dqp->q_core;
1137
1138                 /*
1139                  * The warnings and timers set the grace period given to
1140                  * a user or group before he or she can not perform any
1141                  * more writing. If it is zero, a default is used.
1142                  */
1143                 qinf->qi_btimelimit = ddqp->d_btimer ?
1144                         be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT;
1145                 qinf->qi_itimelimit = ddqp->d_itimer ?
1146                         be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT;
1147                 qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ?
1148                         be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT;
1149                 qinf->qi_bwarnlimit = ddqp->d_bwarns ?
1150                         be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT;
1151                 qinf->qi_iwarnlimit = ddqp->d_iwarns ?
1152                         be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
1153                 qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
1154                         be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
1155                 qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
1156                 qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
1157                 qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
1158                 qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
1159                 qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
1160                 qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
1161  
1162                 /*
1163                  * We sent the XFS_QMOPT_DQSUSER flag to dqget because
1164                  * we don't want this dquot cached. We haven't done a
1165                  * quotacheck yet, and quotacheck doesn't like incore dquots.
1166                  */
1167                 xfs_qm_dqdestroy(dqp);
1168         } else {
1169                 qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
1170                 qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
1171                 qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
1172                 qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
1173                 qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
1174                 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
1175         }
1176
1177         return 0;
1178 }
1179
1180
1181 /*
1182  * Gets called when unmounting a filesystem or when all quotas get
1183  * turned off.
1184  * This purges the quota inodes, destroys locks and frees itself.
1185  */
1186 void
1187 xfs_qm_destroy_quotainfo(
1188         xfs_mount_t     *mp)
1189 {
1190         xfs_quotainfo_t *qi;
1191
1192         qi = mp->m_quotainfo;
1193         ASSERT(qi != NULL);
1194         ASSERT(xfs_Gqm != NULL);
1195
1196         /*
1197          * Release the reference that XQM kept, so that we know
1198          * when the XQM structure should be freed. We cannot assume
1199          * that xfs_Gqm is non-null after this point.
1200          */
1201         xfs_qm_rele_quotafs_ref(mp);
1202
1203         xfs_qm_list_destroy(&qi->qi_dqlist);
1204
1205         if (qi->qi_uquotaip) {
1206                 IRELE(qi->qi_uquotaip);
1207                 qi->qi_uquotaip = NULL; /* paranoia */
1208         }
1209         if (qi->qi_gquotaip) {
1210                 IRELE(qi->qi_gquotaip);
1211                 qi->qi_gquotaip = NULL;
1212         }
1213         mutex_destroy(&qi->qi_quotaofflock);
1214         kmem_free(qi);
1215         mp->m_quotainfo = NULL;
1216 }
1217
1218
1219
1220 /* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
1221
1222 /* ARGSUSED */
1223 STATIC void
1224 xfs_qm_list_init(
1225         xfs_dqlist_t    *list,
1226         char            *str,
1227         int             n)
1228 {
1229         mutex_init(&list->qh_lock);
1230         list->qh_next = NULL;
1231         list->qh_version = 0;
1232         list->qh_nelems = 0;
1233 }
1234
1235 STATIC void
1236 xfs_qm_list_destroy(
1237         xfs_dqlist_t    *list)
1238 {
1239         mutex_destroy(&(list->qh_lock));
1240 }
1241
1242
1243 /*
1244  * Stripped down version of dqattach. This doesn't attach, or even look at the
1245  * dquots attached to the inode. The rationale is that there won't be any
1246  * attached at the time this is called from quotacheck.
1247  */
1248 STATIC int
1249 xfs_qm_dqget_noattach(
1250         xfs_inode_t     *ip,
1251         xfs_dquot_t     **O_udqpp,
1252         xfs_dquot_t     **O_gdqpp)
1253 {
1254         int             error;
1255         xfs_mount_t     *mp;
1256         xfs_dquot_t     *udqp, *gdqp;
1257
1258         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1259         mp = ip->i_mount;
1260         udqp = NULL;
1261         gdqp = NULL;
1262
1263         if (XFS_IS_UQUOTA_ON(mp)) {
1264                 ASSERT(ip->i_udquot == NULL);
1265                 /*
1266                  * We want the dquot allocated if it doesn't exist.
1267                  */
1268                 if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_uid, XFS_DQ_USER,
1269                                          XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN,
1270                                          &udqp))) {
1271                         /*
1272                          * Shouldn't be able to turn off quotas here.
1273                          */
1274                         ASSERT(error != ESRCH);
1275                         ASSERT(error != ENOENT);
1276                         return error;
1277                 }
1278                 ASSERT(udqp);
1279         }
1280
1281         if (XFS_IS_OQUOTA_ON(mp)) {
1282                 ASSERT(ip->i_gdquot == NULL);
1283                 if (udqp)
1284                         xfs_dqunlock(udqp);
1285                 error = XFS_IS_GQUOTA_ON(mp) ?
1286                                 xfs_qm_dqget(mp, ip,
1287                                              ip->i_d.di_gid, XFS_DQ_GROUP,
1288                                              XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
1289                                              &gdqp) :
1290                                 xfs_qm_dqget(mp, ip,
1291                                              ip->i_d.di_projid, XFS_DQ_PROJ,
1292                                              XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
1293                                              &gdqp);
1294                 if (error) {
1295                         if (udqp)
1296                                 xfs_qm_dqrele(udqp);
1297                         ASSERT(error != ESRCH);
1298                         ASSERT(error != ENOENT);
1299                         return error;
1300                 }
1301                 ASSERT(gdqp);
1302
1303                 /* Reacquire the locks in the right order */
1304                 if (udqp) {
1305                         if (! xfs_qm_dqlock_nowait(udqp)) {
1306                                 xfs_dqunlock(gdqp);
1307                                 xfs_dqlock(udqp);
1308                                 xfs_dqlock(gdqp);
1309                         }
1310                 }
1311         }
1312
1313         *O_udqpp = udqp;
1314         *O_gdqpp = gdqp;
1315
1316 #ifdef QUOTADEBUG
1317         if (udqp) ASSERT(XFS_DQ_IS_LOCKED(udqp));
1318         if (gdqp) ASSERT(XFS_DQ_IS_LOCKED(gdqp));
1319 #endif
1320         return 0;
1321 }
1322
1323 /*
1324  * Create an inode and return with a reference already taken, but unlocked
1325  * This is how we create quota inodes
1326  */
1327 STATIC int
1328 xfs_qm_qino_alloc(
1329         xfs_mount_t     *mp,
1330         xfs_inode_t     **ip,
1331         __int64_t       sbfields,
1332         uint            flags)
1333 {
1334         xfs_trans_t     *tp;
1335         int             error;
1336         int             committed;
1337
1338         tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
1339         if ((error = xfs_trans_reserve(tp,
1340                                       XFS_QM_QINOCREATE_SPACE_RES(mp),
1341                                       XFS_CREATE_LOG_RES(mp), 0,
1342                                       XFS_TRANS_PERM_LOG_RES,
1343                                       XFS_CREATE_LOG_COUNT))) {
1344                 xfs_trans_cancel(tp, 0);
1345                 return error;
1346         }
1347
1348         if ((error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0,
1349                                    &xfs_zerocr, 0, 1, ip, &committed))) {
1350                 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
1351                                  XFS_TRANS_ABORT);
1352                 return error;
1353         }
1354
1355         /*
1356          * Keep an extra reference to this quota inode. This inode is
1357          * locked exclusively and joined to the transaction already.
1358          */
1359         ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL));
1360         IHOLD(*ip);
1361
1362         /*
1363          * Make the changes in the superblock, and log those too.
1364          * sbfields arg may contain fields other than *QUOTINO;
1365          * VERSIONNUM for example.
1366          */
1367         spin_lock(&mp->m_sb_lock);
1368         if (flags & XFS_QMOPT_SBVERSION) {
1369 #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
1370                 unsigned oldv = mp->m_sb.sb_versionnum;
1371 #endif
1372                 ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
1373                 ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1374                                    XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
1375                        (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1376                         XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
1377
1378                 xfs_sb_version_addquota(&mp->m_sb);
1379                 mp->m_sb.sb_uquotino = NULLFSINO;
1380                 mp->m_sb.sb_gquotino = NULLFSINO;
1381
1382                 /* qflags will get updated _after_ quotacheck */
1383                 mp->m_sb.sb_qflags = 0;
1384 #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
1385                 cmn_err(CE_NOTE,
1386                         "Old superblock version %x, converting to %x.",
1387                         oldv, mp->m_sb.sb_versionnum);
1388 #endif
1389         }
1390         if (flags & XFS_QMOPT_UQUOTA)
1391                 mp->m_sb.sb_uquotino = (*ip)->i_ino;
1392         else
1393                 mp->m_sb.sb_gquotino = (*ip)->i_ino;
1394         spin_unlock(&mp->m_sb_lock);
1395         xfs_mod_sb(tp, sbfields);
1396
1397         if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
1398                 xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!");
1399                 return error;
1400         }
1401         return 0;
1402 }
1403
1404
1405 STATIC void
1406 xfs_qm_reset_dqcounts(
1407         xfs_mount_t     *mp,
1408         xfs_buf_t       *bp,
1409         xfs_dqid_t      id,
1410         uint            type)
1411 {
1412         xfs_disk_dquot_t        *ddq;
1413         int                     j;
1414
1415         xfs_buftrace("RESET DQUOTS", bp);
1416         /*
1417          * Reset all counters and timers. They'll be
1418          * started afresh by xfs_qm_quotacheck.
1419          */
1420 #ifdef DEBUG
1421         j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
1422         do_div(j, sizeof(xfs_dqblk_t));
1423         ASSERT(XFS_QM_DQPERBLK(mp) == j);
1424 #endif
1425         ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp);
1426         for (j = 0; j < XFS_QM_DQPERBLK(mp); j++) {
1427                 /*
1428                  * Do a sanity check, and if needed, repair the dqblk. Don't
1429                  * output any warnings because it's perfectly possible to
1430                  * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
1431                  */
1432                 (void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR,
1433                                       "xfs_quotacheck");
1434                 ddq->d_bcount = 0;
1435                 ddq->d_icount = 0;
1436                 ddq->d_rtbcount = 0;
1437                 ddq->d_btimer = 0;
1438                 ddq->d_itimer = 0;
1439                 ddq->d_rtbtimer = 0;
1440                 ddq->d_bwarns = 0;
1441                 ddq->d_iwarns = 0;
1442                 ddq->d_rtbwarns = 0;
1443                 ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
1444         }
1445 }
1446
1447 STATIC int
1448 xfs_qm_dqiter_bufs(
1449         xfs_mount_t     *mp,
1450         xfs_dqid_t      firstid,
1451         xfs_fsblock_t   bno,
1452         xfs_filblks_t   blkcnt,
1453         uint            flags)
1454 {
1455         xfs_buf_t       *bp;
1456         int             error;
1457         int             notcommitted;
1458         int             incr;
1459         int             type;
1460
1461         ASSERT(blkcnt > 0);
1462         notcommitted = 0;
1463         incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ?
1464                 XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt;
1465         type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
1466                 (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
1467         error = 0;
1468
1469         /*
1470          * Blkcnt arg can be a very big number, and might even be
1471          * larger than the log itself. So, we have to break it up into
1472          * manageable-sized transactions.
1473          * Note that we don't start a permanent transaction here; we might
1474          * not be able to get a log reservation for the whole thing up front,
1475          * and we don't really care to either, because we just discard
1476          * everything if we were to crash in the middle of this loop.
1477          */
1478         while (blkcnt--) {
1479                 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
1480                               XFS_FSB_TO_DADDR(mp, bno),
1481                               (int)XFS_QI_DQCHUNKLEN(mp), 0, &bp);
1482                 if (error)
1483                         break;
1484
1485                 xfs_qm_reset_dqcounts(mp, bp, firstid, type);
1486                 xfs_bdwrite(mp, bp);
1487                 /*
1488                  * goto the next block.
1489                  */
1490                 bno++;
1491                 firstid += XFS_QM_DQPERBLK(mp);
1492         }
1493         return error;
1494 }
1495
1496 /*
1497  * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a
1498  * caller supplied function for every chunk of dquots that we find.
1499  */
1500 STATIC int
1501 xfs_qm_dqiterate(
1502         xfs_mount_t     *mp,
1503         xfs_inode_t     *qip,
1504         uint            flags)
1505 {
1506         xfs_bmbt_irec_t         *map;
1507         int                     i, nmaps;       /* number of map entries */
1508         int                     error;          /* return value */
1509         xfs_fileoff_t           lblkno;
1510         xfs_filblks_t           maxlblkcnt;
1511         xfs_dqid_t              firstid;
1512         xfs_fsblock_t           rablkno;
1513         xfs_filblks_t           rablkcnt;
1514
1515         error = 0;
1516         /*
1517          * This looks racy, but we can't keep an inode lock across a
1518          * trans_reserve. But, this gets called during quotacheck, and that
1519          * happens only at mount time which is single threaded.
1520          */
1521         if (qip->i_d.di_nblocks == 0)
1522                 return 0;
1523
1524         map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
1525
1526         lblkno = 0;
1527         maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
1528         do {
1529                 nmaps = XFS_DQITER_MAP_SIZE;
1530                 /*
1531                  * We aren't changing the inode itself. Just changing
1532                  * some of its data. No new blocks are added here, and
1533                  * the inode is never added to the transaction.
1534                  */
1535                 xfs_ilock(qip, XFS_ILOCK_SHARED);
1536                 error = xfs_bmapi(NULL, qip, lblkno,
1537                                   maxlblkcnt - lblkno,
1538                                   XFS_BMAPI_METADATA,
1539                                   NULL,
1540                                   0, map, &nmaps, NULL, NULL);
1541                 xfs_iunlock(qip, XFS_ILOCK_SHARED);
1542                 if (error)
1543                         break;
1544
1545                 ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
1546                 for (i = 0; i < nmaps; i++) {
1547                         ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
1548                         ASSERT(map[i].br_blockcount);
1549
1550
1551                         lblkno += map[i].br_blockcount;
1552
1553                         if (map[i].br_startblock == HOLESTARTBLOCK)
1554                                 continue;
1555
1556                         firstid = (xfs_dqid_t) map[i].br_startoff *
1557                                 XFS_QM_DQPERBLK(mp);
1558                         /*
1559                          * Do a read-ahead on the next extent.
1560                          */
1561                         if ((i+1 < nmaps) &&
1562                             (map[i+1].br_startblock != HOLESTARTBLOCK)) {
1563                                 rablkcnt =  map[i+1].br_blockcount;
1564                                 rablkno = map[i+1].br_startblock;
1565                                 while (rablkcnt--) {
1566                                         xfs_baread(mp->m_ddev_targp,
1567                                                XFS_FSB_TO_DADDR(mp, rablkno),
1568                                                (int)XFS_QI_DQCHUNKLEN(mp));
1569                                         rablkno++;
1570                                 }
1571                         }
1572                         /*
1573                          * Iterate thru all the blks in the extent and
1574                          * reset the counters of all the dquots inside them.
1575                          */
1576                         if ((error = xfs_qm_dqiter_bufs(mp,
1577                                                        firstid,
1578                                                        map[i].br_startblock,
1579                                                        map[i].br_blockcount,
1580                                                        flags))) {
1581                                 break;
1582                         }
1583                 }
1584
1585                 if (error)
1586                         break;
1587         } while (nmaps > 0);
1588
1589         kmem_free(map);
1590
1591         return error;
1592 }
1593
1594 /*
1595  * Called by dqusage_adjust in doing a quotacheck.
1596  * Given the inode, and a dquot (either USR or GRP, doesn't matter),
1597  * this updates its incore copy as well as the buffer copy. This is
1598  * so that once the quotacheck is done, we can just log all the buffers,
1599  * as opposed to logging numerous updates to individual dquots.
1600  */
1601 STATIC void
1602 xfs_qm_quotacheck_dqadjust(
1603         xfs_dquot_t             *dqp,
1604         xfs_qcnt_t              nblks,
1605         xfs_qcnt_t              rtblks)
1606 {
1607         ASSERT(XFS_DQ_IS_LOCKED(dqp));
1608         xfs_dqtrace_entry(dqp, "QCHECK DQADJUST");
1609         /*
1610          * Adjust the inode count and the block count to reflect this inode's
1611          * resource usage.
1612          */
1613         be64_add_cpu(&dqp->q_core.d_icount, 1);
1614         dqp->q_res_icount++;
1615         if (nblks) {
1616                 be64_add_cpu(&dqp->q_core.d_bcount, nblks);
1617                 dqp->q_res_bcount += nblks;
1618         }
1619         if (rtblks) {
1620                 be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks);
1621                 dqp->q_res_rtbcount += rtblks;
1622         }
1623
1624         /*
1625          * Set default limits, adjust timers (since we changed usages)
1626          */
1627         if (! XFS_IS_SUSER_DQUOT(dqp)) {
1628                 xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core);
1629                 xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core);
1630         }
1631
1632         dqp->dq_flags |= XFS_DQ_DIRTY;
1633 }
1634
1635 STATIC int
1636 xfs_qm_get_rtblks(
1637         xfs_inode_t     *ip,
1638         xfs_qcnt_t      *O_rtblks)
1639 {
1640         xfs_filblks_t   rtblks;                 /* total rt blks */
1641         xfs_extnum_t    idx;                    /* extent record index */
1642         xfs_ifork_t     *ifp;                   /* inode fork pointer */
1643         xfs_extnum_t    nextents;               /* number of extent entries */
1644         int             error;
1645
1646         ASSERT(XFS_IS_REALTIME_INODE(ip));
1647         ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1648         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1649                 if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
1650                         return error;
1651         }
1652         rtblks = 0;
1653         nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1654         for (idx = 0; idx < nextents; idx++)
1655                 rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
1656         *O_rtblks = (xfs_qcnt_t)rtblks;
1657         return 0;
1658 }
1659
1660 /*
1661  * callback routine supplied to bulkstat(). Given an inumber, find its
1662  * dquots and update them to account for resources taken by that inode.
1663  */
1664 /* ARGSUSED */
1665 STATIC int
1666 xfs_qm_dqusage_adjust(
1667         xfs_mount_t     *mp,            /* mount point for filesystem */
1668         xfs_ino_t       ino,            /* inode number to get data for */
1669         void            __user *buffer, /* not used */
1670         int             ubsize,         /* not used */
1671         void            *private_data,  /* not used */
1672         xfs_daddr_t     bno,            /* starting block of inode cluster */
1673         int             *ubused,        /* not used */
1674         void            *dip,           /* on-disk inode pointer (not used) */
1675         int             *res)           /* result code value */
1676 {
1677         xfs_inode_t     *ip;
1678         xfs_dquot_t     *udqp, *gdqp;
1679         xfs_qcnt_t      nblks, rtblks;
1680         int             error;
1681
1682         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1683
1684         /*
1685          * rootino must have its resources accounted for, not so with the quota
1686          * inodes.
1687          */
1688         if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
1689                 *res = BULKSTAT_RV_NOTHING;
1690                 return XFS_ERROR(EINVAL);
1691         }
1692
1693         /*
1694          * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
1695          * interface expects the inode to be exclusively locked because that's
1696          * the case in all other instances. It's OK that we do this because
1697          * quotacheck is done only at mount time.
1698          */
1699         if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) {
1700                 *res = BULKSTAT_RV_NOTHING;
1701                 return error;
1702         }
1703
1704         /*
1705          * Obtain the locked dquots. In case of an error (eg. allocation
1706          * fails for ENOSPC), we return the negative of the error number
1707          * to bulkstat, so that it can get propagated to quotacheck() and
1708          * making us disable quotas for the file system.
1709          */
1710         if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) {
1711                 xfs_iput(ip, XFS_ILOCK_EXCL);
1712                 *res = BULKSTAT_RV_GIVEUP;
1713                 return error;
1714         }
1715
1716         rtblks = 0;
1717         if (! XFS_IS_REALTIME_INODE(ip)) {
1718                 nblks = (xfs_qcnt_t)ip->i_d.di_nblocks;
1719         } else {
1720                 /*
1721                  * Walk thru the extent list and count the realtime blocks.
1722                  */
1723                 if ((error = xfs_qm_get_rtblks(ip, &rtblks))) {
1724                         xfs_iput(ip, XFS_ILOCK_EXCL);
1725                         if (udqp)
1726                                 xfs_qm_dqput(udqp);
1727                         if (gdqp)
1728                                 xfs_qm_dqput(gdqp);
1729                         *res = BULKSTAT_RV_GIVEUP;
1730                         return error;
1731                 }
1732                 nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
1733         }
1734         ASSERT(ip->i_delayed_blks == 0);
1735
1736         /*
1737          * We can't release the inode while holding its dquot locks.
1738          * The inode can go into inactive and might try to acquire the dquotlocks.
1739          * So, just unlock here and do a vn_rele at the end.
1740          */
1741         xfs_iunlock(ip, XFS_ILOCK_EXCL);
1742
1743         /*
1744          * Add the (disk blocks and inode) resources occupied by this
1745          * inode to its dquots. We do this adjustment in the incore dquot,
1746          * and also copy the changes to its buffer.
1747          * We don't care about putting these changes in a transaction
1748          * envelope because if we crash in the middle of a 'quotacheck'
1749          * we have to start from the beginning anyway.
1750          * Once we're done, we'll log all the dquot bufs.
1751          *
1752          * The *QUOTA_ON checks below may look pretty racy, but quotachecks
1753          * and quotaoffs don't race. (Quotachecks happen at mount time only).
1754          */
1755         if (XFS_IS_UQUOTA_ON(mp)) {
1756                 ASSERT(udqp);
1757                 xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks);
1758                 xfs_qm_dqput(udqp);
1759         }
1760         if (XFS_IS_OQUOTA_ON(mp)) {
1761                 ASSERT(gdqp);
1762                 xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks);
1763                 xfs_qm_dqput(gdqp);
1764         }
1765         /*
1766          * Now release the inode. This will send it to 'inactive', and
1767          * possibly even free blocks.
1768          */
1769         IRELE(ip);
1770
1771         /*
1772          * Goto next inode.
1773          */
1774         *res = BULKSTAT_RV_DIDONE;
1775         return 0;
1776 }
1777
1778 /*
1779  * Walk thru all the filesystem inodes and construct a consistent view
1780  * of the disk quota world. If the quotacheck fails, disable quotas.
1781  */
1782 int
1783 xfs_qm_quotacheck(
1784         xfs_mount_t     *mp)
1785 {
1786         int             done, count, error;
1787         xfs_ino_t       lastino;
1788         size_t          structsz;
1789         xfs_inode_t     *uip, *gip;
1790         uint            flags;
1791
1792         count = INT_MAX;
1793         structsz = 1;
1794         lastino = 0;
1795         flags = 0;
1796
1797         ASSERT(XFS_QI_UQIP(mp) || XFS_QI_GQIP(mp));
1798         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1799
1800         /*
1801          * There should be no cached dquots. The (simplistic) quotacheck
1802          * algorithm doesn't like that.
1803          */
1804         ASSERT(XFS_QI_MPLNDQUOTS(mp) == 0);
1805
1806         cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname);
1807
1808         /*
1809          * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
1810          * their counters to zero. We need a clean slate.
1811          * We don't log our changes till later.
1812          */
1813         if ((uip = XFS_QI_UQIP(mp))) {
1814                 if ((error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA)))
1815                         goto error_return;
1816                 flags |= XFS_UQUOTA_CHKD;
1817         }
1818
1819         if ((gip = XFS_QI_GQIP(mp))) {
1820                 if ((error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
1821                                         XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA)))
1822                         goto error_return;
1823                 flags |= XFS_OQUOTA_CHKD;
1824         }
1825
1826         do {
1827                 /*
1828                  * Iterate thru all the inodes in the file system,
1829                  * adjusting the corresponding dquot counters in core.
1830                  */
1831                 if ((error = xfs_bulkstat(mp, &lastino, &count,
1832                                      xfs_qm_dqusage_adjust, NULL,
1833                                      structsz, NULL, BULKSTAT_FG_IGET, &done)))
1834                         break;
1835
1836         } while (! done);
1837
1838         /*
1839          * We've made all the changes that we need to make incore.
1840          * Flush them down to disk buffers if everything was updated
1841          * successfully.
1842          */
1843         if (!error)
1844                 error = xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI);
1845
1846         /*
1847          * We can get this error if we couldn't do a dquot allocation inside
1848          * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
1849          * dirty dquots that might be cached, we just want to get rid of them
1850          * and turn quotaoff. The dquots won't be attached to any of the inodes
1851          * at this point (because we intentionally didn't in dqget_noattach).
1852          */
1853         if (error) {
1854                 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF);
1855                 goto error_return;
1856         }
1857
1858         /*
1859          * We didn't log anything, because if we crashed, we'll have to
1860          * start the quotacheck from scratch anyway. However, we must make
1861          * sure that our dquot changes are secure before we put the
1862          * quotacheck'd stamp on the superblock. So, here we do a synchronous
1863          * flush.
1864          */
1865         XFS_bflush(mp->m_ddev_targp);
1866
1867         /*
1868          * If one type of quotas is off, then it will lose its
1869          * quotachecked status, since we won't be doing accounting for
1870          * that type anymore.
1871          */
1872         mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
1873         mp->m_qflags |= flags;
1874
1875         XQM_LIST_PRINT(&(XFS_QI_MPL_LIST(mp)), MPL_NEXT, "++++ Mp list +++");
1876
1877  error_return:
1878         if (error) {
1879                 cmn_err(CE_WARN, "XFS quotacheck %s: Unsuccessful (Error %d): "
1880                         "Disabling quotas.",
1881                         mp->m_fsname, error);
1882                 /*
1883                  * We must turn off quotas.
1884                  */
1885                 ASSERT(mp->m_quotainfo != NULL);
1886                 ASSERT(xfs_Gqm != NULL);
1887                 xfs_qm_destroy_quotainfo(mp);
1888                 if (xfs_mount_reset_sbqflags(mp)) {
1889                         cmn_err(CE_WARN, "XFS quotacheck %s: "
1890                                 "Failed to reset quota flags.", mp->m_fsname);
1891                 }
1892         } else {
1893                 cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname);
1894         }
1895         return (error);
1896 }
1897
1898 /*
1899  * This is called after the superblock has been read in and we're ready to
1900  * iget the quota inodes.
1901  */
1902 STATIC int
1903 xfs_qm_init_quotainos(
1904         xfs_mount_t     *mp)
1905 {
1906         xfs_inode_t     *uip, *gip;
1907         int             error;
1908         __int64_t       sbflags;
1909         uint            flags;
1910
1911         ASSERT(mp->m_quotainfo);
1912         uip = gip = NULL;
1913         sbflags = 0;
1914         flags = 0;
1915
1916         /*
1917          * Get the uquota and gquota inodes
1918          */
1919         if (xfs_sb_version_hasquota(&mp->m_sb)) {
1920                 if (XFS_IS_UQUOTA_ON(mp) &&
1921                     mp->m_sb.sb_uquotino != NULLFSINO) {
1922                         ASSERT(mp->m_sb.sb_uquotino > 0);
1923                         if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
1924                                              0, 0, &uip, 0)))
1925                                 return XFS_ERROR(error);
1926                 }
1927                 if (XFS_IS_OQUOTA_ON(mp) &&
1928                     mp->m_sb.sb_gquotino != NULLFSINO) {
1929                         ASSERT(mp->m_sb.sb_gquotino > 0);
1930                         if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
1931                                              0, 0, &gip, 0))) {
1932                                 if (uip)
1933                                         IRELE(uip);
1934                                 return XFS_ERROR(error);
1935                         }
1936                 }
1937         } else {
1938                 flags |= XFS_QMOPT_SBVERSION;
1939                 sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1940                             XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
1941         }
1942
1943         /*
1944          * Create the two inodes, if they don't exist already. The changes
1945          * made above will get added to a transaction and logged in one of
1946          * the qino_alloc calls below.  If the device is readonly,
1947          * temporarily switch to read-write to do this.
1948          */
1949         if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
1950                 if ((error = xfs_qm_qino_alloc(mp, &uip,
1951                                               sbflags | XFS_SB_UQUOTINO,
1952                                               flags | XFS_QMOPT_UQUOTA)))
1953                         return XFS_ERROR(error);
1954
1955                 flags &= ~XFS_QMOPT_SBVERSION;
1956         }
1957         if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) {
1958                 flags |= (XFS_IS_GQUOTA_ON(mp) ?
1959                                 XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
1960                 error = xfs_qm_qino_alloc(mp, &gip,
1961                                           sbflags | XFS_SB_GQUOTINO, flags);
1962                 if (error) {
1963                         if (uip)
1964                                 IRELE(uip);
1965
1966                         return XFS_ERROR(error);
1967                 }
1968         }
1969
1970         XFS_QI_UQIP(mp) = uip;
1971         XFS_QI_GQIP(mp) = gip;
1972
1973         return 0;
1974 }
1975
1976
1977 /*
1978  * Traverse the freelist of dquots and attempt to reclaim a maximum of
1979  * 'howmany' dquots. This operation races with dqlookup(), and attempts to
1980  * favor the lookup function ...
1981  * XXXsup merge this with qm_reclaim_one().
1982  */
1983 STATIC int
1984 xfs_qm_shake_freelist(
1985         int howmany)
1986 {
1987         int             nreclaimed;
1988         xfs_dqhash_t    *hash;
1989         xfs_dquot_t     *dqp, *nextdqp;
1990         int             restarts;
1991         int             nflushes;
1992
1993         if (howmany <= 0)
1994                 return 0;
1995
1996         nreclaimed = 0;
1997         restarts = 0;
1998         nflushes = 0;
1999
2000 #ifdef QUOTADEBUG
2001         cmn_err(CE_DEBUG, "Shake free 0x%x", howmany);
2002 #endif
2003         /* lock order is : hashchainlock, freelistlock, mplistlock */
2004  tryagain:
2005         xfs_qm_freelist_lock(xfs_Gqm);
2006
2007         for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
2008              ((dqp != (xfs_dquot_t *) &xfs_Gqm->qm_dqfreelist) &&
2009               nreclaimed < howmany); ) {
2010                 xfs_dqlock(dqp);
2011
2012                 /*
2013                  * We are racing with dqlookup here. Naturally we don't
2014                  * want to reclaim a dquot that lookup wants.
2015                  */
2016                 if (dqp->dq_flags & XFS_DQ_WANT) {
2017                         xfs_dqunlock(dqp);
2018                         xfs_qm_freelist_unlock(xfs_Gqm);
2019                         if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
2020                                 return nreclaimed;
2021                         XQM_STATS_INC(xqmstats.xs_qm_dqwants);
2022                         goto tryagain;
2023                 }
2024
2025                 /*
2026                  * If the dquot is inactive, we are assured that it is
2027                  * not on the mplist or the hashlist, and that makes our
2028                  * life easier.
2029                  */
2030                 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
2031                         ASSERT(dqp->q_mount == NULL);
2032                         ASSERT(! XFS_DQ_IS_DIRTY(dqp));
2033                         ASSERT(dqp->HL_PREVP == NULL);
2034                         ASSERT(dqp->MPL_PREVP == NULL);
2035                         XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
2036                         nextdqp = dqp->dq_flnext;
2037                         goto off_freelist;
2038                 }
2039
2040                 ASSERT(dqp->MPL_PREVP);
2041                 /*
2042                  * Try to grab the flush lock. If this dquot is in the process of
2043                  * getting flushed to disk, we don't want to reclaim it.
2044                  */
2045                 if (!xfs_dqflock_nowait(dqp)) {
2046                         xfs_dqunlock(dqp);
2047                         dqp = dqp->dq_flnext;
2048                         continue;
2049                 }
2050
2051                 /*
2052                  * We have the flush lock so we know that this is not in the
2053                  * process of being flushed. So, if this is dirty, flush it
2054                  * DELWRI so that we don't get a freelist infested with
2055                  * dirty dquots.
2056                  */
2057                 if (XFS_DQ_IS_DIRTY(dqp)) {
2058                         int     error;
2059                         xfs_dqtrace_entry(dqp, "DQSHAKE: DQDIRTY");
2060                         /*
2061                          * We flush it delayed write, so don't bother
2062                          * releasing the mplock.
2063                          */
2064                         error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
2065                         if (error) {
2066                                 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
2067                         "xfs_qm_dqflush_all: dquot %p flush failed", dqp);
2068                         }
2069                         xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
2070                         dqp = dqp->dq_flnext;
2071                         continue;
2072                 }
2073                 /*
2074                  * We're trying to get the hashlock out of order. This races
2075                  * with dqlookup; so, we giveup and goto the next dquot if
2076                  * we couldn't get the hashlock. This way, we won't starve
2077                  * a dqlookup process that holds the hashlock that is
2078                  * waiting for the freelist lock.
2079                  */
2080                 if (! xfs_qm_dqhashlock_nowait(dqp)) {
2081                         xfs_dqfunlock(dqp);
2082                         xfs_dqunlock(dqp);
2083                         dqp = dqp->dq_flnext;
2084                         continue;
2085                 }
2086                 /*
2087                  * This races with dquot allocation code as well as dqflush_all
2088                  * and reclaim code. So, if we failed to grab the mplist lock,
2089                  * giveup everything and start over.
2090                  */
2091                 hash = dqp->q_hash;
2092                 ASSERT(hash);
2093                 if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
2094                         /* XXX put a sentinel so that we can come back here */
2095                         xfs_dqfunlock(dqp);
2096                         xfs_dqunlock(dqp);
2097                         XFS_DQ_HASH_UNLOCK(hash);
2098                         xfs_qm_freelist_unlock(xfs_Gqm);
2099                         if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
2100                                 return nreclaimed;
2101                         goto tryagain;
2102                 }
2103                 xfs_dqtrace_entry(dqp, "DQSHAKE: UNLINKING");
2104 #ifdef QUOTADEBUG
2105                 cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n",
2106                         dqp, be32_to_cpu(dqp->q_core.d_id));
2107 #endif
2108                 ASSERT(dqp->q_nrefs == 0);
2109                 nextdqp = dqp->dq_flnext;
2110                 XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
2111                 XQM_HASHLIST_REMOVE(hash, dqp);
2112                 xfs_dqfunlock(dqp);
2113                 xfs_qm_mplist_unlock(dqp->q_mount);
2114                 XFS_DQ_HASH_UNLOCK(hash);
2115
2116  off_freelist:
2117                 XQM_FREELIST_REMOVE(dqp);
2118                 xfs_dqunlock(dqp);
2119                 nreclaimed++;
2120                 XQM_STATS_INC(xqmstats.xs_qm_dqshake_reclaims);
2121                 xfs_qm_dqdestroy(dqp);
2122                 dqp = nextdqp;
2123         }
2124         xfs_qm_freelist_unlock(xfs_Gqm);
2125         return nreclaimed;
2126 }
2127
2128
2129 /*
2130  * The kmem_shake interface is invoked when memory is running low.
2131  */
2132 /* ARGSUSED */
2133 STATIC int
2134 xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
2135 {
2136         int     ndqused, nfree, n;
2137
2138         if (!kmem_shake_allow(gfp_mask))
2139                 return 0;
2140         if (!xfs_Gqm)
2141                 return 0;
2142
2143         nfree = xfs_Gqm->qm_dqfreelist.qh_nelems; /* free dquots */
2144         /* incore dquots in all f/s's */
2145         ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
2146
2147         ASSERT(ndqused >= 0);
2148
2149         if (nfree <= ndqused && nfree < ndquot)
2150                 return 0;
2151
2152         ndqused *= xfs_Gqm->qm_dqfree_ratio;    /* target # of free dquots */
2153         n = nfree - ndqused - ndquot;           /* # over target */
2154
2155         return xfs_qm_shake_freelist(MAX(nfree, n));
2156 }
2157
2158
2159 /*
2160  * Just pop the least recently used dquot off the freelist and
2161  * recycle it. The returned dquot is locked.
2162  */
2163 STATIC xfs_dquot_t *
2164 xfs_qm_dqreclaim_one(void)
2165 {
2166         xfs_dquot_t     *dqpout;
2167         xfs_dquot_t     *dqp;
2168         int             restarts;
2169         int             nflushes;
2170
2171         restarts = 0;
2172         dqpout = NULL;
2173         nflushes = 0;
2174
2175         /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
2176  startagain:
2177         xfs_qm_freelist_lock(xfs_Gqm);
2178
2179         FOREACH_DQUOT_IN_FREELIST(dqp, &(xfs_Gqm->qm_dqfreelist)) {
2180                 xfs_dqlock(dqp);
2181
2182                 /*
2183                  * We are racing with dqlookup here. Naturally we don't
2184                  * want to reclaim a dquot that lookup wants. We release the
2185                  * freelist lock and start over, so that lookup will grab
2186                  * both the dquot and the freelistlock.
2187                  */
2188                 if (dqp->dq_flags & XFS_DQ_WANT) {
2189                         ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
2190                         xfs_dqtrace_entry(dqp, "DQRECLAIM: DQWANT");
2191                         xfs_dqunlock(dqp);
2192                         xfs_qm_freelist_unlock(xfs_Gqm);
2193                         if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
2194                                 return NULL;
2195                         XQM_STATS_INC(xqmstats.xs_qm_dqwants);
2196                         goto startagain;
2197                 }
2198
2199                 /*
2200                  * If the dquot is inactive, we are assured that it is
2201                  * not on the mplist or the hashlist, and that makes our
2202                  * life easier.
2203                  */
2204                 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
2205                         ASSERT(dqp->q_mount == NULL);
2206                         ASSERT(! XFS_DQ_IS_DIRTY(dqp));
2207                         ASSERT(dqp->HL_PREVP == NULL);
2208                         ASSERT(dqp->MPL_PREVP == NULL);
2209                         XQM_FREELIST_REMOVE(dqp);
2210                         xfs_dqunlock(dqp);
2211                         dqpout = dqp;
2212                         XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
2213                         break;
2214                 }
2215
2216                 ASSERT(dqp->q_hash);
2217                 ASSERT(dqp->MPL_PREVP);
2218
2219                 /*
2220                  * Try to grab the flush lock. If this dquot is in the process of
2221                  * getting flushed to disk, we don't want to reclaim it.
2222                  */
2223                 if (!xfs_dqflock_nowait(dqp)) {
2224                         xfs_dqunlock(dqp);
2225                         continue;
2226                 }
2227
2228                 /*
2229                  * We have the flush lock so we know that this is not in the
2230                  * process of being flushed. So, if this is dirty, flush it
2231                  * DELWRI so that we don't get a freelist infested with
2232                  * dirty dquots.
2233                  */
2234                 if (XFS_DQ_IS_DIRTY(dqp)) {
2235                         int     error;
2236                         xfs_dqtrace_entry(dqp, "DQRECLAIM: DQDIRTY");
2237                         /*
2238                          * We flush it delayed write, so don't bother
2239                          * releasing the freelist lock.
2240                          */
2241                         error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
2242                         if (error) {
2243                                 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
2244                         "xfs_qm_dqreclaim: dquot %p flush failed", dqp);
2245                         }
2246                         xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
2247                         continue;
2248                 }
2249
2250                 if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
2251                         xfs_dqfunlock(dqp);
2252                         xfs_dqunlock(dqp);
2253                         continue;
2254                 }
2255
2256                 if (! xfs_qm_dqhashlock_nowait(dqp))
2257                         goto mplistunlock;
2258
2259                 ASSERT(dqp->q_nrefs == 0);
2260                 xfs_dqtrace_entry(dqp, "DQRECLAIM: UNLINKING");
2261                 XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
2262                 XQM_HASHLIST_REMOVE(dqp->q_hash, dqp);
2263                 XQM_FREELIST_REMOVE(dqp);
2264                 dqpout = dqp;
2265                 XFS_DQ_HASH_UNLOCK(dqp->q_hash);
2266  mplistunlock:
2267                 xfs_qm_mplist_unlock(dqp->q_mount);
2268                 xfs_dqfunlock(dqp);
2269                 xfs_dqunlock(dqp);
2270                 if (dqpout)
2271                         break;
2272         }
2273
2274         xfs_qm_freelist_unlock(xfs_Gqm);
2275         return dqpout;
2276 }
2277
2278
2279 /*------------------------------------------------------------------*/
2280
2281 /*
2282  * Return a new incore dquot. Depending on the number of
2283  * dquots in the system, we either allocate a new one on the kernel heap,
2284  * or reclaim a free one.
2285  * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
2286  * to reclaim an existing one from the freelist.
2287  */
2288 boolean_t
2289 xfs_qm_dqalloc_incore(
2290         xfs_dquot_t **O_dqpp)
2291 {
2292         xfs_dquot_t     *dqp;
2293
2294         /*
2295          * Check against high water mark to see if we want to pop
2296          * a nincompoop dquot off the freelist.
2297          */
2298         if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
2299                 /*
2300                  * Try to recycle a dquot from the freelist.
2301                  */
2302                 if ((dqp = xfs_qm_dqreclaim_one())) {
2303                         XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
2304                         /*
2305                          * Just zero the core here. The rest will get
2306                          * reinitialized by caller. XXX we shouldn't even
2307                          * do this zero ...
2308                          */
2309                         memset(&dqp->q_core, 0, sizeof(dqp->q_core));
2310                         *O_dqpp = dqp;
2311                         return B_FALSE;
2312                 }
2313                 XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
2314         }
2315
2316         /*
2317          * Allocate a brand new dquot on the kernel heap and return it
2318          * to the caller to initialize.
2319          */
2320         ASSERT(xfs_Gqm->qm_dqzone != NULL);
2321         *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
2322         atomic_inc(&xfs_Gqm->qm_totaldquots);
2323
2324         return B_TRUE;
2325 }
2326
2327
2328 /*
2329  * Start a transaction and write the incore superblock changes to
2330  * disk. flags parameter indicates which fields have changed.
2331  */
2332 int
2333 xfs_qm_write_sb_changes(
2334         xfs_mount_t     *mp,
2335         __int64_t       flags)
2336 {
2337         xfs_trans_t     *tp;
2338         int             error;
2339
2340 #ifdef QUOTADEBUG
2341         cmn_err(CE_NOTE, "Writing superblock quota changes :%s", mp->m_fsname);
2342 #endif
2343         tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
2344         if ((error = xfs_trans_reserve(tp, 0,
2345                                       mp->m_sb.sb_sectsize + 128, 0,
2346                                       0,
2347                                       XFS_DEFAULT_LOG_COUNT))) {
2348                 xfs_trans_cancel(tp, 0);
2349                 return error;
2350         }
2351
2352         xfs_mod_sb(tp, flags);
2353         error = xfs_trans_commit(tp, 0);
2354
2355         return error;
2356 }
2357
2358
2359 /* --------------- utility functions for vnodeops ---------------- */
2360
2361
2362 /*
2363  * Given an inode, a uid and gid (from cred_t) make sure that we have
2364  * allocated relevant dquot(s) on disk, and that we won't exceed inode
2365  * quotas by creating this file.
2366  * This also attaches dquot(s) to the given inode after locking it,
2367  * and returns the dquots corresponding to the uid and/or gid.
2368  *
2369  * in   : inode (unlocked)
2370  * out  : udquot, gdquot with references taken and unlocked
2371  */
2372 int
2373 xfs_qm_vop_dqalloc(
2374         xfs_mount_t     *mp,
2375         xfs_inode_t     *ip,
2376         uid_t           uid,
2377         gid_t           gid,
2378         prid_t          prid,
2379         uint            flags,
2380         xfs_dquot_t     **O_udqpp,
2381         xfs_dquot_t     **O_gdqpp)
2382 {
2383         int             error;
2384         xfs_dquot_t     *uq, *gq;
2385         uint            lockflags;
2386
2387         if (!XFS_IS_QUOTA_ON(mp))
2388                 return 0;
2389
2390         lockflags = XFS_ILOCK_EXCL;
2391         xfs_ilock(ip, lockflags);
2392
2393         if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip))
2394                 gid = ip->i_d.di_gid;
2395
2396         /*
2397          * Attach the dquot(s) to this inode, doing a dquot allocation
2398          * if necessary. The dquot(s) will not be locked.
2399          */
2400         if (XFS_NOT_DQATTACHED(mp, ip)) {
2401                 if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_DQALLOC |
2402                                             XFS_QMOPT_ILOCKED))) {
2403                         xfs_iunlock(ip, lockflags);
2404                         return error;
2405                 }
2406         }
2407
2408         uq = gq = NULL;
2409         if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
2410                 if (ip->i_d.di_uid != uid) {
2411                         /*
2412                          * What we need is the dquot that has this uid, and
2413                          * if we send the inode to dqget, the uid of the inode
2414                          * takes priority over what's sent in the uid argument.
2415                          * We must unlock inode here before calling dqget if
2416                          * we're not sending the inode, because otherwise
2417                          * we'll deadlock by doing trans_reserve while
2418                          * holding ilock.
2419                          */
2420                         xfs_iunlock(ip, lockflags);
2421                         if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
2422                                                  XFS_DQ_USER,
2423                                                  XFS_QMOPT_DQALLOC |
2424                                                  XFS_QMOPT_DOWARN,
2425                                                  &uq))) {
2426                                 ASSERT(error != ENOENT);
2427                                 return error;
2428                         }
2429                         /*
2430                          * Get the ilock in the right order.
2431                          */
2432                         xfs_dqunlock(uq);
2433                         lockflags = XFS_ILOCK_SHARED;
2434                         xfs_ilock(ip, lockflags);
2435                 } else {
2436                         /*
2437                          * Take an extra reference, because we'll return
2438                          * this to caller
2439                          */
2440                         ASSERT(ip->i_udquot);
2441                         uq = ip->i_udquot;
2442                         xfs_dqlock(uq);
2443                         XFS_DQHOLD(uq);
2444                         xfs_dqunlock(uq);
2445                 }
2446         }
2447         if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
2448                 if (ip->i_d.di_gid != gid) {
2449                         xfs_iunlock(ip, lockflags);
2450                         if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
2451                                                  XFS_DQ_GROUP,
2452                                                  XFS_QMOPT_DQALLOC |
2453                                                  XFS_QMOPT_DOWARN,
2454                                                  &gq))) {
2455                                 if (uq)
2456                                         xfs_qm_dqrele(uq);
2457                                 ASSERT(error != ENOENT);
2458                                 return error;
2459                         }
2460                         xfs_dqunlock(gq);
2461                         lockflags = XFS_ILOCK_SHARED;
2462                         xfs_ilock(ip, lockflags);
2463                 } else {
2464                         ASSERT(ip->i_gdquot);
2465                         gq = ip->i_gdquot;
2466                         xfs_dqlock(gq);
2467                         XFS_DQHOLD(gq);
2468                         xfs_dqunlock(gq);
2469                 }
2470         } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
2471                 if (ip->i_d.di_projid != prid) {
2472                         xfs_iunlock(ip, lockflags);
2473                         if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
2474                                                  XFS_DQ_PROJ,
2475                                                  XFS_QMOPT_DQALLOC |
2476                                                  XFS_QMOPT_DOWARN,
2477                                                  &gq))) {
2478                                 if (uq)
2479                                         xfs_qm_dqrele(uq);
2480                                 ASSERT(error != ENOENT);
2481                                 return (error);
2482                         }
2483                         xfs_dqunlock(gq);
2484                         lockflags = XFS_ILOCK_SHARED;
2485                         xfs_ilock(ip, lockflags);
2486                 } else {
2487                         ASSERT(ip->i_gdquot);
2488                         gq = ip->i_gdquot;
2489                         xfs_dqlock(gq);
2490                         XFS_DQHOLD(gq);
2491                         xfs_dqunlock(gq);
2492                 }
2493         }
2494         if (uq)
2495                 xfs_dqtrace_entry_ino(uq, "DQALLOC", ip);
2496
2497         xfs_iunlock(ip, lockflags);
2498         if (O_udqpp)
2499                 *O_udqpp = uq;
2500         else if (uq)
2501                 xfs_qm_dqrele(uq);
2502         if (O_gdqpp)
2503                 *O_gdqpp = gq;
2504         else if (gq)
2505                 xfs_qm_dqrele(gq);
2506         return 0;
2507 }
2508
2509 /*
2510  * Actually transfer ownership, and do dquot modifications.
2511  * These were already reserved.
2512  */
2513 xfs_dquot_t *
2514 xfs_qm_vop_chown(
2515         xfs_trans_t     *tp,
2516         xfs_inode_t     *ip,
2517         xfs_dquot_t     **IO_olddq,
2518         xfs_dquot_t     *newdq)
2519 {
2520         xfs_dquot_t     *prevdq;
2521         uint            bfield = XFS_IS_REALTIME_INODE(ip) ?
2522                                  XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
2523
2524         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2525         ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
2526
2527         /* old dquot */
2528         prevdq = *IO_olddq;
2529         ASSERT(prevdq);
2530         ASSERT(prevdq != newdq);
2531
2532         xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
2533         xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
2534
2535         /* the sparkling new dquot */
2536         xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
2537         xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
2538
2539         /*
2540          * Take an extra reference, because the inode
2541          * is going to keep this dquot pointer even
2542          * after the trans_commit.
2543          */
2544         xfs_dqlock(newdq);
2545         XFS_DQHOLD(newdq);
2546         xfs_dqunlock(newdq);
2547         *IO_olddq = newdq;
2548
2549         return prevdq;
2550 }
2551
2552 /*
2553  * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
2554  */
2555 int
2556 xfs_qm_vop_chown_reserve(
2557         xfs_trans_t     *tp,
2558         xfs_inode_t     *ip,
2559         xfs_dquot_t     *udqp,
2560         xfs_dquot_t     *gdqp,
2561         uint            flags)
2562 {
2563         int             error;
2564         xfs_mount_t     *mp;
2565         uint            delblks, blkflags, prjflags = 0;
2566         xfs_dquot_t     *unresudq, *unresgdq, *delblksudq, *delblksgdq;
2567
2568         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2569         mp = ip->i_mount;
2570         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2571
2572         delblks = ip->i_delayed_blks;
2573         delblksudq = delblksgdq = unresudq = unresgdq = NULL;
2574         blkflags = XFS_IS_REALTIME_INODE(ip) ?
2575                         XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
2576
2577         if (XFS_IS_UQUOTA_ON(mp) && udqp &&
2578             ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) {
2579                 delblksudq = udqp;
2580                 /*
2581                  * If there are delayed allocation blocks, then we have to
2582                  * unreserve those from the old dquot, and add them to the
2583                  * new dquot.
2584                  */
2585                 if (delblks) {
2586                         ASSERT(ip->i_udquot);
2587                         unresudq = ip->i_udquot;
2588                 }
2589         }
2590         if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
2591                 if (XFS_IS_PQUOTA_ON(ip->i_mount) &&
2592                      ip->i_d.di_projid != be32_to_cpu(gdqp->q_core.d_id))
2593                         prjflags = XFS_QMOPT_ENOSPC;
2594
2595                 if (prjflags ||
2596                     (XFS_IS_GQUOTA_ON(ip->i_mount) &&
2597                      ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) {
2598                         delblksgdq = gdqp;
2599                         if (delblks) {
2600                                 ASSERT(ip->i_gdquot);
2601                                 unresgdq = ip->i_gdquot;
2602                         }
2603                 }
2604         }
2605
2606         if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
2607                                 delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
2608                                 flags | blkflags | prjflags)))
2609                 return (error);
2610
2611         /*
2612          * Do the delayed blks reservations/unreservations now. Since, these
2613          * are done without the help of a transaction, if a reservation fails
2614          * its previous reservations won't be automatically undone by trans
2615          * code. So, we have to do it manually here.
2616          */
2617         if (delblks) {
2618                 /*
2619                  * Do the reservations first. Unreservation can't fail.
2620                  */
2621                 ASSERT(delblksudq || delblksgdq);
2622                 ASSERT(unresudq || unresgdq);
2623                 if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
2624                                 delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
2625                                 flags | blkflags | prjflags)))
2626                         return (error);
2627                 xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
2628                                 unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
2629                                 blkflags);
2630         }
2631
2632         return (0);
2633 }
2634
2635 int
2636 xfs_qm_vop_rename_dqattach(
2637         xfs_inode_t     **i_tab)
2638 {
2639         xfs_inode_t     *ip;
2640         int             i;
2641         int             error;
2642
2643         ip = i_tab[0];
2644
2645         if (! XFS_IS_QUOTA_ON(ip->i_mount))
2646                 return 0;
2647
2648         if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
2649                 error = xfs_qm_dqattach(ip, 0);
2650                 if (error)
2651                         return error;
2652         }
2653         for (i = 1; (i < 4 && i_tab[i]); i++) {
2654                 /*
2655                  * Watch out for duplicate entries in the table.
2656                  */
2657                 if ((ip = i_tab[i]) != i_tab[i-1]) {
2658                         if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
2659                                 error = xfs_qm_dqattach(ip, 0);
2660                                 if (error)
2661                                         return error;
2662                         }
2663                 }
2664         }
2665         return 0;
2666 }
2667
2668 void
2669 xfs_qm_vop_dqattach_and_dqmod_newinode(
2670         xfs_trans_t     *tp,
2671         xfs_inode_t     *ip,
2672         xfs_dquot_t     *udqp,
2673         xfs_dquot_t     *gdqp)
2674 {
2675         if (!XFS_IS_QUOTA_ON(tp->t_mountp))
2676                 return;
2677
2678         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2679         ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
2680
2681         if (udqp) {
2682                 xfs_dqlock(udqp);
2683                 XFS_DQHOLD(udqp);
2684                 xfs_dqunlock(udqp);
2685                 ASSERT(ip->i_udquot == NULL);
2686                 ip->i_udquot = udqp;
2687                 ASSERT(XFS_IS_UQUOTA_ON(tp->t_mountp));
2688                 ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
2689                 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
2690         }
2691         if (gdqp) {
2692                 xfs_dqlock(gdqp);
2693                 XFS_DQHOLD(gdqp);
2694                 xfs_dqunlock(gdqp);
2695                 ASSERT(ip->i_gdquot == NULL);
2696                 ip->i_gdquot = gdqp;
2697                 ASSERT(XFS_IS_OQUOTA_ON(tp->t_mountp));
2698                 ASSERT((XFS_IS_GQUOTA_ON(tp->t_mountp) ?
2699                         ip->i_d.di_gid : ip->i_d.di_projid) ==
2700                                 be32_to_cpu(gdqp->q_core.d_id));
2701                 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
2702         }
2703 }
2704
2705 /* ------------- list stuff -----------------*/
2706 STATIC void
2707 xfs_qm_freelist_init(xfs_frlist_t *ql)
2708 {
2709         ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql;
2710         mutex_init(&ql->qh_lock);
2711         ql->qh_version = 0;
2712         ql->qh_nelems = 0;
2713 }
2714
2715 STATIC void
2716 xfs_qm_freelist_destroy(xfs_frlist_t *ql)
2717 {
2718         xfs_dquot_t     *dqp, *nextdqp;
2719
2720         mutex_lock(&ql->qh_lock);
2721         for (dqp = ql->qh_next;
2722              dqp != (xfs_dquot_t *)ql; ) {
2723                 xfs_dqlock(dqp);
2724                 nextdqp = dqp->dq_flnext;
2725 #ifdef QUOTADEBUG
2726                 cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp);
2727 #endif
2728                 XQM_FREELIST_REMOVE(dqp);
2729                 xfs_dqunlock(dqp);
2730                 xfs_qm_dqdestroy(dqp);
2731                 dqp = nextdqp;
2732         }
2733         mutex_unlock(&ql->qh_lock);
2734         mutex_destroy(&ql->qh_lock);
2735
2736         ASSERT(ql->qh_nelems == 0);
2737 }
2738
2739 STATIC void
2740 xfs_qm_freelist_insert(xfs_frlist_t *ql, xfs_dquot_t *dq)
2741 {
2742         dq->dq_flnext = ql->qh_next;
2743         dq->dq_flprev = (xfs_dquot_t *)ql;
2744         ql->qh_next = dq;
2745         dq->dq_flnext->dq_flprev = dq;
2746         xfs_Gqm->qm_dqfreelist.qh_nelems++;
2747         xfs_Gqm->qm_dqfreelist.qh_version++;
2748 }
2749
2750 void
2751 xfs_qm_freelist_unlink(xfs_dquot_t *dq)
2752 {
2753         xfs_dquot_t *next = dq->dq_flnext;
2754         xfs_dquot_t *prev = dq->dq_flprev;
2755
2756         next->dq_flprev = prev;
2757         prev->dq_flnext = next;
2758         dq->dq_flnext = dq->dq_flprev = dq;
2759         xfs_Gqm->qm_dqfreelist.qh_nelems--;
2760         xfs_Gqm->qm_dqfreelist.qh_version++;
2761 }
2762
2763 void
2764 xfs_qm_freelist_append(xfs_frlist_t *ql, xfs_dquot_t *dq)
2765 {
2766         xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq);
2767 }
2768
2769 STATIC int
2770 xfs_qm_dqhashlock_nowait(
2771         xfs_dquot_t *dqp)
2772 {
2773         int locked;
2774
2775         locked = mutex_trylock(&((dqp)->q_hash->qh_lock));
2776         return locked;
2777 }
2778
2779 int
2780 xfs_qm_freelist_lock_nowait(
2781         xfs_qm_t *xqm)
2782 {
2783         int locked;
2784
2785         locked = mutex_trylock(&(xqm->qm_dqfreelist.qh_lock));
2786         return locked;
2787 }
2788
2789 STATIC int
2790 xfs_qm_mplist_nowait(
2791         xfs_mount_t     *mp)
2792 {
2793         int locked;
2794
2795         ASSERT(mp->m_quotainfo);
2796         locked = mutex_trylock(&(XFS_QI_MPLLOCK(mp)));
2797         return locked;
2798 }