Merge master.kernel.org:/home/rmk/linux-2.6-arm
[linux-2.6] / fs / xfs / quota / xfs_qm.c
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_bit.h"
21 #include "xfs_log.h"
22 #include "xfs_inum.h"
23 #include "xfs_trans.h"
24 #include "xfs_sb.h"
25 #include "xfs_ag.h"
26 #include "xfs_dir2.h"
27 #include "xfs_alloc.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_quota.h"
30 #include "xfs_mount.h"
31 #include "xfs_bmap_btree.h"
32 #include "xfs_alloc_btree.h"
33 #include "xfs_ialloc_btree.h"
34 #include "xfs_dir2_sf.h"
35 #include "xfs_attr_sf.h"
36 #include "xfs_dinode.h"
37 #include "xfs_inode.h"
38 #include "xfs_btree.h"
39 #include "xfs_ialloc.h"
40 #include "xfs_itable.h"
41 #include "xfs_rtalloc.h"
42 #include "xfs_error.h"
43 #include "xfs_bmap.h"
44 #include "xfs_rw.h"
45 #include "xfs_acl.h"
46 #include "xfs_attr.h"
47 #include "xfs_buf_item.h"
48 #include "xfs_trans_space.h"
49 #include "xfs_utils.h"
50 #include "xfs_qm.h"
51
/*
 * The global quota manager. There is only one of these for the entire
 * system, _not_ one per file system. XQM keeps track of the overall
 * quota functionality, including maintaining the freelist and hash
 * tables of dquots.
 */
/*
 * NOTE(review): presumably the mutex taken by XFS_QM_LOCK()/XFS_QM_UNLOCK();
 * confirm against the macro definitions in xfs_qm.h.
 */
mutex_t         xfs_Gqm_lock;
/*
 * The single quota manager instance. It is created by
 * xfs_qm_hold_quotafs_ref() when still NULL and reset to NULL by
 * xfs_qm_rele_quotafs_ref() once the last reference is dropped.
 */
struct xfs_qm   *xfs_Gqm;
/* Set by xfs_Gqm_init() to (number of hash buckets << 8). */
uint            ndquot;

/*
 * Allocation zones for dquots and for the per-transaction dquot accounting
 * structure. xfs_Gqm_init() creates each zone only when the pointer is
 * still NULL and otherwise reuses the cached zone.
 */
kmem_zone_t     *qm_dqzone;
kmem_zone_t     *qm_dqtrxzone;

/*
 * NOTE(review): no user of this credential is visible in the reviewed part
 * of the file; judging by the name it is an all-zero cred_t.
 */
static cred_t   xfs_zerocr;

/* Hash/mount list helpers. */
STATIC void     xfs_qm_list_init(xfs_dqlist_t *, char *, int);
STATIC void     xfs_qm_list_destroy(xfs_dqlist_t *);

/* Freelist helpers and non-blocking lock attempts. */
STATIC void     xfs_qm_freelist_init(xfs_frlist_t *);
STATIC void     xfs_qm_freelist_destroy(xfs_frlist_t *);
STATIC int      xfs_qm_mplist_nowait(xfs_mount_t *);
STATIC int      xfs_qm_dqhashlock_nowait(xfs_dquot_t *);

/* Per-mount quota setup and the memory-pressure callback. */
STATIC int      xfs_qm_init_quotainos(xfs_mount_t *);
STATIC int      xfs_qm_init_quotainfo(xfs_mount_t *);
STATIC int      xfs_qm_shake(int, gfp_t);

/*
 * Shrinker descriptor for xfs_qm_shake(). It is registered in
 * xfs_Gqm_init() and unregistered in xfs_qm_destroy().
 */
static struct shrinker xfs_qm_shaker = {
        .shrink = xfs_qm_shake,
        .seeks = DEFAULT_SEEKS,
};

#ifdef DEBUG
/*
 * Defined elsewhere; initialized in xfs_Gqm_init() and destroyed in
 * xfs_qm_destroy() on DEBUG builds.
 */
extern mutex_t  qcheck_lock;
#endif
87
/*
 * XQM_LIST_PRINT(l, NXT, title)
 *
 * Debug helper: on QUOTADEBUG builds, print the title and element count of
 * dquot list 'l', then one line per dquot reached by following the NXT
 * link field (id, type string, block count, inode count, reference count).
 * On other builds it expands to an empty statement.
 *
 * Both variants are wrapped in do { } while (0) so that the macro behaves
 * as exactly one statement (e.g. in an unbraced if/else) and must be
 * terminated with a semicolon regardless of the build configuration.
 * The expansion declares locals named 'dqp' and 'i'; do not pass
 * expressions that reference variables with those names.
 */
#ifdef QUOTADEBUG
#define XQM_LIST_PRINT(l, NXT, title) \
do { \
        xfs_dquot_t     *dqp; int i = 0; \
        cmn_err(CE_DEBUG, "%s (#%d)", title, (int) (l)->qh_nelems); \
        for (dqp = (l)->qh_next; dqp != NULL; dqp = dqp->NXT) { \
                cmn_err(CE_DEBUG, "   %d.  \"%d (%s)\"   " \
                                  "bcnt = %d, icnt = %d, refs = %d", \
                        ++i, (int) be32_to_cpu(dqp->q_core.d_id), \
                        DQFLAGTO_TYPESTR(dqp),       \
                        (int) be64_to_cpu(dqp->q_core.d_bcount), \
                        (int) be64_to_cpu(dqp->q_core.d_icount), \
                        (int) dqp->q_nrefs);  } \
} while (0)
#else
#define XQM_LIST_PRINT(l, NXT, title) do { } while (0)
#endif
105
106 /*
107  * Initialize the XQM structure.
108  * Note that there is not one quota manager per file system.
109  */
110 STATIC struct xfs_qm *
111 xfs_Gqm_init(void)
112 {
113         xfs_dqhash_t    *udqhash, *gdqhash;
114         xfs_qm_t        *xqm;
115         size_t          hsize;
116         uint            i;
117
118         /*
119          * Initialize the dquot hash tables.
120          */
121         udqhash = kmem_zalloc_greedy(&hsize,
122                                      XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
123                                      XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t),
124                                      KM_SLEEP | KM_MAYFAIL | KM_LARGE);
125         gdqhash = kmem_zalloc(hsize, KM_SLEEP | KM_LARGE);
126         hsize /= sizeof(xfs_dqhash_t);
127         ndquot = hsize << 8;
128
129         xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
130         xqm->qm_dqhashmask = hsize - 1;
131         xqm->qm_usr_dqhtable = udqhash;
132         xqm->qm_grp_dqhtable = gdqhash;
133         ASSERT(xqm->qm_usr_dqhtable != NULL);
134         ASSERT(xqm->qm_grp_dqhtable != NULL);
135
136         for (i = 0; i < hsize; i++) {
137                 xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
138                 xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
139         }
140
141         /*
142          * Freelist of all dquots of all file systems
143          */
144         xfs_qm_freelist_init(&(xqm->qm_dqfreelist));
145
146         /*
147          * dquot zone. we register our own low-memory callback.
148          */
149         if (!qm_dqzone) {
150                 xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
151                                                 "xfs_dquots");
152                 qm_dqzone = xqm->qm_dqzone;
153         } else
154                 xqm->qm_dqzone = qm_dqzone;
155
156         register_shrinker(&xfs_qm_shaker);
157
158         /*
159          * The t_dqinfo portion of transactions.
160          */
161         if (!qm_dqtrxzone) {
162                 xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
163                                                    "xfs_dqtrx");
164                 qm_dqtrxzone = xqm->qm_dqtrxzone;
165         } else
166                 xqm->qm_dqtrxzone = qm_dqtrxzone;
167
168         atomic_set(&xqm->qm_totaldquots, 0);
169         xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
170         xqm->qm_nrefs = 0;
171 #ifdef DEBUG
172         mutex_init(&qcheck_lock);
173 #endif
174         return xqm;
175 }
176
177 /*
178  * Destroy the global quota manager when its reference count goes to zero.
179  */
180 STATIC void
181 xfs_qm_destroy(
182         struct xfs_qm   *xqm)
183 {
184         int             hsize, i;
185
186         ASSERT(xqm != NULL);
187         ASSERT(xqm->qm_nrefs == 0);
188         unregister_shrinker(&xfs_qm_shaker);
189         hsize = xqm->qm_dqhashmask + 1;
190         for (i = 0; i < hsize; i++) {
191                 xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
192                 xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
193         }
194         kmem_free(xqm->qm_usr_dqhtable);
195         kmem_free(xqm->qm_grp_dqhtable);
196         xqm->qm_usr_dqhtable = NULL;
197         xqm->qm_grp_dqhtable = NULL;
198         xqm->qm_dqhashmask = 0;
199         xfs_qm_freelist_destroy(&(xqm->qm_dqfreelist));
200 #ifdef DEBUG
201         mutex_destroy(&qcheck_lock);
202 #endif
203         kmem_free(xqm);
204 }
205
206 /*
207  * Called at mount time to let XQM know that another file system is
208  * starting quotas. This isn't crucial information as the individual mount
209  * structures are pretty independent, but it helps the XQM keep a
210  * global view of what's going on.
211  */
212 /* ARGSUSED */
213 STATIC int
214 xfs_qm_hold_quotafs_ref(
215         struct xfs_mount *mp)
216 {
217         /*
218          * Need to lock the xfs_Gqm structure for things like this. For example,
219          * the structure could disappear between the entry to this routine and
220          * a HOLD operation if not locked.
221          */
222         XFS_QM_LOCK(xfs_Gqm);
223
224         if (xfs_Gqm == NULL)
225                 xfs_Gqm = xfs_Gqm_init();
226         /*
227          * We can keep a list of all filesystems with quotas mounted for
228          * debugging and statistical purposes, but ...
229          * Just take a reference and get out.
230          */
231         XFS_QM_HOLD(xfs_Gqm);
232         XFS_QM_UNLOCK(xfs_Gqm);
233
234         return 0;
235 }
236
237
238 /*
239  * Release the reference that a filesystem took at mount time,
240  * so that we know when we need to destroy the entire quota manager.
241  */
242 /* ARGSUSED */
243 STATIC void
244 xfs_qm_rele_quotafs_ref(
245         struct xfs_mount *mp)
246 {
247         xfs_dquot_t     *dqp, *nextdqp;
248
249         ASSERT(xfs_Gqm);
250         ASSERT(xfs_Gqm->qm_nrefs > 0);
251
252         /*
253          * Go thru the freelist and destroy all inactive dquots.
254          */
255         xfs_qm_freelist_lock(xfs_Gqm);
256
257         for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
258              dqp != (xfs_dquot_t *)&(xfs_Gqm->qm_dqfreelist); ) {
259                 xfs_dqlock(dqp);
260                 nextdqp = dqp->dq_flnext;
261                 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
262                         ASSERT(dqp->q_mount == NULL);
263                         ASSERT(! XFS_DQ_IS_DIRTY(dqp));
264                         ASSERT(dqp->HL_PREVP == NULL);
265                         ASSERT(dqp->MPL_PREVP == NULL);
266                         XQM_FREELIST_REMOVE(dqp);
267                         xfs_dqunlock(dqp);
268                         xfs_qm_dqdestroy(dqp);
269                 } else {
270                         xfs_dqunlock(dqp);
271                 }
272                 dqp = nextdqp;
273         }
274         xfs_qm_freelist_unlock(xfs_Gqm);
275
276         /*
277          * Destroy the entire XQM. If somebody mounts with quotaon, this'll
278          * be restarted.
279          */
280         XFS_QM_LOCK(xfs_Gqm);
281         XFS_QM_RELE(xfs_Gqm);
282         if (xfs_Gqm->qm_nrefs == 0) {
283                 xfs_qm_destroy(xfs_Gqm);
284                 xfs_Gqm = NULL;
285         }
286         XFS_QM_UNLOCK(xfs_Gqm);
287 }
288
289 /*
290  * Just destroy the quotainfo structure.
291  */
292 void
293 xfs_qm_unmount_quotadestroy(
294         xfs_mount_t     *mp)
295 {
296         if (mp->m_quotainfo)
297                 xfs_qm_destroy_quotainfo(mp);
298 }
299
300
301 /*
302  * This is called from xfs_mountfs to start quotas and initialize all
303  * necessary data structures like quotainfo.  This is also responsible for
304  * running a quotacheck as necessary.  We are guaranteed that the superblock
305  * is consistently read in at this point.
306  *
307  * If we fail here, the mount will continue with quota turned off. We don't
308  * need to inidicate success or failure at all.
309  */
310 void
311 xfs_qm_mount_quotas(
312         xfs_mount_t     *mp)
313 {
314         int             error = 0;
315         uint            sbf;
316
317         /*
318          * If quotas on realtime volumes is not supported, we disable
319          * quotas immediately.
320          */
321         if (mp->m_sb.sb_rextents) {
322                 cmn_err(CE_NOTE,
323                         "Cannot turn on quotas for realtime filesystem %s",
324                         mp->m_fsname);
325                 mp->m_qflags = 0;
326                 goto write_changes;
327         }
328
329         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
330
331         /*
332          * Allocate the quotainfo structure inside the mount struct, and
333          * create quotainode(s), and change/rev superblock if necessary.
334          */
335         error = xfs_qm_init_quotainfo(mp);
336         if (error) {
337                 /*
338                  * We must turn off quotas.
339                  */
340                 ASSERT(mp->m_quotainfo == NULL);
341                 mp->m_qflags = 0;
342                 goto write_changes;
343         }
344         /*
345          * If any of the quotas are not consistent, do a quotacheck.
346          */
347         if (XFS_QM_NEED_QUOTACHECK(mp)) {
348                 error = xfs_qm_quotacheck(mp);
349                 if (error) {
350                         /* Quotacheck failed and disabled quotas. */
351                         return;
352                 }
353         }
354         /* 
355          * If one type of quotas is off, then it will lose its
356          * quotachecked status, since we won't be doing accounting for
357          * that type anymore.
358          */
359         if (!XFS_IS_UQUOTA_ON(mp))
360                 mp->m_qflags &= ~XFS_UQUOTA_CHKD;
361         if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp)))
362                 mp->m_qflags &= ~XFS_OQUOTA_CHKD;
363
364  write_changes:
365         /*
366          * We actually don't have to acquire the m_sb_lock at all.
367          * This can only be called from mount, and that's single threaded. XXX
368          */
369         spin_lock(&mp->m_sb_lock);
370         sbf = mp->m_sb.sb_qflags;
371         mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
372         spin_unlock(&mp->m_sb_lock);
373
374         if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
375                 if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
376                         /*
377                          * We could only have been turning quotas off.
378                          * We aren't in very good shape actually because
379                          * the incore structures are convinced that quotas are
380                          * off, but the on disk superblock doesn't know that !
381                          */
382                         ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
383                         xfs_fs_cmn_err(CE_ALERT, mp,
384                                 "XFS mount_quotas: Superblock update failed!");
385                 }
386         }
387
388         if (error) {
389                 xfs_fs_cmn_err(CE_WARN, mp,
390                         "Failed to initialize disk quotas.");
391         }
392         return;
393 }
394
395 /*
396  * Called from the vfsops layer.
397  */
398 void
399 xfs_qm_unmount_quotas(
400         xfs_mount_t     *mp)
401 {
402         /*
403          * Release the dquots that root inode, et al might be holding,
404          * before we flush quotas and blow away the quotainfo structure.
405          */
406         ASSERT(mp->m_rootip);
407         xfs_qm_dqdetach(mp->m_rootip);
408         if (mp->m_rbmip)
409                 xfs_qm_dqdetach(mp->m_rbmip);
410         if (mp->m_rsumip)
411                 xfs_qm_dqdetach(mp->m_rsumip);
412
413         /*
414          * Release the quota inodes.
415          */
416         if (mp->m_quotainfo) {
417                 if (mp->m_quotainfo->qi_uquotaip) {
418                         IRELE(mp->m_quotainfo->qi_uquotaip);
419                         mp->m_quotainfo->qi_uquotaip = NULL;
420                 }
421                 if (mp->m_quotainfo->qi_gquotaip) {
422                         IRELE(mp->m_quotainfo->qi_gquotaip);
423                         mp->m_quotainfo->qi_gquotaip = NULL;
424                 }
425         }
426 }
427
428 /*
429  * Flush all dquots of the given file system to disk. The dquots are
430  * _not_ purged from memory here, just their data written to disk.
431  */
432 STATIC int
433 xfs_qm_dqflush_all(
434         xfs_mount_t     *mp,
435         int             flags)
436 {
437         int             recl;
438         xfs_dquot_t     *dqp;
439         int             niters;
440         int             error;
441
442         if (mp->m_quotainfo == NULL)
443                 return 0;
444         niters = 0;
445 again:
446         xfs_qm_mplist_lock(mp);
447         FOREACH_DQUOT_IN_MP(dqp, mp) {
448                 xfs_dqlock(dqp);
449                 if (! XFS_DQ_IS_DIRTY(dqp)) {
450                         xfs_dqunlock(dqp);
451                         continue;
452                 }
453                 xfs_dqtrace_entry(dqp, "FLUSHALL: DQDIRTY");
454                 /* XXX a sentinel would be better */
455                 recl = XFS_QI_MPLRECLAIMS(mp);
456                 if (!xfs_dqflock_nowait(dqp)) {
457                         /*
458                          * If we can't grab the flush lock then check
459                          * to see if the dquot has been flushed delayed
460                          * write.  If so, grab its buffer and send it
461                          * out immediately.  We'll be able to acquire
462                          * the flush lock when the I/O completes.
463                          */
464                         xfs_qm_dqflock_pushbuf_wait(dqp);
465                 }
466                 /*
467                  * Let go of the mplist lock. We don't want to hold it
468                  * across a disk write.
469                  */
470                 xfs_qm_mplist_unlock(mp);
471                 error = xfs_qm_dqflush(dqp, flags);
472                 xfs_dqunlock(dqp);
473                 if (error)
474                         return error;
475
476                 xfs_qm_mplist_lock(mp);
477                 if (recl != XFS_QI_MPLRECLAIMS(mp)) {
478                         xfs_qm_mplist_unlock(mp);
479                         /* XXX restart limit */
480                         goto again;
481                 }
482         }
483
484         xfs_qm_mplist_unlock(mp);
485         /* return ! busy */
486         return 0;
487 }
488 /*
489  * Release the group dquot pointers the user dquots may be
490  * carrying around as a hint. mplist is locked on entry and exit.
491  */
492 STATIC void
493 xfs_qm_detach_gdquots(
494         xfs_mount_t     *mp)
495 {
496         xfs_dquot_t     *dqp, *gdqp;
497         int             nrecl;
498
499  again:
500         ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
501         dqp = XFS_QI_MPLNEXT(mp);
502         while (dqp) {
503                 xfs_dqlock(dqp);
504                 if ((gdqp = dqp->q_gdquot)) {
505                         xfs_dqlock(gdqp);
506                         dqp->q_gdquot = NULL;
507                 }
508                 xfs_dqunlock(dqp);
509
510                 if (gdqp) {
511                         /*
512                          * Can't hold the mplist lock across a dqput.
513                          * XXXmust convert to marker based iterations here.
514                          */
515                         nrecl = XFS_QI_MPLRECLAIMS(mp);
516                         xfs_qm_mplist_unlock(mp);
517                         xfs_qm_dqput(gdqp);
518
519                         xfs_qm_mplist_lock(mp);
520                         if (nrecl != XFS_QI_MPLRECLAIMS(mp))
521                                 goto again;
522                 }
523                 dqp = dqp->MPL_NEXT;
524         }
525 }
526
527 /*
528  * Go through all the incore dquots of this file system and take them
529  * off the mplist and hashlist, if the dquot type matches the dqtype
530  * parameter. This is used when turning off quota accounting for
531  * users and/or groups, as well as when the filesystem is unmounting.
532  */
533 STATIC int
534 xfs_qm_dqpurge_int(
535         xfs_mount_t     *mp,
536         uint            flags) /* QUOTAOFF/UMOUNTING/UQUOTA/PQUOTA/GQUOTA */
537 {
538         xfs_dquot_t     *dqp;
539         uint            dqtype;
540         int             nrecl;
541         xfs_dquot_t     *nextdqp;
542         int             nmisses;
543
544         if (mp->m_quotainfo == NULL)
545                 return 0;
546
547         dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
548         dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
549         dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
550
551         xfs_qm_mplist_lock(mp);
552
553         /*
554          * In the first pass through all incore dquots of this filesystem,
555          * we release the group dquot pointers the user dquots may be
556          * carrying around as a hint. We need to do this irrespective of
557          * what's being turned off.
558          */
559         xfs_qm_detach_gdquots(mp);
560
561       again:
562         nmisses = 0;
563         ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
564         /*
565          * Try to get rid of all of the unwanted dquots. The idea is to
566          * get them off mplist and hashlist, but leave them on freelist.
567          */
568         dqp = XFS_QI_MPLNEXT(mp);
569         while (dqp) {
570                 /*
571                  * It's OK to look at the type without taking dqlock here.
572                  * We're holding the mplist lock here, and that's needed for
573                  * a dqreclaim.
574                  */
575                 if ((dqp->dq_flags & dqtype) == 0) {
576                         dqp = dqp->MPL_NEXT;
577                         continue;
578                 }
579
580                 if (! xfs_qm_dqhashlock_nowait(dqp)) {
581                         nrecl = XFS_QI_MPLRECLAIMS(mp);
582                         xfs_qm_mplist_unlock(mp);
583                         XFS_DQ_HASH_LOCK(dqp->q_hash);
584                         xfs_qm_mplist_lock(mp);
585
586                         /*
587                          * XXXTheoretically, we can get into a very long
588                          * ping pong game here.
589                          * No one can be adding dquots to the mplist at
590                          * this point, but somebody might be taking things off.
591                          */
592                         if (nrecl != XFS_QI_MPLRECLAIMS(mp)) {
593                                 XFS_DQ_HASH_UNLOCK(dqp->q_hash);
594                                 goto again;
595                         }
596                 }
597
598                 /*
599                  * Take the dquot off the mplist and hashlist. It may remain on
600                  * freelist in INACTIVE state.
601                  */
602                 nextdqp = dqp->MPL_NEXT;
603                 nmisses += xfs_qm_dqpurge(dqp);
604                 dqp = nextdqp;
605         }
606         xfs_qm_mplist_unlock(mp);
607         return nmisses;
608 }
609
610 int
611 xfs_qm_dqpurge_all(
612         xfs_mount_t     *mp,
613         uint            flags)
614 {
615         int             ndquots;
616
617         /*
618          * Purge the dquot cache.
619          * None of the dquots should really be busy at this point.
620          */
621         if (mp->m_quotainfo) {
622                 while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
623                         delay(ndquots * 10);
624                 }
625         }
626         return 0;
627 }
628
/*
 * Attach the dquot for (id, type) to one dquot slot of an inode.
 *
 * ip:        inode, ILOCK_EXCL held (asserted).
 * id, type:  identity of the dquot wanted.
 * doalloc:   OR-ed into the flags passed to xfs_qm_dqget().
 * dolock:    non-zero if the dquot is to be returned locked.
 * udqhint:   optional user dquot whose cached q_gdquot is tried first;
 *            only valid for group/project lookups (asserted).
 *            NOTE(review): when dolock is set the hint appears to be
 *            expected locked on entry, since it is only locked here
 *            when dolock is clear - confirm against the caller.
 * IO_idqpp:  address of the inode's dquot slot; read on entry and filled
 *            in on success.
 *
 * Returns 0 on success or the error from xfs_qm_dqget(). With dolock set,
 * the QUOTADEBUG checks at 'done' expect udqhint (if any) to be locked,
 * and on success the attached dquot as well.
 */
STATIC int
xfs_qm_dqattach_one(
        xfs_inode_t     *ip,
        xfs_dqid_t      id,
        uint            type,
        uint            doalloc,
        uint            dolock,
        xfs_dquot_t     *udqhint, /* hint */
        xfs_dquot_t     **IO_idqpp)
{
        xfs_dquot_t     *dqp;
        int             error;

        ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
        error = 0;
        /*
         * See if we already have it in the inode itself. IO_idqpp is
         * &i_udquot or &i_gdquot. This made the code look weird, but
         * made the logic a lot simpler.
         */
        if ((dqp = *IO_idqpp)) {
                if (dolock)
                        xfs_dqlock(dqp);
                xfs_dqtrace_entry(dqp, "DQATTACH: found in ip");
                goto done;
        }

        /*
         * udqhint is the i_udquot field in inode, and is non-NULL only
         * when the type arg is group/project. Its purpose is to save a
         * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
         * the user dquot.
         */
        ASSERT(!udqhint || type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
        /* From here until the dqget call below the hint is held locked. */
        if (udqhint && !dolock)
                xfs_dqlock(udqhint);

        /*
         * No need to take dqlock to look at the id.
         * The ID can't change until it gets reclaimed, and it won't
         * be reclaimed as long as we have a ref from inode and we hold
         * the ilock.
         */
        if (udqhint &&
            (dqp = udqhint->q_gdquot) &&
            (be32_to_cpu(dqp->q_core.d_id) == id)) {
                /* Hint hit: take our own reference on the cached dquot. */
                ASSERT(XFS_DQ_IS_LOCKED(udqhint));
                xfs_dqlock(dqp);
                XFS_DQHOLD(dqp);
                ASSERT(*IO_idqpp == NULL);
                *IO_idqpp = dqp;
                if (!dolock) {
                        /* Undo the locks taken in this function. */
                        xfs_dqunlock(dqp);
                        xfs_dqunlock(udqhint);
                }
                goto done;
        }
        /*
         * We can't hold a dquot lock when we call the dqget code.
         * We'll deadlock in no time, because of (not conforming to)
         * lock ordering - the inodelock comes before any dquot lock,
         * and we may drop and reacquire the ilock in xfs_qm_dqget().
         */
        if (udqhint)
                xfs_dqunlock(udqhint);
        /*
         * Find the dquot from somewhere. This bumps the
         * reference count of dquot and returns it locked.
         * This can return ENOENT if dquot didn't exist on
         * disk and we didn't ask it to allocate;
         * ESRCH if quotas got turned off suddenly.
         */
        if ((error = xfs_qm_dqget(ip->i_mount, ip, id, type,
                                 doalloc|XFS_QMOPT_DOWARN, &dqp))) {
                /* Restore the hint lock that was dropped just above. */
                if (udqhint && dolock)
                        xfs_dqlock(udqhint);
                goto done;
        }

        xfs_dqtrace_entry(dqp, "DQATTACH: found by dqget");
        /*
         * dqget may have dropped and re-acquired the ilock, but it guarantees
         * that the dquot returned is the one that should go in the inode.
         */
        *IO_idqpp = dqp;
        ASSERT(dqp);
        ASSERT(XFS_DQ_IS_LOCKED(dqp));
        if (! dolock) {
                xfs_dqunlock(dqp);
                goto done;
        }
        if (! udqhint)
                goto done;

        ASSERT(udqhint);
        ASSERT(dolock);
        ASSERT(XFS_DQ_IS_LOCKED(dqp));
        /*
         * Both dquots must end up locked. Try the hint without blocking
         * while dqp is held; if that fails, drop dqp and take the two
         * locks in hint-first order.
         */
        if (! xfs_qm_dqlock_nowait(udqhint)) {
                xfs_dqunlock(dqp);
                xfs_dqlock(udqhint);
                xfs_dqlock(dqp);
        }
      done:
#ifdef QUOTADEBUG
        if (udqhint) {
                if (dolock)
                        ASSERT(XFS_DQ_IS_LOCKED(udqhint));
        }
        if (! error) {
                if (dolock)
                        ASSERT(XFS_DQ_IS_LOCKED(dqp));
        }
#endif
        return error;
}
744
745
/*
 * Given a udquot and gdquot, attach a ptr to the group dquot in the
 * udquot as a hint for future lookups. The idea sounds simple, but the
 * execution isn't, because the udquot might have a group dquot attached
 * already and getting rid of that gets us into lock ordering constraints.
 * The process is complicated more by the fact that the dquots may or may not
 * be locked on entry.
 *
 * udq:    user dquot that receives the hint.
 * gdq:    group dquot to cache in udq->q_gdquot.
 * locked: non-zero if both dquots are locked on entry; they are then
 *         left locked on return. If zero, both are locked and unlocked
 *         in here.
 *
 * When the hint is installed an extra reference is taken on gdq
 * (XFS_DQHOLD); a previously cached, different group dquot is released
 * with xfs_qm_dqrele().
 */
STATIC void
xfs_qm_dqattach_grouphint(
        xfs_dquot_t     *udq,
        xfs_dquot_t     *gdq,
        uint            locked)
{
        xfs_dquot_t     *tmp;

#ifdef QUOTADEBUG
        if (locked) {
                ASSERT(XFS_DQ_IS_LOCKED(udq));
                ASSERT(XFS_DQ_IS_LOCKED(gdq));
        }
#endif
        if (! locked)
                xfs_dqlock(udq);

        if ((tmp = udq->q_gdquot)) {
                /* The hint already points at gdq: nothing to do. */
                if (tmp == gdq) {
                        if (! locked)
                                xfs_dqunlock(udq);
                        return;
                }

                udq->q_gdquot = NULL;
                /*
                 * We can't keep any dqlocks when calling dqrele,
                 * because the freelist lock comes before dqlocks.
                 */
                xfs_dqunlock(udq);
                if (locked)
                        xfs_dqunlock(gdq);
                /*
                 * we took a hard reference once upon a time in dqget,
                 * so give it back when the udquot no longer points at it
                 * dqput() does the unlocking of the dquot.
                 */
                xfs_qm_dqrele(tmp);

                /* Retake both locks, udq first, for the update below. */
                xfs_dqlock(udq);
                xfs_dqlock(gdq);

        } else {
                /* No stale hint; udq is locked already, so only gdq may be missing. */
                ASSERT(XFS_DQ_IS_LOCKED(udq));
                if (! locked) {
                        xfs_dqlock(gdq);
                }
        }

        ASSERT(XFS_DQ_IS_LOCKED(udq));
        ASSERT(XFS_DQ_IS_LOCKED(gdq));
        /*
         * Somebody could have attached a gdquot here,
         * when we dropped the uqlock. If so, just do nothing.
         */
        if (udq->q_gdquot == NULL) {
                XFS_DQHOLD(gdq);
                udq->q_gdquot = gdq;
        }
        /* Only release the locks that were taken in this function. */
        if (! locked) {
                xfs_dqunlock(gdq);
                xfs_dqunlock(udq);
        }
}
818
819
820 /*
821  * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
822  * into account.
823  * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
824  * If XFS_QMOPT_DQLOCK, the dquot(s) will be returned locked. This option pretty
825  * much made this code a complete mess, but it has been pretty useful.
826  * If XFS_QMOPT_ILOCKED, then inode sent is already locked EXCL.
827  * Inode may get unlocked and relocked in here, and the caller must deal with
828  * the consequences.
829  */
830 int
831 xfs_qm_dqattach(
832         xfs_inode_t     *ip,
833         uint            flags)
834 {
835         xfs_mount_t     *mp = ip->i_mount;
836         uint            nquotas = 0;
837         int             error = 0;
838
839         if ((! XFS_IS_QUOTA_ON(mp)) ||
840             (! XFS_NOT_DQATTACHED(mp, ip)) ||
841             (ip->i_ino == mp->m_sb.sb_uquotino) ||
842             (ip->i_ino == mp->m_sb.sb_gquotino))
843                 return 0;
844
845         ASSERT((flags & XFS_QMOPT_ILOCKED) == 0 ||
846                xfs_isilocked(ip, XFS_ILOCK_EXCL));
847
848         if (! (flags & XFS_QMOPT_ILOCKED))
849                 xfs_ilock(ip, XFS_ILOCK_EXCL);
850
851         if (XFS_IS_UQUOTA_ON(mp)) {
852                 error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
853                                                 flags & XFS_QMOPT_DQALLOC,
854                                                 flags & XFS_QMOPT_DQLOCK,
855                                                 NULL, &ip->i_udquot);
856                 if (error)
857                         goto done;
858                 nquotas++;
859         }
860
861         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
862         if (XFS_IS_OQUOTA_ON(mp)) {
863                 error = XFS_IS_GQUOTA_ON(mp) ?
864                         xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
865                                                 flags & XFS_QMOPT_DQALLOC,
866                                                 flags & XFS_QMOPT_DQLOCK,
867                                                 ip->i_udquot, &ip->i_gdquot) :
868                         xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ,
869                                                 flags & XFS_QMOPT_DQALLOC,
870                                                 flags & XFS_QMOPT_DQLOCK,
871                                                 ip->i_udquot, &ip->i_gdquot);
872                 /*
873                  * Don't worry about the udquot that we may have
874                  * attached above. It'll get detached, if not already.
875                  */
876                 if (error)
877                         goto done;
878                 nquotas++;
879         }
880
881         /*
882          * Attach this group quota to the user quota as a hint.
883          * This WON'T, in general, result in a thrash.
884          */
885         if (nquotas == 2) {
886                 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
887                 ASSERT(ip->i_udquot);
888                 ASSERT(ip->i_gdquot);
889
890                 /*
891                  * We may or may not have the i_udquot locked at this point,
892                  * but this check is OK since we don't depend on the i_gdquot to
893                  * be accurate 100% all the time. It is just a hint, and this
894                  * will succeed in general.
895                  */
896                 if (ip->i_udquot->q_gdquot == ip->i_gdquot)
897                         goto done;
898                 /*
899                  * Attach i_gdquot to the gdquot hint inside the i_udquot.
900                  */
901                 xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot,
902                                          flags & XFS_QMOPT_DQLOCK);
903         }
904
905       done:
906
907 #ifdef QUOTADEBUG
908         if (! error) {
909                 if (ip->i_udquot) {
910                         if (flags & XFS_QMOPT_DQLOCK)
911                                 ASSERT(XFS_DQ_IS_LOCKED(ip->i_udquot));
912                 }
913                 if (ip->i_gdquot) {
914                         if (flags & XFS_QMOPT_DQLOCK)
915                                 ASSERT(XFS_DQ_IS_LOCKED(ip->i_gdquot));
916                 }
917                 if (XFS_IS_UQUOTA_ON(mp))
918                         ASSERT(ip->i_udquot);
919                 if (XFS_IS_OQUOTA_ON(mp))
920                         ASSERT(ip->i_gdquot);
921         }
922 #endif
923
924         if (! (flags & XFS_QMOPT_ILOCKED))
925                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
926
927 #ifdef QUOTADEBUG
928         else
929                 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
930 #endif
931         return error;
932 }
933
934 /*
935  * Release dquots (and their references) if any.
936  * The inode should be locked EXCL except when this is called by
937  * xfs_ireclaim.
     *
     * For each of ip->i_udquot and ip->i_gdquot that is set, the dquot is
     * passed to xfs_qm_dqrele() and the inode's pointer is cleared to NULL.
     * If neither pointer is set the function returns without doing anything.
938  */
939 void
940 xfs_qm_dqdetach(
941         xfs_inode_t     *ip)
942 {
            /* Nothing attached: nothing to release. */
943         if (!(ip->i_udquot || ip->i_gdquot))
944                 return;
945
            /* The quota inodes themselves must never have dquots attached. */
946         ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
947         ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
948         if (ip->i_udquot) {
949                 xfs_dqtrace_entry_ino(ip->i_udquot, "DQDETTACH", ip);
950                 xfs_qm_dqrele(ip->i_udquot);
951                 ip->i_udquot = NULL;
952         }
953         if (ip->i_gdquot) {
954                 xfs_dqtrace_entry_ino(ip->i_gdquot, "DQDETTACH", ip);
955                 xfs_qm_dqrele(ip->i_gdquot);
956                 ip->i_gdquot = NULL;
957         }
958 }
959
960 /*
961  * This is called to sync quotas. We can be told to use non-blocking
962  * semantics by either the SYNC_BDFLUSH flag or the absence of the
963  * SYNC_WAIT flag.
     *
     * Walks the dquots of the mount (FOREACH_DQUOT_IN_MP) under the mplist
     * lock and calls xfs_qm_dqflush() on each one found dirty. The mplist
     * lock is dropped around every flush and re-taken afterwards.
     *
     * mp:    mount whose dquots are to be synced.
     * flags: SYNC_* flags; only SYNC_BDFLUSH and SYNC_WAIT are examined here.
     *
     * Returns 0 if quotas are not on, if the walk completes, or if a flush
     * fails while XFS_FORCED_SHUTDOWN(mp) is true; otherwise returns the
     * error from xfs_qm_dqflush().
964  */
965 int
966 xfs_qm_sync(
967         xfs_mount_t     *mp,
968         int             flags)
969 {
970         int             recl, restarts;
971         xfs_dquot_t     *dqp;
972         uint            flush_flags;
973         boolean_t       nowait;
974         int             error;
975
976         if (! XFS_IS_QUOTA_ON(mp))
977                 return 0;
978
979         restarts = 0;
980         /*
981          * We won't block unless we are asked to.
982          */
983         nowait = (boolean_t)(flags & SYNC_BDFLUSH || (flags & SYNC_WAIT) == 0);
984
985   again:
986         xfs_qm_mplist_lock(mp);
987         /*
988          * dqpurge_all() also takes the mplist lock and iterates through all
989          * dquots in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
990          * when we have the mplist lock, we know that dquots will be consistent
991          * as long as we have it locked.
992          */
993         if (! XFS_IS_QUOTA_ON(mp)) {
994                 xfs_qm_mplist_unlock(mp);
995                 return 0;
996         }
997         FOREACH_DQUOT_IN_MP(dqp, mp) {
998                 /*
999                  * If this is vfs_sync calling, then skip the dquots that
1000                  * don't 'seem' to be dirty, i.e. don't acquire the dqlock.
1001                  * This is very similar to what xfs_sync does with inodes.
1002                  */
1003                 if (flags & SYNC_BDFLUSH) {
1004                         if (! XFS_DQ_IS_DIRTY(dqp))
1005                                 continue;
1006                 }
1007
1008                 if (nowait) {
1009                         /*
1010                          * Try to acquire the dquot lock. We are NOT out of
1011                          * lock order, but we just don't want to wait for this
1012                          * lock, unless somebody wanted us to.
1013                          */
1014                         if (! xfs_qm_dqlock_nowait(dqp))
1015                                 continue;
1016                 } else {
1017                         xfs_dqlock(dqp);
1018                 }
1019
1020                 /*
1021                  * Now, find out for sure if this dquot is dirty or not.
1022                  */
1023                 if (! XFS_DQ_IS_DIRTY(dqp)) {
1024                         xfs_dqunlock(dqp);
1025                         continue;
1026                 }
1027
1028                 /*
                      * Snapshot the mount's reclaim counter while we still hold
                      * the mplist lock; it is compared again after the flush to
                      * decide whether the walk has to restart.
                      * XXX a sentinel would be better
                      */
1029                 recl = XFS_QI_MPLRECLAIMS(mp);
1030                 if (!xfs_dqflock_nowait(dqp)) {
1031                         if (nowait) {
1032                                 xfs_dqunlock(dqp);
1033                                 continue;
1034                         }
1035                         /*
1036                          * We couldn't grab the flush lock, but the caller
1037                          * really wanted us to give this our best shot, so
1038                          * see if we can give a push to the buffer before we wait
1039                          * on the flush lock. At this point, we know that
1040                          * even though the dquot is being flushed,
1041                          * it has (new) dirty data.
1042                          */
1043                         xfs_qm_dqflock_pushbuf_wait(dqp);
1044                 }
1045                 /*
1046                  * Let go of the mplist lock. We don't want to hold it
1047                  * across a disk write. A non-blocking sync asks for a
                      * delayed write, a blocking one for a synchronous flush.
1048                  */
1049                 flush_flags = (nowait) ? XFS_QMOPT_DELWRI : XFS_QMOPT_SYNC;
1050                 xfs_qm_mplist_unlock(mp);
1051                 xfs_dqtrace_entry(dqp, "XQM_SYNC: DQFLUSH");
1052                 error = xfs_qm_dqflush(dqp, flush_flags);
1053                 xfs_dqunlock(dqp);
1054                 if (error && XFS_FORCED_SHUTDOWN(mp))
1055                         return 0;       /* Need to prevent umount failure */
1056                 else if (error)
1057                         return error;
1058
1059                 xfs_qm_mplist_lock(mp);
                     /*
                      * The reclaim counter moved while the mplist lock was
                      * dropped, so start the walk over from the top, but only
                      * up to XFS_QM_SYNC_MAX_RESTARTS times; after that give up
                      * (the break leaves with the mplist lock still held, and
                      * it is released below).
                      * NOTE(review): presumably a changed counter means dquots
                      * were removed from the list and dqp may be stale --
                      * confirm against the reclaim code.
                      */
1060                 if (recl != XFS_QI_MPLRECLAIMS(mp)) {
1061                         if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
1062                                 break;
1063
1064                         xfs_qm_mplist_unlock(mp);
1065                         goto again;
1066                 }
1067         }
1068
1069         xfs_qm_mplist_unlock(mp);
1070         return 0;
1071 }
1072
1073 /*
1074  * The hash chains and the mplist use the same xfs_dqhash structure as
1075  * their list head, but we can take the mplist qh_lock and one of the
1076  * hash qh_locks at the same time without any problem as they aren't
1077  * related.
      *
      * This key is the lockdep class assigned to the mplist qh_lock (via
      * lockdep_set_class() in xfs_qm_init_quotainfo).
1078  */
1079 static struct lock_class_key xfs_quota_mplist_class;
1080
1081 /*
1082  * This initializes all the quota information that's kept in the
1083  * mount structure.
      *
      * Takes a quotafs reference, allocates mp->m_quotainfo, sets up the quota
      * inodes, the per-mount dquot list and the quotaoff mutex, precomputes the
      * dquot chunk geometry, and loads the default timer/warning/limit values
      * from the id 0 dquot.
      *
      * Returns 0 on success, or the error from xfs_qm_hold_quotafs_ref() or
      * xfs_qm_init_quotainos(). A failure to read the id 0 dquot is not
      * reported: built-in defaults are used instead and 0 is returned.
      *
      * NOTE(review): when xfs_qm_init_quotainos() fails, the reference taken by
      * xfs_qm_hold_quotafs_ref() is not released in this function -- confirm
      * whether the caller drops it or whether this leaks a reference.
1084  */
1085 STATIC int
1086 xfs_qm_init_quotainfo(
1087         xfs_mount_t     *mp)
1088 {
1089         xfs_quotainfo_t *qinf;
1090         int             error;
1091         xfs_dquot_t     *dqp;
1092
1093         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1094
1095         /*
1096          * Tell XQM that we exist as soon as possible.
1097          */
1098         if ((error = xfs_qm_hold_quotafs_ref(mp))) {
1099                 return error;
1100         }
1101
1102         qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
1103
1104         /*
1105          * See if quotainodes are setup, and if not, allocate them,
1106          * and change the superblock accordingly.
1107          */
1108         if ((error = xfs_qm_init_quotainos(mp))) {
1109                 kmem_free(qinf);
1110                 mp->m_quotainfo = NULL;
1111                 return error;
1112         }
1113
1114         xfs_qm_list_init(&qinf->qi_dqlist, "mpdqlist", 0);
1115         lockdep_set_class(&qinf->qi_dqlist.qh_lock, &xfs_quota_mplist_class);
1116
1117         qinf->qi_dqreclaims = 0;
1118
1119         /* mutex used to serialize quotaoffs */
1120         mutex_init(&qinf->qi_quotaofflock);
1121
1122         /*
              * Precalc some constants: the dquot cluster length in basic
              * blocks, and the number of xfs_dqblk_t entries that fit in it
              * (do_div() divides qi_dqperchunk in place).
              */
1123         qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
1124         ASSERT(qinf->qi_dqchunklen);
1125         qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
1126         do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
1127
             /* Carry over the "quota checked" bits recorded in the superblock. */
1128         mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
1129
1130         /*
1131          * We try to get the limits from the superuser's limits fields.
1132          * This is quite hacky, but it is standard quota practice.
1133          * We look at the USR dquot with id == 0 first, but if user quotas
1134          * are not enabled we go to the GRP dquot with id == 0, and failing
              * that to the PROJ dquot with id == 0.
1135          * We don't really care to keep separate default limits for user
1136          * and group quotas, at least not at this point.
1137          */
1138         error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
1139                              XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : 
1140                              (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
1141                                 XFS_DQ_PROJ),
1142                              XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
1143                              &dqp);
1144         if (! error) {
1145                 xfs_disk_dquot_t        *ddqp = &dqp->q_core;
1146
1147                 /*
1148                  * The warnings and timers set the grace period given to
1149                  * a user or group before he or she can not perform any
1150                  * more writing. If it is zero, a default is used.
                      * The on-disk fields are big-endian and are converted
                      * to CPU byte order here.
1151                  */
1152                 qinf->qi_btimelimit = ddqp->d_btimer ?
1153                         be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT;
1154                 qinf->qi_itimelimit = ddqp->d_itimer ?
1155                         be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT;
1156                 qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ?
1157                         be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT;
1158                 qinf->qi_bwarnlimit = ddqp->d_bwarns ?
1159                         be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT;
1160                 qinf->qi_iwarnlimit = ddqp->d_iwarns ?
1161                         be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
1162                 qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
1163                         be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
1164                 qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
1165                 qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
1166                 qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
1167                 qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
1168                 qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
1169                 qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
1170  
1171                 /*
1172                  * We sent the XFS_QMOPT_DQSUSER flag to dqget because
1173                  * we don't want this dquot cached. We haven't done a
1174                  * quotacheck yet, and quotacheck doesn't like incore dquots.
1175                  */
1176                 xfs_qm_dqdestroy(dqp);
1177         } else {
                     /*
                      * No id 0 dquot available: use the compiled-in timer and
                      * warning defaults. The hard/soft limit fields are not
                      * assigned on this path.
                      * NOTE(review): presumably they stay zero because qinf
                      * came from kmem_zalloc() -- confirm.
                      */
1178                 qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
1179                 qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
1180                 qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
1181                 qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
1182                 qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
1183                 qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
1184         }
1185
1186         return 0;
1187 }
1188
1189
1190 /*
1191  * Gets called when unmounting a filesystem or when all quotas get
1192  * turned off.
1193  * This purges the quota inodes, destroys locks and frees itself.
      *
      * In order: drops the quotafs reference, destroys the per-mount dquot
      * list, releases the user and group quota inodes if present, destroys
      * the quotaoff mutex, frees the xfs_quotainfo_t and clears
      * mp->m_quotainfo.
1194  */
1195 void
1196 xfs_qm_destroy_quotainfo(
1197         xfs_mount_t     *mp)
1198 {
1199         xfs_quotainfo_t *qi;
1200
1201         qi = mp->m_quotainfo;
1202         ASSERT(qi != NULL);
1203         ASSERT(xfs_Gqm != NULL);
1204
1205         /*
1206          * Release the reference that XQM kept, so that we know
1207          * when the XQM structure should be freed. We cannot assume
1208          * that xfs_Gqm is non-null after this point.
1209          */
1210         xfs_qm_rele_quotafs_ref(mp);
1211
1212         xfs_qm_list_destroy(&qi->qi_dqlist);
1213
1214         if (qi->qi_uquotaip) {
1215                 IRELE(qi->qi_uquotaip);
1216                 qi->qi_uquotaip = NULL; /* paranoia */
1217         }
1218         if (qi->qi_gquotaip) {
1219                 IRELE(qi->qi_gquotaip);
1220                 qi->qi_gquotaip = NULL;
1221         }
1222         mutex_destroy(&qi->qi_quotaofflock);
1223         kmem_free(qi);
1224         mp->m_quotainfo = NULL;
1225 }
1226
1227
1228
1229 /* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
1230
1231 /*
      * Initialize a dquot list head: set up its qh_lock mutex and reset the
      * next pointer, version and element count to empty.
      *
      * The str and n arguments are not used by this implementation.
      */
     /* ARGSUSED */
1232 STATIC void
1233 xfs_qm_list_init(
1234         xfs_dqlist_t    *list,
1235         char            *str,
1236         int             n)
1237 {
1238         mutex_init(&list->qh_lock);
1239         list->qh_next = NULL;
1240         list->qh_version = 0;
1241         list->qh_nelems = 0;
1242 }
1243
     /*
      * Tear down a dquot list head by destroying its qh_lock mutex.
      * The list's other fields are left untouched.
      */
1244 STATIC void
1245 xfs_qm_list_destroy(
1246         xfs_dqlist_t    *list)
1247 {
1248         mutex_destroy(&(list->qh_lock));
1249 }
1250
1251
1252 /*
1253  * Stripped down version of dqattach. This doesn't attach, or even look at the
1254  * dquots attached to the inode. The rationale is that there won't be any
1255  * attached at the time this is called from quotacheck.
      *
      * ip:      inode whose owner ids are looked up; must be ILOCKed EXCL.
      * O_udqpp: out, the user dquot, or NULL if user quota is not on.
      * O_gdqpp: out, the group (or project) dquot, or NULL if neither group
      *          nor project quota is on.
      *
      * Returns 0 on success, or the error from xfs_qm_dqget(). The output
      * pointers are written only on success. If the group/project lookup
      * fails, the user dquot obtained earlier is released before returning.
      *
      * NOTE(review): the QUOTADEBUG asserts at the end expect both returned
      * dquots to be locked; presumably xfs_qm_dqget() hands them back locked
      * -- confirm.
1256  */
1257 STATIC int
1258 xfs_qm_dqget_noattach(
1259         xfs_inode_t     *ip,
1260         xfs_dquot_t     **O_udqpp,
1261         xfs_dquot_t     **O_gdqpp)
1262 {
1263         int             error;
1264         xfs_mount_t     *mp;
1265         xfs_dquot_t     *udqp, *gdqp;
1266
1267         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1268         mp = ip->i_mount;
1269         udqp = NULL;
1270         gdqp = NULL;
1271
1272         if (XFS_IS_UQUOTA_ON(mp)) {
1273                 ASSERT(ip->i_udquot == NULL);
1274                 /*
1275                  * We want the dquot allocated if it doesn't exist.
1276                  */
1277                 if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_uid, XFS_DQ_USER,
1278                                          XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN,
1279                                          &udqp))) {
1280                         /*
1281                          * Shouldn't be able to turn off quotas here.
1282                          */
1283                         ASSERT(error != ESRCH);
1284                         ASSERT(error != ENOENT);
1285                         return error;
1286                 }
1287                 ASSERT(udqp);
1288         }
1289
1290         if (XFS_IS_OQUOTA_ON(mp)) {
1291                 ASSERT(ip->i_gdquot == NULL);
                     /* Drop the user dquot lock before looking up the second dquot. */
1292                 if (udqp)
1293                         xfs_dqunlock(udqp);
                     /* Group quota takes precedence; otherwise use the project id. */
1294                 error = XFS_IS_GQUOTA_ON(mp) ?
1295                                 xfs_qm_dqget(mp, ip,
1296                                              ip->i_d.di_gid, XFS_DQ_GROUP,
1297                                              XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
1298                                              &gdqp) :
1299                                 xfs_qm_dqget(mp, ip,
1300                                              ip->i_d.di_projid, XFS_DQ_PROJ,
1301                                              XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN,
1302                                              &gdqp);
1303                 if (error) {
1304                         if (udqp)
1305                                 xfs_qm_dqrele(udqp);
1306                         ASSERT(error != ESRCH);
1307                         ASSERT(error != ENOENT);
1308                         return error;
1309                 }
1310                 ASSERT(gdqp);
1311
1312                 /*
                      * Reacquire the locks in the right order: try to take the
                      * user dquot lock without blocking; if that fails, drop the
                      * group dquot lock and take user first, then group.
                      */
1313                 if (udqp) {
1314                         if (! xfs_qm_dqlock_nowait(udqp)) {
1315                                 xfs_dqunlock(gdqp);
1316                                 xfs_dqlock(udqp);
1317                                 xfs_dqlock(gdqp);
1318                         }
1319                 }
1320         }
1321
1322         *O_udqpp = udqp;
1323         *O_gdqpp = gdqp;
1324
1325 #ifdef QUOTADEBUG
1326         if (udqp) ASSERT(XFS_DQ_IS_LOCKED(udqp));
1327         if (gdqp) ASSERT(XFS_DQ_IS_LOCKED(gdqp));
1328 #endif
1329         return 0;
1330 }
1331
1332 /*
1333  * Create an inode and return with a reference already taken, but unlocked.
1334  * This is how we create quota inodes.
      *
      * mp:       mount to create the quota inode on.
      * ip:       out, the newly allocated inode.
      * sbfields: superblock field mask passed to xfs_mod_sb() for logging.
      * flags:    XFS_QMOPT_SBVERSION additionally adds the quota bit to the
      *           superblock version and resets both quota inode numbers and
      *           sb_qflags; XFS_QMOPT_UQUOTA records the new inode as the
      *           user quota inode, otherwise it is recorded as the group
      *           quota inode.
      *
      * Returns 0 on success, or the error from xfs_trans_reserve(),
      * xfs_dir_ialloc() or xfs_trans_commit(). The transaction is cancelled
      * on the first two failures.
1335  */
1336 STATIC int
1337 xfs_qm_qino_alloc(
1338         xfs_mount_t     *mp,
1339         xfs_inode_t     **ip,
1340         __int64_t       sbfields,
1341         uint            flags)
1342 {
1343         xfs_trans_t     *tp;
1344         int             error;
1345         int             committed;
1346
1347         tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
1348         if ((error = xfs_trans_reserve(tp,
1349                                       XFS_QM_QINOCREATE_SPACE_RES(mp),
1350                                       XFS_CREATE_LOG_RES(mp), 0,
1351                                       XFS_TRANS_PERM_LOG_RES,
1352                                       XFS_CREATE_LOG_COUNT))) {
1353                 xfs_trans_cancel(tp, 0);
1354                 return error;
1355         }
1356
1357         if ((error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0,
1358                                    &xfs_zerocr, 0, 1, ip, &committed))) {
1359                 xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
1360                                  XFS_TRANS_ABORT);
1361                 return error;
1362         }
1363
1364         /*
1365          * Keep an extra reference to this quota inode. This inode is
1366          * locked exclusively and joined to the transaction already.
1367          */
1368         ASSERT(xfs_isilocked(*ip, XFS_ILOCK_EXCL));
1369         IHOLD(*ip);
1370
1371         /*
1372          * Make the changes in the superblock, and log those too.
1373          * sbfields arg may contain fields other than *QUOTINO;
1374          * VERSIONNUM for example.
              * The in-core superblock is modified under m_sb_lock.
1375          */
1376         spin_lock(&mp->m_sb_lock);
1377         if (flags & XFS_QMOPT_SBVERSION) {
1378 #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
1379                 unsigned oldv = mp->m_sb.sb_versionnum;
1380 #endif
1381                 ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
1382                 ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1383                                    XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
1384                        (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1385                         XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
1386
1387                 xfs_sb_version_addquota(&mp->m_sb);
1388                 mp->m_sb.sb_uquotino = NULLFSINO;
1389                 mp->m_sb.sb_gquotino = NULLFSINO;
1390
1391                 /* qflags will get updated _after_ quotacheck */
1392                 mp->m_sb.sb_qflags = 0;
1393 #if defined(DEBUG) && defined(XFS_LOUD_RECOVERY)
1394                 cmn_err(CE_NOTE,
1395                         "Old superblock version %x, converting to %x.",
1396                         oldv, mp->m_sb.sb_versionnum);
1397 #endif
1398         }
1399         if (flags & XFS_QMOPT_UQUOTA)
1400                 mp->m_sb.sb_uquotino = (*ip)->i_ino;
1401         else
1402                 mp->m_sb.sb_gquotino = (*ip)->i_ino;
1403         spin_unlock(&mp->m_sb_lock);
1404         xfs_mod_sb(tp, sbfields);
1405
1406         if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
1407                 xfs_fs_cmn_err(CE_ALERT, mp, "XFS qino_alloc failed!");
1408                 return error;
1409         }
1410         return 0;
1411 }
1412
1413
     /*
      * Zero the usage counters, timers and warning counts of every on-disk
      * dquot held in buffer bp, running xfs_qm_dqcheck() in repair mode on
      * each one first.
      *
      * mp:   mount the buffer belongs to.
      * bp:   buffer holding XFS_QM_DQPERBLK(mp) consecutive xfs_dqblk_t entries.
      * id:   dquot id of the first entry in the buffer; entry j is checked
      *       as id + j.
      * type: dquot type passed through to xfs_qm_dqcheck().
      *
      * The buffer is only modified in memory here; this function does not
      * write it out.
      */
1414 STATIC void
1415 xfs_qm_reset_dqcounts(
1416         xfs_mount_t     *mp,
1417         xfs_buf_t       *bp,
1418         xfs_dqid_t      id,
1419         uint            type)
1420 {
1421         xfs_disk_dquot_t        *ddq;
1422         int                     j;
1423
1424         xfs_buftrace("RESET DQUOTS", bp);
1425         /*
1426          * Reset all counters and timers. They'll be
1427          * started afresh by xfs_qm_quotacheck.
1428          */
1429 #ifdef DEBUG
             /*
              * Sanity-check that XFS_QM_DQPERBLK() matches the cluster size
              * divided by the dqblk size.
              * NOTE(review): j is a plain int but is passed to do_div(), which
              * expects a 64-bit dividend -- consider an ordinary division.
              */
1430         j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
1431         do_div(j, sizeof(xfs_dqblk_t));
1432         ASSERT(XFS_QM_DQPERBLK(mp) == j);
1433 #endif
1434         ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp);
1435         for (j = 0; j < XFS_QM_DQPERBLK(mp); j++) {
1436                 /*
1437                  * Do a sanity check, and if needed, repair the dqblk. Don't
1438                  * output any warnings because it's perfectly possible to
1439                  * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
1440                  */
1441                 (void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR,
1442                                       "xfs_quotacheck");
1443                 ddq->d_bcount = 0;
1444                 ddq->d_icount = 0;
1445                 ddq->d_rtbcount = 0;
1446                 ddq->d_btimer = 0;
1447                 ddq->d_itimer = 0;
1448                 ddq->d_rtbtimer = 0;
1449                 ddq->d_bwarns = 0;
1450                 ddq->d_iwarns = 0;
1451                 ddq->d_rtbwarns = 0;
                     /* Step by sizeof(xfs_dqblk_t), not by the disk dquot size. */
1452                 ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
1453         }
1454 }
1455
     /*
      * Reset the dquot counters in blkcnt consecutive filesystem blocks
      * starting at bno: each block is read, passed to
      * xfs_qm_reset_dqcounts() and queued with xfs_bdwrite().
      *
      * mp:      mount being quotachecked.
      * firstid: dquot id of the first dquot in block bno; advanced by
      *          XFS_QM_DQPERBLK(mp) for every block.
      * bno:     first filesystem block of the extent.
      * blkcnt:  number of blocks in the extent; must be > 0.
      * flags:   XFS_QMOPT_UQUOTA selects user dquots, XFS_QMOPT_PQUOTA
      *          project dquots, anything else group dquots.
      *
      * Returns 0, or the error from xfs_trans_read_buf(), in which case the
      * remaining blocks are not processed.
      *
      * NOTE(review): notcommitted and incr are assigned but never read in
      * this function; they look like leftovers and could be removed.
      */
1456 STATIC int
1457 xfs_qm_dqiter_bufs(
1458         xfs_mount_t     *mp,
1459         xfs_dqid_t      firstid,
1460         xfs_fsblock_t   bno,
1461         xfs_filblks_t   blkcnt,
1462         uint            flags)
1463 {
1464         xfs_buf_t       *bp;
1465         int             error;
1466         int             notcommitted;
1467         int             incr;
1468         int             type;
1469
1470         ASSERT(blkcnt > 0);
1471         notcommitted = 0;
1472         incr = (blkcnt > XFS_QM_MAX_DQCLUSTER_LOGSZ) ?
1473                 XFS_QM_MAX_DQCLUSTER_LOGSZ : blkcnt;
1474         type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
1475                 (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
1476         error = 0;
1477
1478         /*
1479          * Blkcnt arg can be a very big number, and might even be
1480          * larger than the log itself. So, we have to break it up into
1481          * manageable-sized transactions.
1482          * Note that we don't start a permanent transaction here; we might
1483          * not be able to get a log reservation for the whole thing up front,
1484          * and we don't really care to either, because we just discard
1485          * everything if we were to crash in the middle of this loop.
              * (The buffers below are in fact read with a NULL transaction.)
1486          */
1487         while (blkcnt--) {
1488                 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
1489                               XFS_FSB_TO_DADDR(mp, bno),
1490                               (int)XFS_QI_DQCHUNKLEN(mp), 0, &bp);
1491                 if (error)
1492                         break;
1493
1494                 xfs_qm_reset_dqcounts(mp, bp, firstid, type);
1495                 xfs_bdwrite(mp, bp);
1496                 /*
1497                  * Go to the next block.
1498                  */
1499                 bno++;
1500                 firstid += XFS_QM_DQPERBLK(mp);
1501         }
1502         return error;
1503 }
1504
1505 /*
1506  * Iterate over all allocated USR/GRP/PRJ dquots in the system, resetting
1507  * the counters of every chunk of dquots that we find.
      *
      * The quota inode's block map is read XFS_DQITER_MAP_SIZE extents at a
      * time; holes are skipped and every real extent is handed to
      * xfs_qm_dqiter_bufs(). Read-ahead is issued for the extent following
      * the one being processed.
      *
      * mp:    mount being quotachecked.
      * qip:   quota inode whose blocks hold the dquots.
      * flags: passed through to xfs_qm_dqiter_bufs() to select the dquot type.
      *
      * Returns 0 (also when the quota inode has no blocks), or the first
      * error from xfs_bmapi() or xfs_qm_dqiter_bufs().
1508  */
1509 STATIC int
1510 xfs_qm_dqiterate(
1511         xfs_mount_t     *mp,
1512         xfs_inode_t     *qip,
1513         uint            flags)
1514 {
1515         xfs_bmbt_irec_t         *map;
1516         int                     i, nmaps;       /* number of map entries */
1517         int                     error;          /* return value */
1518         xfs_fileoff_t           lblkno;
1519         xfs_filblks_t           maxlblkcnt;
1520         xfs_dqid_t              firstid;
1521         xfs_fsblock_t           rablkno;
1522         xfs_filblks_t           rablkcnt;
1523
1524         error = 0;
1525         /*
1526          * This looks racy, but we can't keep an inode lock across a
1527          * trans_reserve. But, this gets called during quotacheck, and that
1528          * happens only at mount time which is single threaded.
1529          */
1530         if (qip->i_d.di_nblocks == 0)
1531                 return 0;
1532
1533         map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
1534
1535         lblkno = 0;
1536         maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
1537         do {
1538                 nmaps = XFS_DQITER_MAP_SIZE;
1539                 /*
1540                  * We aren't changing the inode itself. Just changing
1541                  * some of its data. No new blocks are added here, and
1542                  * the inode is never added to the transaction.
1543                  */
1544                 xfs_ilock(qip, XFS_ILOCK_SHARED);
1545                 error = xfs_bmapi(NULL, qip, lblkno,
1546                                   maxlblkcnt - lblkno,
1547                                   XFS_BMAPI_METADATA,
1548                                   NULL,
1549                                   0, map, &nmaps, NULL, NULL);
1550                 xfs_iunlock(qip, XFS_ILOCK_SHARED);
1551                 if (error)
1552                         break;
1553
1554                 ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
1555                 for (i = 0; i < nmaps; i++) {
1556                         ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
1557                         ASSERT(map[i].br_blockcount);
1558
1559
                             /* Advance the file offset past this extent, hole or not. */
1560                         lblkno += map[i].br_blockcount;
1561
1562                         if (map[i].br_startblock == HOLESTARTBLOCK)
1563                                 continue;
1564
                             /* The id of an extent's first dquot follows from its file offset. */
1565                         firstid = (xfs_dqid_t) map[i].br_startoff *
1566                                 XFS_QM_DQPERBLK(mp);
1567                         /*
1568                          * Do a read-ahead on the next extent.
1569                          */
1570                         if ((i+1 < nmaps) &&
1571                             (map[i+1].br_startblock != HOLESTARTBLOCK)) {
1572                                 rablkcnt =  map[i+1].br_blockcount;
1573                                 rablkno = map[i+1].br_startblock;
1574                                 while (rablkcnt--) {
1575                                         xfs_baread(mp->m_ddev_targp,
1576                                                XFS_FSB_TO_DADDR(mp, rablkno),
1577                                                (int)XFS_QI_DQCHUNKLEN(mp));
1578                                         rablkno++;
1579                                 }
1580                         }
1581                         /*
1582                          * Iterate through all the blocks in the extent and
1583                          * reset the counters of all the dquots inside them.
1584                          */
1585                         if ((error = xfs_qm_dqiter_bufs(mp,
1586                                                        firstid,
1587                                                        map[i].br_startblock,
1588                                                        map[i].br_blockcount,
1589                                                        flags))) {
1590                                 break;
1591                         }
1592                 }
1593
1594                 if (error)
1595                         break;
1596         } while (nmaps > 0);
1597
1598         kmem_free(map);
1599
1600         return error;
1601 }
1602
1603 /*
1604  * Called by dqusage_adjust in doing a quotacheck.
1605  * Given the inode, and a dquot (either USR or GRP, doesn't matter),
1606  * this updates its incore copy as well as the buffer copy. This is
1607  * so that once the quotacheck is done, we can just log all the buffers,
1608  * as opposed to logging numerous updates to individual dquots.
      *
      * dqp:    dquot to charge; must be locked by the caller.
      * nblks:  regular blocks used by the inode (skipped when 0).
      * rtblks: realtime blocks used by the inode (skipped when 0).
      *
      * The inode count is always bumped by one. Both the big-endian q_core
      * counters and the q_res_* counters are updated, and the dquot is
      * marked XFS_DQ_DIRTY.
1609  */
1610 STATIC void
1611 xfs_qm_quotacheck_dqadjust(
1612         xfs_dquot_t             *dqp,
1613         xfs_qcnt_t              nblks,
1614         xfs_qcnt_t              rtblks)
1615 {
1616         ASSERT(XFS_DQ_IS_LOCKED(dqp));
1617         xfs_dqtrace_entry(dqp, "QCHECK DQADJUST");
1618         /*
1619          * Adjust the inode count and the block count to reflect this inode's
1620          * resource usage.
1621          */
1622         be64_add_cpu(&dqp->q_core.d_icount, 1);
1623         dqp->q_res_icount++;
1624         if (nblks) {
1625                 be64_add_cpu(&dqp->q_core.d_bcount, nblks);
1626                 dqp->q_res_bcount += nblks;
1627         }
1628         if (rtblks) {
1629                 be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks);
1630                 dqp->q_res_rtbcount += rtblks;
1631         }
1632
1633         /*
1634          * Set default limits, adjust timers (since we changed usages).
              * This is skipped for the superuser dquot.
1635          */
1636         if (! XFS_IS_SUSER_DQUOT(dqp)) {
1637                 xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core);
1638                 xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core);
1639         }
1640
1641         dqp->dq_flags |= XFS_DQ_DIRTY;
1642 }
1643
     /*
      * Count the blocks used by a realtime inode by summing the block counts
      * of every extent record in its data fork.
      *
      * ip:       realtime inode (asserted via XFS_IS_REALTIME_INODE).
      * O_rtblks: out, the total block count; written only on success.
      *
      * Returns 0, or the error from xfs_iread_extents() if the extent list
      * had to be read in first and that failed.
      */
1644 STATIC int
1645 xfs_qm_get_rtblks(
1646         xfs_inode_t     *ip,
1647         xfs_qcnt_t      *O_rtblks)
1648 {
1649         xfs_filblks_t   rtblks;                 /* total rt blks */
1650         xfs_extnum_t    idx;                    /* extent record index */
1651         xfs_ifork_t     *ifp;                   /* inode fork pointer */
1652         xfs_extnum_t    nextents;               /* number of extent entries */
1653         int             error;
1654
1655         ASSERT(XFS_IS_REALTIME_INODE(ip));
1656         ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
             /* Read the extent records in if XFS_IFEXTENTS is not set on the fork. */
1657         if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1658                 if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
1659                         return error;
1660         }
1661         rtblks = 0;
1662         nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1663         for (idx = 0; idx < nextents; idx++)
1664                 rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
1665         *O_rtblks = (xfs_qcnt_t)rtblks;
1666         return 0;
1667 }
1668
1669 /*
1670  * callback routine supplied to bulkstat(). Given an inumber, find its
1671  * dquots and update them to account for resources taken by that inode.
1672  */
1673 /* ARGSUSED */
1674 STATIC int
1675 xfs_qm_dqusage_adjust(
1676         xfs_mount_t     *mp,            /* mount point for filesystem */
1677         xfs_ino_t       ino,            /* inode number to get data for */
1678         void            __user *buffer, /* not used */
1679         int             ubsize,         /* not used */
1680         void            *private_data,  /* not used */
1681         xfs_daddr_t     bno,            /* starting block of inode cluster */
1682         int             *ubused,        /* not used */
1683         void            *dip,           /* on-disk inode pointer (not used) */
1684         int             *res)           /* result code value */
1685 {
1686         xfs_inode_t     *ip;
1687         xfs_dquot_t     *udqp, *gdqp;
1688         xfs_qcnt_t      nblks, rtblks;
1689         int             error;
1690
1691         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1692
1693         /*
1694          * rootino must have its resources accounted for, not so with the quota
1695          * inodes.
1696          */
1697         if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
1698                 *res = BULKSTAT_RV_NOTHING;
1699                 return XFS_ERROR(EINVAL);
1700         }
1701
1702         /*
1703          * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
1704          * interface expects the inode to be exclusively locked because that's
1705          * the case in all other instances. It's OK that we do this because
1706          * quotacheck is done only at mount time.
1707          */
1708         if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip, bno))) {
1709                 *res = BULKSTAT_RV_NOTHING;
1710                 return error;
1711         }
1712
1713         /*
1714          * Obtain the locked dquots. In case of an error (eg. allocation
1715          * fails for ENOSPC), we return the negative of the error number
1716          * to bulkstat, so that it can get propagated to quotacheck() and
1717          * making us disable quotas for the file system.
1718          */
1719         if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) {
1720                 xfs_iput(ip, XFS_ILOCK_EXCL);
1721                 *res = BULKSTAT_RV_GIVEUP;
1722                 return error;
1723         }
1724
1725         rtblks = 0;
1726         if (! XFS_IS_REALTIME_INODE(ip)) {
1727                 nblks = (xfs_qcnt_t)ip->i_d.di_nblocks;
1728         } else {
1729                 /*
1730                  * Walk thru the extent list and count the realtime blocks.
1731                  */
1732                 if ((error = xfs_qm_get_rtblks(ip, &rtblks))) {
1733                         xfs_iput(ip, XFS_ILOCK_EXCL);
1734                         if (udqp)
1735                                 xfs_qm_dqput(udqp);
1736                         if (gdqp)
1737                                 xfs_qm_dqput(gdqp);
1738                         *res = BULKSTAT_RV_GIVEUP;
1739                         return error;
1740                 }
1741                 nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
1742         }
1743         ASSERT(ip->i_delayed_blks == 0);
1744
1745         /*
1746          * We can't release the inode while holding its dquot locks.
1747          * The inode can go into inactive and might try to acquire the dquotlocks.
1748          * So, just unlock here and do a vn_rele at the end.
1749          */
1750         xfs_iunlock(ip, XFS_ILOCK_EXCL);
1751
1752         /*
1753          * Add the (disk blocks and inode) resources occupied by this
1754          * inode to its dquots. We do this adjustment in the incore dquot,
1755          * and also copy the changes to its buffer.
1756          * We don't care about putting these changes in a transaction
1757          * envelope because if we crash in the middle of a 'quotacheck'
1758          * we have to start from the beginning anyway.
1759          * Once we're done, we'll log all the dquot bufs.
1760          *
1761          * The *QUOTA_ON checks below may look pretty racy, but quotachecks
1762          * and quotaoffs don't race. (Quotachecks happen at mount time only).
1763          */
1764         if (XFS_IS_UQUOTA_ON(mp)) {
1765                 ASSERT(udqp);
1766                 xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks);
1767                 xfs_qm_dqput(udqp);
1768         }
1769         if (XFS_IS_OQUOTA_ON(mp)) {
1770                 ASSERT(gdqp);
1771                 xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks);
1772                 xfs_qm_dqput(gdqp);
1773         }
1774         /*
1775          * Now release the inode. This will send it to 'inactive', and
1776          * possibly even free blocks.
1777          */
1778         IRELE(ip);
1779
1780         /*
1781          * Goto next inode.
1782          */
1783         *res = BULKSTAT_RV_DIDONE;
1784         return 0;
1785 }
1786
1787 /*
1788  * Walk thru all the filesystem inodes and construct a consistent view
1789  * of the disk quota world. If the quotacheck fails, disable quotas.
1790  */
1791 int
1792 xfs_qm_quotacheck(
1793         xfs_mount_t     *mp)
1794 {
1795         int             done, count, error;
1796         xfs_ino_t       lastino;
1797         size_t          structsz;
1798         xfs_inode_t     *uip, *gip;
1799         uint            flags;
1800
1801         count = INT_MAX;
1802         structsz = 1;
1803         lastino = 0;
1804         flags = 0;
1805
1806         ASSERT(XFS_QI_UQIP(mp) || XFS_QI_GQIP(mp));
1807         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1808
1809         /*
1810          * There should be no cached dquots. The (simplistic) quotacheck
1811          * algorithm doesn't like that.
1812          */
1813         ASSERT(XFS_QI_MPLNDQUOTS(mp) == 0);
1814
1815         cmn_err(CE_NOTE, "XFS quotacheck %s: Please wait.", mp->m_fsname);
1816
1817         /*
1818          * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
1819          * their counters to zero. We need a clean slate.
1820          * We don't log our changes till later.
1821          */
1822         if ((uip = XFS_QI_UQIP(mp))) {
1823                 if ((error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA)))
1824                         goto error_return;
1825                 flags |= XFS_UQUOTA_CHKD;
1826         }
1827
1828         if ((gip = XFS_QI_GQIP(mp))) {
1829                 if ((error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
1830                                         XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA)))
1831                         goto error_return;
1832                 flags |= XFS_OQUOTA_CHKD;
1833         }
1834
1835         do {
1836                 /*
1837                  * Iterate thru all the inodes in the file system,
1838                  * adjusting the corresponding dquot counters in core.
1839                  */
1840                 if ((error = xfs_bulkstat(mp, &lastino, &count,
1841                                      xfs_qm_dqusage_adjust, NULL,
1842                                      structsz, NULL, BULKSTAT_FG_IGET, &done)))
1843                         break;
1844
1845         } while (! done);
1846
1847         /*
1848          * We've made all the changes that we need to make incore.
1849          * Flush them down to disk buffers if everything was updated
1850          * successfully.
1851          */
1852         if (!error)
1853                 error = xfs_qm_dqflush_all(mp, XFS_QMOPT_DELWRI);
1854
1855         /*
1856          * We can get this error if we couldn't do a dquot allocation inside
1857          * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
1858          * dirty dquots that might be cached, we just want to get rid of them
1859          * and turn quotaoff. The dquots won't be attached to any of the inodes
1860          * at this point (because we intentionally didn't in dqget_noattach).
1861          */
1862         if (error) {
1863                 xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL | XFS_QMOPT_QUOTAOFF);
1864                 goto error_return;
1865         }
1866
1867         /*
1868          * We didn't log anything, because if we crashed, we'll have to
1869          * start the quotacheck from scratch anyway. However, we must make
1870          * sure that our dquot changes are secure before we put the
1871          * quotacheck'd stamp on the superblock. So, here we do a synchronous
1872          * flush.
1873          */
1874         XFS_bflush(mp->m_ddev_targp);
1875
1876         /*
1877          * If one type of quotas is off, then it will lose its
1878          * quotachecked status, since we won't be doing accounting for
1879          * that type anymore.
1880          */
1881         mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
1882         mp->m_qflags |= flags;
1883
1884         XQM_LIST_PRINT(&(XFS_QI_MPL_LIST(mp)), MPL_NEXT, "++++ Mp list +++");
1885
1886  error_return:
1887         if (error) {
1888                 cmn_err(CE_WARN, "XFS quotacheck %s: Unsuccessful (Error %d): "
1889                         "Disabling quotas.",
1890                         mp->m_fsname, error);
1891                 /*
1892                  * We must turn off quotas.
1893                  */
1894                 ASSERT(mp->m_quotainfo != NULL);
1895                 ASSERT(xfs_Gqm != NULL);
1896                 xfs_qm_destroy_quotainfo(mp);
1897                 if (xfs_mount_reset_sbqflags(mp)) {
1898                         cmn_err(CE_WARN, "XFS quotacheck %s: "
1899                                 "Failed to reset quota flags.", mp->m_fsname);
1900                 }
1901         } else {
1902                 cmn_err(CE_NOTE, "XFS quotacheck %s: Done.", mp->m_fsname);
1903         }
1904         return (error);
1905 }
1906
1907 /*
1908  * This is called after the superblock has been read in and we're ready to
1909  * iget the quota inodes.
1910  */
1911 STATIC int
1912 xfs_qm_init_quotainos(
1913         xfs_mount_t     *mp)
1914 {
1915         xfs_inode_t     *uip, *gip;
1916         int             error;
1917         __int64_t       sbflags;
1918         uint            flags;
1919
1920         ASSERT(mp->m_quotainfo);
1921         uip = gip = NULL;
1922         sbflags = 0;
1923         flags = 0;
1924
1925         /*
1926          * Get the uquota and gquota inodes
1927          */
1928         if (xfs_sb_version_hasquota(&mp->m_sb)) {
1929                 if (XFS_IS_UQUOTA_ON(mp) &&
1930                     mp->m_sb.sb_uquotino != NULLFSINO) {
1931                         ASSERT(mp->m_sb.sb_uquotino > 0);
1932                         if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
1933                                              0, 0, &uip, 0)))
1934                                 return XFS_ERROR(error);
1935                 }
1936                 if (XFS_IS_OQUOTA_ON(mp) &&
1937                     mp->m_sb.sb_gquotino != NULLFSINO) {
1938                         ASSERT(mp->m_sb.sb_gquotino > 0);
1939                         if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
1940                                              0, 0, &gip, 0))) {
1941                                 if (uip)
1942                                         IRELE(uip);
1943                                 return XFS_ERROR(error);
1944                         }
1945                 }
1946         } else {
1947                 flags |= XFS_QMOPT_SBVERSION;
1948                 sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1949                             XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
1950         }
1951
1952         /*
1953          * Create the two inodes, if they don't exist already. The changes
1954          * made above will get added to a transaction and logged in one of
1955          * the qino_alloc calls below.  If the device is readonly,
1956          * temporarily switch to read-write to do this.
1957          */
1958         if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
1959                 if ((error = xfs_qm_qino_alloc(mp, &uip,
1960                                               sbflags | XFS_SB_UQUOTINO,
1961                                               flags | XFS_QMOPT_UQUOTA)))
1962                         return XFS_ERROR(error);
1963
1964                 flags &= ~XFS_QMOPT_SBVERSION;
1965         }
1966         if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) {
1967                 flags |= (XFS_IS_GQUOTA_ON(mp) ?
1968                                 XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
1969                 error = xfs_qm_qino_alloc(mp, &gip,
1970                                           sbflags | XFS_SB_GQUOTINO, flags);
1971                 if (error) {
1972                         if (uip)
1973                                 IRELE(uip);
1974
1975                         return XFS_ERROR(error);
1976                 }
1977         }
1978
1979         XFS_QI_UQIP(mp) = uip;
1980         XFS_QI_GQIP(mp) = gip;
1981
1982         return 0;
1983 }
1984
1985
1986 /*
1987  * Traverse the freelist of dquots and attempt to reclaim a maximum of
1988  * 'howmany' dquots. This operation races with dqlookup(), and attempts to
1989  * favor the lookup function ...
1990  * XXXsup merge this with qm_reclaim_one().
1991  */
1992 STATIC int
1993 xfs_qm_shake_freelist(
1994         int howmany)
1995 {
1996         int             nreclaimed;
1997         xfs_dqhash_t    *hash;
1998         xfs_dquot_t     *dqp, *nextdqp;
1999         int             restarts;
2000         int             nflushes;
2001
2002         if (howmany <= 0)
2003                 return 0;
2004
2005         nreclaimed = 0;
2006         restarts = 0;
2007         nflushes = 0;
2008
2009 #ifdef QUOTADEBUG
2010         cmn_err(CE_DEBUG, "Shake free 0x%x", howmany);
2011 #endif
2012         /* lock order is : hashchainlock, freelistlock, mplistlock */
2013  tryagain:
2014         xfs_qm_freelist_lock(xfs_Gqm);
2015
2016         for (dqp = xfs_Gqm->qm_dqfreelist.qh_next;
2017              ((dqp != (xfs_dquot_t *) &xfs_Gqm->qm_dqfreelist) &&
2018               nreclaimed < howmany); ) {
2019                 xfs_dqlock(dqp);
2020
2021                 /*
2022                  * We are racing with dqlookup here. Naturally we don't
2023                  * want to reclaim a dquot that lookup wants.
2024                  */
2025                 if (dqp->dq_flags & XFS_DQ_WANT) {
2026                         xfs_dqunlock(dqp);
2027                         xfs_qm_freelist_unlock(xfs_Gqm);
2028                         if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
2029                                 return nreclaimed;
2030                         XQM_STATS_INC(xqmstats.xs_qm_dqwants);
2031                         goto tryagain;
2032                 }
2033
2034                 /*
2035                  * If the dquot is inactive, we are assured that it is
2036                  * not on the mplist or the hashlist, and that makes our
2037                  * life easier.
2038                  */
2039                 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
2040                         ASSERT(dqp->q_mount == NULL);
2041                         ASSERT(! XFS_DQ_IS_DIRTY(dqp));
2042                         ASSERT(dqp->HL_PREVP == NULL);
2043                         ASSERT(dqp->MPL_PREVP == NULL);
2044                         XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
2045                         nextdqp = dqp->dq_flnext;
2046                         goto off_freelist;
2047                 }
2048
2049                 ASSERT(dqp->MPL_PREVP);
2050                 /*
2051                  * Try to grab the flush lock. If this dquot is in the process of
2052                  * getting flushed to disk, we don't want to reclaim it.
2053                  */
2054                 if (!xfs_dqflock_nowait(dqp)) {
2055                         xfs_dqunlock(dqp);
2056                         dqp = dqp->dq_flnext;
2057                         continue;
2058                 }
2059
2060                 /*
2061                  * We have the flush lock so we know that this is not in the
2062                  * process of being flushed. So, if this is dirty, flush it
2063                  * DELWRI so that we don't get a freelist infested with
2064                  * dirty dquots.
2065                  */
2066                 if (XFS_DQ_IS_DIRTY(dqp)) {
2067                         int     error;
2068                         xfs_dqtrace_entry(dqp, "DQSHAKE: DQDIRTY");
2069                         /*
2070                          * We flush it delayed write, so don't bother
2071                          * releasing the mplock.
2072                          */
2073                         error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
2074                         if (error) {
2075                                 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
2076                         "xfs_qm_dqflush_all: dquot %p flush failed", dqp);
2077                         }
2078                         xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
2079                         dqp = dqp->dq_flnext;
2080                         continue;
2081                 }
2082                 /*
2083                  * We're trying to get the hashlock out of order. This races
2084                  * with dqlookup; so, we giveup and goto the next dquot if
2085                  * we couldn't get the hashlock. This way, we won't starve
2086                  * a dqlookup process that holds the hashlock that is
2087                  * waiting for the freelist lock.
2088                  */
2089                 if (! xfs_qm_dqhashlock_nowait(dqp)) {
2090                         xfs_dqfunlock(dqp);
2091                         xfs_dqunlock(dqp);
2092                         dqp = dqp->dq_flnext;
2093                         continue;
2094                 }
2095                 /*
2096                  * This races with dquot allocation code as well as dqflush_all
2097                  * and reclaim code. So, if we failed to grab the mplist lock,
2098                  * giveup everything and start over.
2099                  */
2100                 hash = dqp->q_hash;
2101                 ASSERT(hash);
2102                 if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
2103                         /* XXX put a sentinel so that we can come back here */
2104                         xfs_dqfunlock(dqp);
2105                         xfs_dqunlock(dqp);
2106                         XFS_DQ_HASH_UNLOCK(hash);
2107                         xfs_qm_freelist_unlock(xfs_Gqm);
2108                         if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
2109                                 return nreclaimed;
2110                         goto tryagain;
2111                 }
2112                 xfs_dqtrace_entry(dqp, "DQSHAKE: UNLINKING");
2113 #ifdef QUOTADEBUG
2114                 cmn_err(CE_DEBUG, "Shake 0x%p, ID 0x%x\n",
2115                         dqp, be32_to_cpu(dqp->q_core.d_id));
2116 #endif
2117                 ASSERT(dqp->q_nrefs == 0);
2118                 nextdqp = dqp->dq_flnext;
2119                 XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
2120                 XQM_HASHLIST_REMOVE(hash, dqp);
2121                 xfs_dqfunlock(dqp);
2122                 xfs_qm_mplist_unlock(dqp->q_mount);
2123                 XFS_DQ_HASH_UNLOCK(hash);
2124
2125  off_freelist:
2126                 XQM_FREELIST_REMOVE(dqp);
2127                 xfs_dqunlock(dqp);
2128                 nreclaimed++;
2129                 XQM_STATS_INC(xqmstats.xs_qm_dqshake_reclaims);
2130                 xfs_qm_dqdestroy(dqp);
2131                 dqp = nextdqp;
2132         }
2133         xfs_qm_freelist_unlock(xfs_Gqm);
2134         return nreclaimed;
2135 }
2136
2137
2138 /*
2139  * The kmem_shake interface is invoked when memory is running low.
2140  */
2141 /* ARGSUSED */
2142 STATIC int
2143 xfs_qm_shake(int nr_to_scan, gfp_t gfp_mask)
2144 {
2145         int     ndqused, nfree, n;
2146
2147         if (!kmem_shake_allow(gfp_mask))
2148                 return 0;
2149         if (!xfs_Gqm)
2150                 return 0;
2151
2152         nfree = xfs_Gqm->qm_dqfreelist.qh_nelems; /* free dquots */
2153         /* incore dquots in all f/s's */
2154         ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
2155
2156         ASSERT(ndqused >= 0);
2157
2158         if (nfree <= ndqused && nfree < ndquot)
2159                 return 0;
2160
2161         ndqused *= xfs_Gqm->qm_dqfree_ratio;    /* target # of free dquots */
2162         n = nfree - ndqused - ndquot;           /* # over target */
2163
2164         return xfs_qm_shake_freelist(MAX(nfree, n));
2165 }
2166
2167
2168 /*
2169  * Just pop the least recently used dquot off the freelist and
2170  * recycle it. The returned dquot is locked.
2171  */
2172 STATIC xfs_dquot_t *
2173 xfs_qm_dqreclaim_one(void)
2174 {
2175         xfs_dquot_t     *dqpout;
2176         xfs_dquot_t     *dqp;
2177         int             restarts;
2178         int             nflushes;
2179
2180         restarts = 0;
2181         dqpout = NULL;
2182         nflushes = 0;
2183
2184         /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
2185  startagain:
2186         xfs_qm_freelist_lock(xfs_Gqm);
2187
2188         FOREACH_DQUOT_IN_FREELIST(dqp, &(xfs_Gqm->qm_dqfreelist)) {
2189                 xfs_dqlock(dqp);
2190
2191                 /*
2192                  * We are racing with dqlookup here. Naturally we don't
2193                  * want to reclaim a dquot that lookup wants. We release the
2194                  * freelist lock and start over, so that lookup will grab
2195                  * both the dquot and the freelistlock.
2196                  */
2197                 if (dqp->dq_flags & XFS_DQ_WANT) {
2198                         ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
2199                         xfs_dqtrace_entry(dqp, "DQRECLAIM: DQWANT");
2200                         xfs_dqunlock(dqp);
2201                         xfs_qm_freelist_unlock(xfs_Gqm);
2202                         if (++restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
2203                                 return NULL;
2204                         XQM_STATS_INC(xqmstats.xs_qm_dqwants);
2205                         goto startagain;
2206                 }
2207
2208                 /*
2209                  * If the dquot is inactive, we are assured that it is
2210                  * not on the mplist or the hashlist, and that makes our
2211                  * life easier.
2212                  */
2213                 if (dqp->dq_flags & XFS_DQ_INACTIVE) {
2214                         ASSERT(dqp->q_mount == NULL);
2215                         ASSERT(! XFS_DQ_IS_DIRTY(dqp));
2216                         ASSERT(dqp->HL_PREVP == NULL);
2217                         ASSERT(dqp->MPL_PREVP == NULL);
2218                         XQM_FREELIST_REMOVE(dqp);
2219                         xfs_dqunlock(dqp);
2220                         dqpout = dqp;
2221                         XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
2222                         break;
2223                 }
2224
2225                 ASSERT(dqp->q_hash);
2226                 ASSERT(dqp->MPL_PREVP);
2227
2228                 /*
2229                  * Try to grab the flush lock. If this dquot is in the process of
2230                  * getting flushed to disk, we don't want to reclaim it.
2231                  */
2232                 if (!xfs_dqflock_nowait(dqp)) {
2233                         xfs_dqunlock(dqp);
2234                         continue;
2235                 }
2236
2237                 /*
2238                  * We have the flush lock so we know that this is not in the
2239                  * process of being flushed. So, if this is dirty, flush it
2240                  * DELWRI so that we don't get a freelist infested with
2241                  * dirty dquots.
2242                  */
2243                 if (XFS_DQ_IS_DIRTY(dqp)) {
2244                         int     error;
2245                         xfs_dqtrace_entry(dqp, "DQRECLAIM: DQDIRTY");
2246                         /*
2247                          * We flush it delayed write, so don't bother
2248                          * releasing the freelist lock.
2249                          */
2250                         error = xfs_qm_dqflush(dqp, XFS_QMOPT_DELWRI);
2251                         if (error) {
2252                                 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
2253                         "xfs_qm_dqreclaim: dquot %p flush failed", dqp);
2254                         }
2255                         xfs_dqunlock(dqp); /* dqflush unlocks dqflock */
2256                         continue;
2257                 }
2258
2259                 if (! xfs_qm_mplist_nowait(dqp->q_mount)) {
2260                         xfs_dqfunlock(dqp);
2261                         xfs_dqunlock(dqp);
2262                         continue;
2263                 }
2264
2265                 if (! xfs_qm_dqhashlock_nowait(dqp))
2266                         goto mplistunlock;
2267
2268                 ASSERT(dqp->q_nrefs == 0);
2269                 xfs_dqtrace_entry(dqp, "DQRECLAIM: UNLINKING");
2270                 XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(dqp->q_mount)), dqp);
2271                 XQM_HASHLIST_REMOVE(dqp->q_hash, dqp);
2272                 XQM_FREELIST_REMOVE(dqp);
2273                 dqpout = dqp;
2274                 XFS_DQ_HASH_UNLOCK(dqp->q_hash);
2275  mplistunlock:
2276                 xfs_qm_mplist_unlock(dqp->q_mount);
2277                 xfs_dqfunlock(dqp);
2278                 xfs_dqunlock(dqp);
2279                 if (dqpout)
2280                         break;
2281         }
2282
2283         xfs_qm_freelist_unlock(xfs_Gqm);
2284         return dqpout;
2285 }
2286
2287
2288 /*------------------------------------------------------------------*/
2289
2290 /*
2291  * Return a new incore dquot. Depending on the number of
2292  * dquots in the system, we either allocate a new one on the kernel heap,
2293  * or reclaim a free one.
2294  * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
2295  * to reclaim an existing one from the freelist.
2296  */
2297 boolean_t
2298 xfs_qm_dqalloc_incore(
2299         xfs_dquot_t **O_dqpp)
2300 {
2301         xfs_dquot_t     *dqp;
2302
2303         /*
2304          * Check against high water mark to see if we want to pop
2305          * a nincompoop dquot off the freelist.
2306          */
2307         if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
2308                 /*
2309                  * Try to recycle a dquot from the freelist.
2310                  */
2311                 if ((dqp = xfs_qm_dqreclaim_one())) {
2312                         XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
2313                         /*
2314                          * Just zero the core here. The rest will get
2315                          * reinitialized by caller. XXX we shouldn't even
2316                          * do this zero ...
2317                          */
2318                         memset(&dqp->q_core, 0, sizeof(dqp->q_core));
2319                         *O_dqpp = dqp;
2320                         return B_FALSE;
2321                 }
2322                 XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
2323         }
2324
2325         /*
2326          * Allocate a brand new dquot on the kernel heap and return it
2327          * to the caller to initialize.
2328          */
2329         ASSERT(xfs_Gqm->qm_dqzone != NULL);
2330         *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
2331         atomic_inc(&xfs_Gqm->qm_totaldquots);
2332
2333         return B_TRUE;
2334 }
2335
2336
2337 /*
2338  * Start a transaction and write the incore superblock changes to
2339  * disk. flags parameter indicates which fields have changed.
2340  */
2341 int
2342 xfs_qm_write_sb_changes(
2343         xfs_mount_t     *mp,
2344         __int64_t       flags)
2345 {
2346         xfs_trans_t     *tp;
2347         int             error;
2348
2349 #ifdef QUOTADEBUG
2350         cmn_err(CE_NOTE, "Writing superblock quota changes :%s", mp->m_fsname);
2351 #endif
2352         tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
2353         if ((error = xfs_trans_reserve(tp, 0,
2354                                       mp->m_sb.sb_sectsize + 128, 0,
2355                                       0,
2356                                       XFS_DEFAULT_LOG_COUNT))) {
2357                 xfs_trans_cancel(tp, 0);
2358                 return error;
2359         }
2360
2361         xfs_mod_sb(tp, flags);
2362         error = xfs_trans_commit(tp, 0);
2363
2364         return error;
2365 }
2366
2367
2368 /* --------------- utility functions for vnodeops ---------------- */
2369
2370
2371 /*
2372  * Given an inode, a uid and gid (from cred_t) make sure that we have
2373  * allocated relevant dquot(s) on disk, and that we won't exceed inode
2374  * quotas by creating this file.
2375  * This also attaches dquot(s) to the given inode after locking it,
2376  * and returns the dquots corresponding to the uid and/or gid.
2377  *
2378  * in   : inode (unlocked)
2379  * out  : udquot, gdquot with references taken and unlocked
2380  */
2381 int
2382 xfs_qm_vop_dqalloc(
2383         xfs_mount_t     *mp,
2384         xfs_inode_t     *ip,
2385         uid_t           uid,
2386         gid_t           gid,
2387         prid_t          prid,
2388         uint            flags,
2389         xfs_dquot_t     **O_udqpp,
2390         xfs_dquot_t     **O_gdqpp)
2391 {
2392         int             error;
2393         xfs_dquot_t     *uq, *gq;
2394         uint            lockflags;
2395
2396         if (!XFS_IS_QUOTA_ON(mp))
2397                 return 0;
2398
2399         lockflags = XFS_ILOCK_EXCL;
2400         xfs_ilock(ip, lockflags);
2401
2402         if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip))
2403                 gid = ip->i_d.di_gid;
2404
2405         /*
2406          * Attach the dquot(s) to this inode, doing a dquot allocation
2407          * if necessary. The dquot(s) will not be locked.
2408          */
2409         if (XFS_NOT_DQATTACHED(mp, ip)) {
2410                 if ((error = xfs_qm_dqattach(ip, XFS_QMOPT_DQALLOC |
2411                                             XFS_QMOPT_ILOCKED))) {
2412                         xfs_iunlock(ip, lockflags);
2413                         return error;
2414                 }
2415         }
2416
2417         uq = gq = NULL;
2418         if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
2419                 if (ip->i_d.di_uid != uid) {
2420                         /*
2421                          * What we need is the dquot that has this uid, and
2422                          * if we send the inode to dqget, the uid of the inode
2423                          * takes priority over what's sent in the uid argument.
2424                          * We must unlock inode here before calling dqget if
2425                          * we're not sending the inode, because otherwise
2426                          * we'll deadlock by doing trans_reserve while
2427                          * holding ilock.
2428                          */
2429                         xfs_iunlock(ip, lockflags);
2430                         if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
2431                                                  XFS_DQ_USER,
2432                                                  XFS_QMOPT_DQALLOC |
2433                                                  XFS_QMOPT_DOWARN,
2434                                                  &uq))) {
2435                                 ASSERT(error != ENOENT);
2436                                 return error;
2437                         }
2438                         /*
2439                          * Get the ilock in the right order.
2440                          */
2441                         xfs_dqunlock(uq);
2442                         lockflags = XFS_ILOCK_SHARED;
2443                         xfs_ilock(ip, lockflags);
2444                 } else {
2445                         /*
2446                          * Take an extra reference, because we'll return
2447                          * this to caller
2448                          */
2449                         ASSERT(ip->i_udquot);
2450                         uq = ip->i_udquot;
2451                         xfs_dqlock(uq);
2452                         XFS_DQHOLD(uq);
2453                         xfs_dqunlock(uq);
2454                 }
2455         }
2456         if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
2457                 if (ip->i_d.di_gid != gid) {
2458                         xfs_iunlock(ip, lockflags);
2459                         if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
2460                                                  XFS_DQ_GROUP,
2461                                                  XFS_QMOPT_DQALLOC |
2462                                                  XFS_QMOPT_DOWARN,
2463                                                  &gq))) {
2464                                 if (uq)
2465                                         xfs_qm_dqrele(uq);
2466                                 ASSERT(error != ENOENT);
2467                                 return error;
2468                         }
2469                         xfs_dqunlock(gq);
2470                         lockflags = XFS_ILOCK_SHARED;
2471                         xfs_ilock(ip, lockflags);
2472                 } else {
2473                         ASSERT(ip->i_gdquot);
2474                         gq = ip->i_gdquot;
2475                         xfs_dqlock(gq);
2476                         XFS_DQHOLD(gq);
2477                         xfs_dqunlock(gq);
2478                 }
2479         } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
2480                 if (ip->i_d.di_projid != prid) {
2481                         xfs_iunlock(ip, lockflags);
2482                         if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
2483                                                  XFS_DQ_PROJ,
2484                                                  XFS_QMOPT_DQALLOC |
2485                                                  XFS_QMOPT_DOWARN,
2486                                                  &gq))) {
2487                                 if (uq)
2488                                         xfs_qm_dqrele(uq);
2489                                 ASSERT(error != ENOENT);
2490                                 return (error);
2491                         }
2492                         xfs_dqunlock(gq);
2493                         lockflags = XFS_ILOCK_SHARED;
2494                         xfs_ilock(ip, lockflags);
2495                 } else {
2496                         ASSERT(ip->i_gdquot);
2497                         gq = ip->i_gdquot;
2498                         xfs_dqlock(gq);
2499                         XFS_DQHOLD(gq);
2500                         xfs_dqunlock(gq);
2501                 }
2502         }
2503         if (uq)
2504                 xfs_dqtrace_entry_ino(uq, "DQALLOC", ip);
2505
2506         xfs_iunlock(ip, lockflags);
2507         if (O_udqpp)
2508                 *O_udqpp = uq;
2509         else if (uq)
2510                 xfs_qm_dqrele(uq);
2511         if (O_gdqpp)
2512                 *O_gdqpp = gq;
2513         else if (gq)
2514                 xfs_qm_dqrele(gq);
2515         return 0;
2516 }
2517
2518 /*
2519  * Actually transfer ownership, and do dquot modifications.
2520  * These were already reserved.
2521  */
2522 xfs_dquot_t *
2523 xfs_qm_vop_chown(
2524         xfs_trans_t     *tp,
2525         xfs_inode_t     *ip,
2526         xfs_dquot_t     **IO_olddq,
2527         xfs_dquot_t     *newdq)
2528 {
2529         xfs_dquot_t     *prevdq;
2530         uint            bfield = XFS_IS_REALTIME_INODE(ip) ?
2531                                  XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
2532
2533         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2534         ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
2535
2536         /* old dquot */
2537         prevdq = *IO_olddq;
2538         ASSERT(prevdq);
2539         ASSERT(prevdq != newdq);
2540
2541         xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
2542         xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
2543
2544         /* the sparkling new dquot */
2545         xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
2546         xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
2547
2548         /*
2549          * Take an extra reference, because the inode
2550          * is going to keep this dquot pointer even
2551          * after the trans_commit.
2552          */
2553         xfs_dqlock(newdq);
2554         XFS_DQHOLD(newdq);
2555         xfs_dqunlock(newdq);
2556         *IO_olddq = newdq;
2557
2558         return prevdq;
2559 }
2560
2561 /*
2562  * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
2563  */
2564 int
2565 xfs_qm_vop_chown_reserve(
2566         xfs_trans_t     *tp,
2567         xfs_inode_t     *ip,
2568         xfs_dquot_t     *udqp,
2569         xfs_dquot_t     *gdqp,
2570         uint            flags)
2571 {
2572         int             error;
2573         xfs_mount_t     *mp;
2574         uint            delblks, blkflags, prjflags = 0;
2575         xfs_dquot_t     *unresudq, *unresgdq, *delblksudq, *delblksgdq;
2576
2577         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
2578         mp = ip->i_mount;
2579         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2580
2581         delblks = ip->i_delayed_blks;
2582         delblksudq = delblksgdq = unresudq = unresgdq = NULL;
2583         blkflags = XFS_IS_REALTIME_INODE(ip) ?
2584                         XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
2585
2586         if (XFS_IS_UQUOTA_ON(mp) && udqp &&
2587             ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) {
2588                 delblksudq = udqp;
2589                 /*
2590                  * If there are delayed allocation blocks, then we have to
2591                  * unreserve those from the old dquot, and add them to the
2592                  * new dquot.
2593                  */
2594                 if (delblks) {
2595                         ASSERT(ip->i_udquot);
2596                         unresudq = ip->i_udquot;
2597                 }
2598         }
2599         if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
2600                 if (XFS_IS_PQUOTA_ON(ip->i_mount) &&
2601                      ip->i_d.di_projid != be32_to_cpu(gdqp->q_core.d_id))
2602                         prjflags = XFS_QMOPT_ENOSPC;
2603
2604                 if (prjflags ||
2605                     (XFS_IS_GQUOTA_ON(ip->i_mount) &&
2606                      ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) {
2607                         delblksgdq = gdqp;
2608                         if (delblks) {
2609                                 ASSERT(ip->i_gdquot);
2610                                 unresgdq = ip->i_gdquot;
2611                         }
2612                 }
2613         }
2614
2615         if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
2616                                 delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
2617                                 flags | blkflags | prjflags)))
2618                 return (error);
2619
2620         /*
2621          * Do the delayed blks reservations/unreservations now. Since, these
2622          * are done without the help of a transaction, if a reservation fails
2623          * its previous reservations won't be automatically undone by trans
2624          * code. So, we have to do it manually here.
2625          */
2626         if (delblks) {
2627                 /*
2628                  * Do the reservations first. Unreservation can't fail.
2629                  */
2630                 ASSERT(delblksudq || delblksgdq);
2631                 ASSERT(unresudq || unresgdq);
2632                 if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
2633                                 delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
2634                                 flags | blkflags | prjflags)))
2635                         return (error);
2636                 xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
2637                                 unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
2638                                 blkflags);
2639         }
2640
2641         return (0);
2642 }
2643
2644 int
2645 xfs_qm_vop_rename_dqattach(
2646         xfs_inode_t     **i_tab)
2647 {
2648         xfs_inode_t     *ip;
2649         int             i;
2650         int             error;
2651
2652         ip = i_tab[0];
2653
2654         if (! XFS_IS_QUOTA_ON(ip->i_mount))
2655                 return 0;
2656
2657         if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
2658                 error = xfs_qm_dqattach(ip, 0);
2659                 if (error)
2660                         return error;
2661         }
2662         for (i = 1; (i < 4 && i_tab[i]); i++) {
2663                 /*
2664                  * Watch out for duplicate entries in the table.
2665                  */
2666                 if ((ip = i_tab[i]) != i_tab[i-1]) {
2667                         if (XFS_NOT_DQATTACHED(ip->i_mount, ip)) {
2668                                 error = xfs_qm_dqattach(ip, 0);
2669                                 if (error)
2670                                         return error;
2671                         }
2672                 }
2673         }
2674         return 0;
2675 }
2676
2677 void
2678 xfs_qm_vop_dqattach_and_dqmod_newinode(
2679         xfs_trans_t     *tp,
2680         xfs_inode_t     *ip,
2681         xfs_dquot_t     *udqp,
2682         xfs_dquot_t     *gdqp)
2683 {
2684         if (!XFS_IS_QUOTA_ON(tp->t_mountp))
2685                 return;
2686
2687         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2688         ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
2689
2690         if (udqp) {
2691                 xfs_dqlock(udqp);
2692                 XFS_DQHOLD(udqp);
2693                 xfs_dqunlock(udqp);
2694                 ASSERT(ip->i_udquot == NULL);
2695                 ip->i_udquot = udqp;
2696                 ASSERT(XFS_IS_UQUOTA_ON(tp->t_mountp));
2697                 ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
2698                 xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
2699         }
2700         if (gdqp) {
2701                 xfs_dqlock(gdqp);
2702                 XFS_DQHOLD(gdqp);
2703                 xfs_dqunlock(gdqp);
2704                 ASSERT(ip->i_gdquot == NULL);
2705                 ip->i_gdquot = gdqp;
2706                 ASSERT(XFS_IS_OQUOTA_ON(tp->t_mountp));
2707                 ASSERT((XFS_IS_GQUOTA_ON(tp->t_mountp) ?
2708                         ip->i_d.di_gid : ip->i_d.di_projid) ==
2709                                 be32_to_cpu(gdqp->q_core.d_id));
2710                 xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
2711         }
2712 }
2713
2714 /* ------------- list stuff -----------------*/
2715 STATIC void
2716 xfs_qm_freelist_init(xfs_frlist_t *ql)
2717 {
2718         ql->qh_next = ql->qh_prev = (xfs_dquot_t *) ql;
2719         mutex_init(&ql->qh_lock);
2720         ql->qh_version = 0;
2721         ql->qh_nelems = 0;
2722 }
2723
2724 STATIC void
2725 xfs_qm_freelist_destroy(xfs_frlist_t *ql)
2726 {
2727         xfs_dquot_t     *dqp, *nextdqp;
2728
2729         mutex_lock(&ql->qh_lock);
2730         for (dqp = ql->qh_next;
2731              dqp != (xfs_dquot_t *)ql; ) {
2732                 xfs_dqlock(dqp);
2733                 nextdqp = dqp->dq_flnext;
2734 #ifdef QUOTADEBUG
2735                 cmn_err(CE_DEBUG, "FREELIST destroy 0x%p", dqp);
2736 #endif
2737                 XQM_FREELIST_REMOVE(dqp);
2738                 xfs_dqunlock(dqp);
2739                 xfs_qm_dqdestroy(dqp);
2740                 dqp = nextdqp;
2741         }
2742         mutex_unlock(&ql->qh_lock);
2743         mutex_destroy(&ql->qh_lock);
2744
2745         ASSERT(ql->qh_nelems == 0);
2746 }
2747
2748 STATIC void
2749 xfs_qm_freelist_insert(xfs_frlist_t *ql, xfs_dquot_t *dq)
2750 {
2751         dq->dq_flnext = ql->qh_next;
2752         dq->dq_flprev = (xfs_dquot_t *)ql;
2753         ql->qh_next = dq;
2754         dq->dq_flnext->dq_flprev = dq;
2755         xfs_Gqm->qm_dqfreelist.qh_nelems++;
2756         xfs_Gqm->qm_dqfreelist.qh_version++;
2757 }
2758
2759 void
2760 xfs_qm_freelist_unlink(xfs_dquot_t *dq)
2761 {
2762         xfs_dquot_t *next = dq->dq_flnext;
2763         xfs_dquot_t *prev = dq->dq_flprev;
2764
2765         next->dq_flprev = prev;
2766         prev->dq_flnext = next;
2767         dq->dq_flnext = dq->dq_flprev = dq;
2768         xfs_Gqm->qm_dqfreelist.qh_nelems--;
2769         xfs_Gqm->qm_dqfreelist.qh_version++;
2770 }
2771
2772 void
2773 xfs_qm_freelist_append(xfs_frlist_t *ql, xfs_dquot_t *dq)
2774 {
2775         xfs_qm_freelist_insert((xfs_frlist_t *)ql->qh_prev, dq);
2776 }
2777
2778 STATIC int
2779 xfs_qm_dqhashlock_nowait(
2780         xfs_dquot_t *dqp)
2781 {
2782         int locked;
2783
2784         locked = mutex_trylock(&((dqp)->q_hash->qh_lock));
2785         return locked;
2786 }
2787
2788 int
2789 xfs_qm_freelist_lock_nowait(
2790         xfs_qm_t *xqm)
2791 {
2792         int locked;
2793
2794         locked = mutex_trylock(&(xqm->qm_dqfreelist.qh_lock));
2795         return locked;
2796 }
2797
2798 STATIC int
2799 xfs_qm_mplist_nowait(
2800         xfs_mount_t     *mp)
2801 {
2802         int locked;
2803
2804         ASSERT(mp->m_quotainfo);
2805         locked = mutex_trylock(&(XFS_QI_MPLLOCK(mp)));
2806         return locked;
2807 }