Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/sparc-2.6
[linux-2.6] / fs / xfs / quota / xfs_dquot.c
1 /*
2  * Copyright (c) 2000-2003 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_bit.h"
21 #include "xfs_log.h"
22 #include "xfs_inum.h"
23 #include "xfs_trans.h"
24 #include "xfs_sb.h"
25 #include "xfs_ag.h"
26 #include "xfs_dir2.h"
27 #include "xfs_alloc.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_quota.h"
30 #include "xfs_mount.h"
31 #include "xfs_bmap_btree.h"
32 #include "xfs_alloc_btree.h"
33 #include "xfs_ialloc_btree.h"
34 #include "xfs_dir2_sf.h"
35 #include "xfs_attr_sf.h"
36 #include "xfs_dinode.h"
37 #include "xfs_inode.h"
38 #include "xfs_btree.h"
39 #include "xfs_ialloc.h"
40 #include "xfs_bmap.h"
41 #include "xfs_rtalloc.h"
42 #include "xfs_error.h"
43 #include "xfs_itable.h"
44 #include "xfs_rw.h"
45 #include "xfs_acl.h"
46 #include "xfs_attr.h"
47 #include "xfs_buf_item.h"
48 #include "xfs_trans_space.h"
49 #include "xfs_trans_priv.h"
50 #include "xfs_qm.h"
51
52
53 /*
54    LOCK ORDER
55
56    inode lock               (ilock)
57    dquot hash-chain lock    (hashlock)
58    xqm dquot freelist lock  (freelistlock)
59    mount's dquot list lock  (mplistlock)
60    user dquot lock - lock ordering among dquots is based on the uid or gid
61    group dquot lock - similar to udquots. Between the two dquots, the udquot
62                       has to be locked first.
63    pin lock - the dquot lock must be held to take this lock.
64    flush lock - ditto.
65 */
66
67 STATIC void             xfs_qm_dqflush_done(xfs_buf_t *, xfs_dq_logitem_t *);
68
69 #ifdef DEBUG
70 xfs_buftarg_t *xfs_dqerror_target;
71 int xfs_do_dqerror;
72 int xfs_dqreq_num;
73 int xfs_dqerror_mod = 33;
74 #endif
75
76 /*
77  * Allocate and initialize a dquot. We don't always allocate fresh memory;
78  * we try to reclaim a free dquot if the number of incore dquots are above
79  * a threshold.
80  * The only field inside the core that gets initialized at this point
81  * is the d_id field. The idea is to fill in the entire q_core
82  * when we read in the on disk dquot.
83  */
84 STATIC xfs_dquot_t *
85 xfs_qm_dqinit(
86         xfs_mount_t  *mp,
87         xfs_dqid_t   id,
88         uint         type)
89 {
90         xfs_dquot_t     *dqp;
91         boolean_t       brandnewdquot;
92
93         brandnewdquot = xfs_qm_dqalloc_incore(&dqp);
94         dqp->dq_flags = type;
95         dqp->q_core.d_id = cpu_to_be32(id);
96         dqp->q_mount = mp;
97
98         /*
99          * No need to re-initialize these if this is a reclaimed dquot.
100          */
101         if (brandnewdquot) {
102                 dqp->dq_flnext = dqp->dq_flprev = dqp;
103                 mutex_init(&dqp->q_qlock);
104                 sv_init(&dqp->q_pinwait, SV_DEFAULT, "pdq");
105
106                 /*
107                  * Because we want to use a counting completion, complete
108                  * the flush completion once to allow a single access to
109                  * the flush completion without blocking.
110                  */
111                 init_completion(&dqp->q_flush);
112                 complete(&dqp->q_flush);
113
114 #ifdef XFS_DQUOT_TRACE
115                 dqp->q_trace = ktrace_alloc(DQUOT_TRACE_SIZE, KM_NOFS);
116                 xfs_dqtrace_entry(dqp, "DQINIT");
117 #endif
118         } else {
119                 /*
120                  * Only the q_core portion was zeroed in dqreclaim_one().
121                  * So, we need to reset others.
122                  */
123                  dqp->q_nrefs = 0;
124                  dqp->q_blkno = 0;
125                  dqp->MPL_NEXT = dqp->HL_NEXT = NULL;
126                  dqp->HL_PREVP = dqp->MPL_PREVP = NULL;
127                  dqp->q_bufoffset = 0;
128                  dqp->q_fileoffset = 0;
129                  dqp->q_transp = NULL;
130                  dqp->q_gdquot = NULL;
131                  dqp->q_res_bcount = 0;
132                  dqp->q_res_icount = 0;
133                  dqp->q_res_rtbcount = 0;
134                  dqp->q_pincount = 0;
135                  dqp->q_hash = NULL;
136                  ASSERT(dqp->dq_flnext == dqp->dq_flprev);
137
138 #ifdef XFS_DQUOT_TRACE
139                  ASSERT(dqp->q_trace);
140                  xfs_dqtrace_entry(dqp, "DQRECLAIMED_INIT");
141 #endif
142          }
143
144         /*
145          * log item gets initialized later
146          */
147         return (dqp);
148 }
149
150 /*
151  * This is called to free all the memory associated with a dquot
152  */
153 void
154 xfs_qm_dqdestroy(
155         xfs_dquot_t     *dqp)
156 {
157         ASSERT(! XFS_DQ_IS_ON_FREELIST(dqp));
158
159         mutex_destroy(&dqp->q_qlock);
160         sv_destroy(&dqp->q_pinwait);
161
162 #ifdef XFS_DQUOT_TRACE
163         if (dqp->q_trace)
164              ktrace_free(dqp->q_trace);
165         dqp->q_trace = NULL;
166 #endif
167         kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
168         atomic_dec(&xfs_Gqm->qm_totaldquots);
169 }
170
171 /*
172  * This is what a 'fresh' dquot inside a dquot chunk looks like on disk.
173  */
174 STATIC void
175 xfs_qm_dqinit_core(
176         xfs_dqid_t      id,
177         uint            type,
178         xfs_dqblk_t     *d)
179 {
180         /*
181          * Caller has zero'd the entire dquot 'chunk' already.
182          */
183         d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
184         d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
185         d->dd_diskdq.d_id = cpu_to_be32(id);
186         d->dd_diskdq.d_flags = type;
187 }
188
189
#ifdef XFS_DQUOT_TRACE
/*
 * Dquot tracing for debugging.
 *
 * Records one entry in the dquot's trace buffer: the tag string, the
 * reference count, flags, reserved block count, the usage counters and
 * limits from q_core (converted to host byte order), the dquot id, the
 * current pid, the caller-supplied return address and, when an inode is
 * passed, its inode number and user dquot pointer.
 */
/* ARGSUSED */
void
__xfs_dqtrace_entry(
	xfs_dquot_t	*dqp,
	char		*func,
	void		*retaddr,
	xfs_inode_t	*ip)
{
	xfs_ino_t	inum = ip ? ip->i_ino : 0;
	xfs_dquot_t	*user_dq = ip ? ip->i_udquot : NULL;

	ASSERT(dqp->q_trace);

	ktrace_enter(dqp->q_trace,
		     (void *)(__psint_t)DQUOT_KTRACE_ENTRY,
		     (void *)func,
		     (void *)(__psint_t)dqp->q_nrefs,
		     (void *)(__psint_t)dqp->dq_flags,
		     (void *)(__psint_t)dqp->q_res_bcount,
		     (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_bcount),
		     (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_icount),
		     (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_blk_hardlimit),
		     (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_blk_softlimit),
		     (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_ino_hardlimit),
		     (void *)(__psint_t)be64_to_cpu(dqp->q_core.d_ino_softlimit),
		     (void *)(__psint_t)be32_to_cpu(dqp->q_core.d_id),
		     (void *)(__psint_t)current_pid(),
		     (void *)(__psint_t)inum,
		     (void *)(__psint_t)retaddr,
		     (void *)(__psint_t)user_dq);
}
#endif
230
231
232 /*
233  * If default limits are in force, push them into the dquot now.
234  * We overwrite the dquot limits only if they are zero and this
235  * is not the root dquot.
236  */
237 void
238 xfs_qm_adjust_dqlimits(
239         xfs_mount_t             *mp,
240         xfs_disk_dquot_t        *d)
241 {
242         xfs_quotainfo_t         *q = mp->m_quotainfo;
243
244         ASSERT(d->d_id);
245
246         if (q->qi_bsoftlimit && !d->d_blk_softlimit)
247                 d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit);
248         if (q->qi_bhardlimit && !d->d_blk_hardlimit)
249                 d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit);
250         if (q->qi_isoftlimit && !d->d_ino_softlimit)
251                 d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit);
252         if (q->qi_ihardlimit && !d->d_ino_hardlimit)
253                 d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit);
254         if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit)
255                 d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit);
256         if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
257                 d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit);
258 }
259
260 /*
261  * Check the limits and timers of a dquot and start or reset timers
262  * if necessary.
263  * This gets called even when quota enforcement is OFF, which makes our
264  * life a little less complicated. (We just don't reject any quota
265  * reservations in that case, when enforcement is off).
266  * We also return 0 as the values of the timers in Q_GETQUOTA calls, when
267  * enforcement's off.
268  * In contrast, warnings are a little different in that they don't
269  * 'automatically' get started when limits get exceeded.  They do
270  * get reset to zero, however, when we find the count to be under
271  * the soft limit (they are only ever set non-zero via userspace).
272  */
273 void
274 xfs_qm_adjust_dqtimers(
275         xfs_mount_t             *mp,
276         xfs_disk_dquot_t        *d)
277 {
278         ASSERT(d->d_id);
279
280 #ifdef QUOTADEBUG
281         if (d->d_blk_hardlimit)
282                 ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
283                        be64_to_cpu(d->d_blk_hardlimit));
284         if (d->d_ino_hardlimit)
285                 ASSERT(be64_to_cpu(d->d_ino_softlimit) <=
286                        be64_to_cpu(d->d_ino_hardlimit));
287         if (d->d_rtb_hardlimit)
288                 ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
289                        be64_to_cpu(d->d_rtb_hardlimit));
290 #endif
291         if (!d->d_btimer) {
292                 if ((d->d_blk_softlimit &&
293                      (be64_to_cpu(d->d_bcount) >=
294                       be64_to_cpu(d->d_blk_softlimit))) ||
295                     (d->d_blk_hardlimit &&
296                      (be64_to_cpu(d->d_bcount) >=
297                       be64_to_cpu(d->d_blk_hardlimit)))) {
298                         d->d_btimer = cpu_to_be32(get_seconds() +
299                                         XFS_QI_BTIMELIMIT(mp));
300                 } else {
301                         d->d_bwarns = 0;
302                 }
303         } else {
304                 if ((!d->d_blk_softlimit ||
305                      (be64_to_cpu(d->d_bcount) <
306                       be64_to_cpu(d->d_blk_softlimit))) &&
307                     (!d->d_blk_hardlimit ||
308                     (be64_to_cpu(d->d_bcount) <
309                      be64_to_cpu(d->d_blk_hardlimit)))) {
310                         d->d_btimer = 0;
311                 }
312         }
313
314         if (!d->d_itimer) {
315                 if ((d->d_ino_softlimit &&
316                      (be64_to_cpu(d->d_icount) >=
317                       be64_to_cpu(d->d_ino_softlimit))) ||
318                     (d->d_ino_hardlimit &&
319                      (be64_to_cpu(d->d_icount) >=
320                       be64_to_cpu(d->d_ino_hardlimit)))) {
321                         d->d_itimer = cpu_to_be32(get_seconds() +
322                                         XFS_QI_ITIMELIMIT(mp));
323                 } else {
324                         d->d_iwarns = 0;
325                 }
326         } else {
327                 if ((!d->d_ino_softlimit ||
328                      (be64_to_cpu(d->d_icount) <
329                       be64_to_cpu(d->d_ino_softlimit)))  &&
330                     (!d->d_ino_hardlimit ||
331                      (be64_to_cpu(d->d_icount) <
332                       be64_to_cpu(d->d_ino_hardlimit)))) {
333                         d->d_itimer = 0;
334                 }
335         }
336
337         if (!d->d_rtbtimer) {
338                 if ((d->d_rtb_softlimit &&
339                      (be64_to_cpu(d->d_rtbcount) >=
340                       be64_to_cpu(d->d_rtb_softlimit))) ||
341                     (d->d_rtb_hardlimit &&
342                      (be64_to_cpu(d->d_rtbcount) >=
343                       be64_to_cpu(d->d_rtb_hardlimit)))) {
344                         d->d_rtbtimer = cpu_to_be32(get_seconds() +
345                                         XFS_QI_RTBTIMELIMIT(mp));
346                 } else {
347                         d->d_rtbwarns = 0;
348                 }
349         } else {
350                 if ((!d->d_rtb_softlimit ||
351                      (be64_to_cpu(d->d_rtbcount) <
352                       be64_to_cpu(d->d_rtb_softlimit))) &&
353                     (!d->d_rtb_hardlimit ||
354                      (be64_to_cpu(d->d_rtbcount) <
355                       be64_to_cpu(d->d_rtb_hardlimit)))) {
356                         d->d_rtbtimer = 0;
357                 }
358         }
359 }
360
361 /*
362  * initialize a buffer full of dquots and log the whole thing
363  */
364 STATIC void
365 xfs_qm_init_dquot_blk(
366         xfs_trans_t     *tp,
367         xfs_mount_t     *mp,
368         xfs_dqid_t      id,
369         uint            type,
370         xfs_buf_t       *bp)
371 {
372         xfs_dqblk_t     *d;
373         int             curid, i;
374
375         ASSERT(tp);
376         ASSERT(XFS_BUF_ISBUSY(bp));
377         ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
378
379         d = (xfs_dqblk_t *)XFS_BUF_PTR(bp);
380
381         /*
382          * ID of the first dquot in the block - id's are zero based.
383          */
384         curid = id - (id % XFS_QM_DQPERBLK(mp));
385         ASSERT(curid >= 0);
386         memset(d, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp)));
387         for (i = 0; i < XFS_QM_DQPERBLK(mp); i++, d++, curid++)
388                 xfs_qm_dqinit_core(curid, type, d);
389         xfs_trans_dquot_buf(tp, bp,
390                             (type & XFS_DQ_USER ? XFS_BLI_UDQUOT_BUF :
391                             ((type & XFS_DQ_PROJ) ? XFS_BLI_PDQUOT_BUF :
392                              XFS_BLI_GDQUOT_BUF)));
393         xfs_trans_log_buf(tp, bp, 0, BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1);
394 }
395
396
397
398 /*
399  * Allocate a block and fill it with dquots.
400  * This is called when the bmapi finds a hole.
401  */
402 STATIC int
403 xfs_qm_dqalloc(
404         xfs_trans_t     **tpp,
405         xfs_mount_t     *mp,
406         xfs_dquot_t     *dqp,
407         xfs_inode_t     *quotip,
408         xfs_fileoff_t   offset_fsb,
409         xfs_buf_t       **O_bpp)
410 {
411         xfs_fsblock_t   firstblock;
412         xfs_bmap_free_t flist;
413         xfs_bmbt_irec_t map;
414         int             nmaps, error, committed;
415         xfs_buf_t       *bp;
416         xfs_trans_t     *tp = *tpp;
417
418         ASSERT(tp != NULL);
419         xfs_dqtrace_entry(dqp, "DQALLOC");
420
421         /*
422          * Initialize the bmap freelist prior to calling bmapi code.
423          */
424         XFS_BMAP_INIT(&flist, &firstblock);
425         xfs_ilock(quotip, XFS_ILOCK_EXCL);
426         /*
427          * Return if this type of quotas is turned off while we didn't
428          * have an inode lock
429          */
430         if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
431                 xfs_iunlock(quotip, XFS_ILOCK_EXCL);
432                 return (ESRCH);
433         }
434
435         /*
436          * xfs_trans_commit normally decrements the vnode ref count
437          * when it unlocks the inode. Since we want to keep the quota
438          * inode around, we bump the vnode ref count now.
439          */
440         IHOLD(quotip);
441
442         xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
443         nmaps = 1;
444         if ((error = xfs_bmapi(tp, quotip,
445                               offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB,
446                               XFS_BMAPI_METADATA | XFS_BMAPI_WRITE,
447                               &firstblock,
448                               XFS_QM_DQALLOC_SPACE_RES(mp),
449                               &map, &nmaps, &flist, NULL))) {
450                 goto error0;
451         }
452         ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
453         ASSERT(nmaps == 1);
454         ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
455                (map.br_startblock != HOLESTARTBLOCK));
456
457         /*
458          * Keep track of the blkno to save a lookup later
459          */
460         dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
461
462         /* now we can just get the buffer (there's nothing to read yet) */
463         bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
464                                dqp->q_blkno,
465                                XFS_QI_DQCHUNKLEN(mp),
466                                0);
467         if (!bp || (error = XFS_BUF_GETERROR(bp)))
468                 goto error1;
469         /*
470          * Make a chunk of dquots out of this buffer and log
471          * the entire thing.
472          */
473         xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id),
474                               dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
475
476         /*
477          * xfs_bmap_finish() may commit the current transaction and
478          * start a second transaction if the freelist is not empty.
479          *
480          * Since we still want to modify this buffer, we need to
481          * ensure that the buffer is not released on commit of
482          * the first transaction and ensure the buffer is added to the
483          * second transaction.
484          *
485          * If there is only one transaction then don't stop the buffer
486          * from being released when it commits later on.
487          */
488
489         xfs_trans_bhold(tp, bp);
490
491         if ((error = xfs_bmap_finish(tpp, &flist, &committed))) {
492                 goto error1;
493         }
494
495         if (committed) {
496                 tp = *tpp;
497                 xfs_trans_bjoin(tp, bp);
498         } else {
499                 xfs_trans_bhold_release(tp, bp);
500         }
501
502         *O_bpp = bp;
503         return 0;
504
505       error1:
506         xfs_bmap_cancel(&flist);
507       error0:
508         xfs_iunlock(quotip, XFS_ILOCK_EXCL);
509
510         return (error);
511 }
512
513 /*
514  * Maps a dquot to the buffer containing its on-disk version.
515  * This returns a ptr to the buffer containing the on-disk dquot
516  * in the bpp param, and a ptr to the on-disk dquot within that buffer
517  */
518 STATIC int
519 xfs_qm_dqtobp(
520         xfs_trans_t             **tpp,
521         xfs_dquot_t             *dqp,
522         xfs_disk_dquot_t        **O_ddpp,
523         xfs_buf_t               **O_bpp,
524         uint                    flags)
525 {
526         xfs_bmbt_irec_t map;
527         int             nmaps, error;
528         xfs_buf_t       *bp;
529         xfs_inode_t     *quotip;
530         xfs_mount_t     *mp;
531         xfs_disk_dquot_t *ddq;
532         xfs_dqid_t      id;
533         boolean_t       newdquot;
534         xfs_trans_t     *tp = (tpp ? *tpp : NULL);
535
536         mp = dqp->q_mount;
537         id = be32_to_cpu(dqp->q_core.d_id);
538         nmaps = 1;
539         newdquot = B_FALSE;
540
541         /*
542          * If we don't know where the dquot lives, find out.
543          */
544         if (dqp->q_blkno == (xfs_daddr_t) 0) {
545                 /* We use the id as an index */
546                 dqp->q_fileoffset = (xfs_fileoff_t)id / XFS_QM_DQPERBLK(mp);
547                 nmaps = 1;
548                 quotip = XFS_DQ_TO_QIP(dqp);
549                 xfs_ilock(quotip, XFS_ILOCK_SHARED);
550                 /*
551                  * Return if this type of quotas is turned off while we didn't
552                  * have an inode lock
553                  */
554                 if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
555                         xfs_iunlock(quotip, XFS_ILOCK_SHARED);
556                         return (ESRCH);
557                 }
558                 /*
559                  * Find the block map; no allocations yet
560                  */
561                 error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
562                                   XFS_DQUOT_CLUSTER_SIZE_FSB,
563                                   XFS_BMAPI_METADATA,
564                                   NULL, 0, &map, &nmaps, NULL, NULL);
565
566                 xfs_iunlock(quotip, XFS_ILOCK_SHARED);
567                 if (error)
568                         return (error);
569                 ASSERT(nmaps == 1);
570                 ASSERT(map.br_blockcount == 1);
571
572                 /*
573                  * offset of dquot in the (fixed sized) dquot chunk.
574                  */
575                 dqp->q_bufoffset = (id % XFS_QM_DQPERBLK(mp)) *
576                         sizeof(xfs_dqblk_t);
577                 if (map.br_startblock == HOLESTARTBLOCK) {
578                         /*
579                          * We don't allocate unless we're asked to
580                          */
581                         if (!(flags & XFS_QMOPT_DQALLOC))
582                                 return (ENOENT);
583
584                         ASSERT(tp);
585                         if ((error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
586                                                 dqp->q_fileoffset, &bp)))
587                                 return (error);
588                         tp = *tpp;
589                         newdquot = B_TRUE;
590                 } else {
591                         /*
592                          * store the blkno etc so that we don't have to do the
593                          * mapping all the time
594                          */
595                         dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
596                 }
597         }
598         ASSERT(dqp->q_blkno != DELAYSTARTBLOCK);
599         ASSERT(dqp->q_blkno != HOLESTARTBLOCK);
600
601         /*
602          * Read in the buffer, unless we've just done the allocation
603          * (in which case we already have the buf).
604          */
605         if (! newdquot) {
606                 xfs_dqtrace_entry(dqp, "DQTOBP READBUF");
607                 if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
608                                                dqp->q_blkno,
609                                                XFS_QI_DQCHUNKLEN(mp),
610                                                0, &bp))) {
611                         return (error);
612                 }
613                 if (error || !bp)
614                         return XFS_ERROR(error);
615         }
616         ASSERT(XFS_BUF_ISBUSY(bp));
617         ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
618
619         /*
620          * calculate the location of the dquot inside the buffer.
621          */
622         ddq = (xfs_disk_dquot_t *)((char *)XFS_BUF_PTR(bp) + dqp->q_bufoffset);
623
624         /*
625          * A simple sanity check in case we got a corrupted dquot...
626          */
627         if (xfs_qm_dqcheck(ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES,
628                            flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN),
629                            "dqtobp")) {
630                 if (!(flags & XFS_QMOPT_DQREPAIR)) {
631                         xfs_trans_brelse(tp, bp);
632                         return XFS_ERROR(EIO);
633                 }
634                 XFS_BUF_BUSY(bp); /* We dirtied this */
635         }
636
637         *O_bpp = bp;
638         *O_ddpp = ddq;
639
640         return (0);
641 }
642
643
644 /*
645  * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
646  * and release the buffer immediately.
647  *
648  */
649 /* ARGSUSED */
650 STATIC int
651 xfs_qm_dqread(
652         xfs_trans_t     **tpp,
653         xfs_dqid_t      id,
654         xfs_dquot_t     *dqp,   /* dquot to get filled in */
655         uint            flags)
656 {
657         xfs_disk_dquot_t *ddqp;
658         xfs_buf_t        *bp;
659         int              error;
660         xfs_trans_t      *tp;
661
662         ASSERT(tpp);
663
664         /*
665          * get a pointer to the on-disk dquot and the buffer containing it
666          * dqp already knows its own type (GROUP/USER).
667          */
668         xfs_dqtrace_entry(dqp, "DQREAD");
669         if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) {
670                 return (error);
671         }
672         tp = *tpp;
673
674         /* copy everything from disk dquot to the incore dquot */
675         memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
676         ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
677         xfs_qm_dquot_logitem_init(dqp);
678
679         /*
680          * Reservation counters are defined as reservation plus current usage
681          * to avoid having to add everytime.
682          */
683         dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
684         dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
685         dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);
686
687         /* Mark the buf so that this will stay incore a little longer */
688         XFS_BUF_SET_VTYPE_REF(bp, B_FS_DQUOT, XFS_DQUOT_REF);
689
690         /*
691          * We got the buffer with a xfs_trans_read_buf() (in dqtobp())
692          * So we need to release with xfs_trans_brelse().
693          * The strategy here is identical to that of inodes; we lock
694          * the dquot in xfs_qm_dqget() before making it accessible to
695          * others. This is because dquots, like inodes, need a good level of
696          * concurrency, and we don't want to take locks on the entire buffers
697          * for dquot accesses.
698          * Note also that the dquot buffer may even be dirty at this point, if
699          * this particular dquot was repaired. We still aren't afraid to
700          * brelse it because we have the changes incore.
701          */
702         ASSERT(XFS_BUF_ISBUSY(bp));
703         ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
704         xfs_trans_brelse(tp, bp);
705
706         return (error);
707 }
708
709
710 /*
711  * allocate an incore dquot from the kernel heap,
712  * and fill its core with quota information kept on disk.
713  * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk
714  * if it wasn't already allocated.
715  */
716 STATIC int
717 xfs_qm_idtodq(
718         xfs_mount_t     *mp,
719         xfs_dqid_t      id,      /* gid or uid, depending on type */
720         uint            type,    /* UDQUOT or GDQUOT */
721         uint            flags,   /* DQALLOC, DQREPAIR */
722         xfs_dquot_t     **O_dqpp)/* OUT : incore dquot, not locked */
723 {
724         xfs_dquot_t     *dqp;
725         int             error;
726         xfs_trans_t     *tp;
727         int             cancelflags=0;
728
729         dqp = xfs_qm_dqinit(mp, id, type);
730         tp = NULL;
731         if (flags & XFS_QMOPT_DQALLOC) {
732                 tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
733                 if ((error = xfs_trans_reserve(tp,
734                                        XFS_QM_DQALLOC_SPACE_RES(mp),
735                                        XFS_WRITE_LOG_RES(mp) +
736                                               BBTOB(XFS_QI_DQCHUNKLEN(mp)) - 1 +
737                                               128,
738                                        0,
739                                        XFS_TRANS_PERM_LOG_RES,
740                                        XFS_WRITE_LOG_COUNT))) {
741                         cancelflags = 0;
742                         goto error0;
743                 }
744                 cancelflags = XFS_TRANS_RELEASE_LOG_RES;
745         }
746
747         /*
748          * Read it from disk; xfs_dqread() takes care of
749          * all the necessary initialization of dquot's fields (locks, etc)
750          */
751         if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) {
752                 /*
753                  * This can happen if quotas got turned off (ESRCH),
754                  * or if the dquot didn't exist on disk and we ask to
755                  * allocate (ENOENT).
756                  */
757                 xfs_dqtrace_entry(dqp, "DQREAD FAIL");
758                 cancelflags |= XFS_TRANS_ABORT;
759                 goto error0;
760         }
761         if (tp) {
762                 if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES)))
763                         goto error1;
764         }
765
766         *O_dqpp = dqp;
767         return (0);
768
769  error0:
770         ASSERT(error);
771         if (tp)
772                 xfs_trans_cancel(tp, cancelflags);
773  error1:
774         xfs_qm_dqdestroy(dqp);
775         *O_dqpp = NULL;
776         return (error);
777 }
778
779 /*
780  * Lookup a dquot in the incore dquot hashtable. We keep two separate
781  * hashtables for user and group dquots; and, these are global tables
782  * inside the XQM, not per-filesystem tables.
783  * The hash chain must be locked by caller, and it is left locked
784  * on return. Returning dquot is locked.
785  */
786 STATIC int
787 xfs_qm_dqlookup(
788         xfs_mount_t             *mp,
789         xfs_dqid_t              id,
790         xfs_dqhash_t            *qh,
791         xfs_dquot_t             **O_dqpp)
792 {
793         xfs_dquot_t             *dqp;
794         uint                    flist_locked;
795         xfs_dquot_t             *d;
796
797         ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
798
799         flist_locked = B_FALSE;
800
801         /*
802          * Traverse the hashchain looking for a match
803          */
804         for (dqp = qh->qh_next; dqp != NULL; dqp = dqp->HL_NEXT) {
805                 /*
806                  * We already have the hashlock. We don't need the
807                  * dqlock to look at the id field of the dquot, since the
808                  * id can't be modified without the hashlock anyway.
809                  */
810                 if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) {
811                         xfs_dqtrace_entry(dqp, "DQFOUND BY LOOKUP");
812                         /*
813                          * All in core dquots must be on the dqlist of mp
814                          */
815                         ASSERT(dqp->MPL_PREVP != NULL);
816
817                         xfs_dqlock(dqp);
818                         if (dqp->q_nrefs == 0) {
819                                 ASSERT (XFS_DQ_IS_ON_FREELIST(dqp));
820                                 if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) {
821                                         xfs_dqtrace_entry(dqp, "DQLOOKUP: WANT");
822
823                                         /*
824                                          * We may have raced with dqreclaim_one()
825                                          * (and lost). So, flag that we don't
826                                          * want the dquot to be reclaimed.
827                                          */
828                                         dqp->dq_flags |= XFS_DQ_WANT;
829                                         xfs_dqunlock(dqp);
830                                         xfs_qm_freelist_lock(xfs_Gqm);
831                                         xfs_dqlock(dqp);
832                                         dqp->dq_flags &= ~(XFS_DQ_WANT);
833                                 }
834                                 flist_locked = B_TRUE;
835                         }
836
837                         /*
838                          * id couldn't have changed; we had the hashlock all
839                          * along
840                          */
841                         ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
842
843                         if (flist_locked) {
844                                 if (dqp->q_nrefs != 0) {
845                                         xfs_qm_freelist_unlock(xfs_Gqm);
846                                         flist_locked = B_FALSE;
847                                 } else {
848                                         /*
849                                          * take it off the freelist
850                                          */
851                                         xfs_dqtrace_entry(dqp,
852                                                         "DQLOOKUP: TAKEOFF FL");
853                                         XQM_FREELIST_REMOVE(dqp);
854                                         /* xfs_qm_freelist_print(&(xfs_Gqm->
855                                                         qm_dqfreelist),
856                                                         "after removal"); */
857                                 }
858                         }
859
860                         /*
861                          * grab a reference
862                          */
863                         XFS_DQHOLD(dqp);
864
865                         if (flist_locked)
866                                 xfs_qm_freelist_unlock(xfs_Gqm);
867                         /*
868                          * move the dquot to the front of the hashchain
869                          */
870                         ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
871                         if (dqp->HL_PREVP != &qh->qh_next) {
872                                 xfs_dqtrace_entry(dqp,
873                                                   "DQLOOKUP: HASH MOVETOFRONT");
874                                 if ((d = dqp->HL_NEXT))
875                                         d->HL_PREVP = dqp->HL_PREVP;
876                                 *(dqp->HL_PREVP) = d;
877                                 d = qh->qh_next;
878                                 d->HL_PREVP = &dqp->HL_NEXT;
879                                 dqp->HL_NEXT = d;
880                                 dqp->HL_PREVP = &qh->qh_next;
881                                 qh->qh_next = dqp;
882                         }
883                         xfs_dqtrace_entry(dqp, "LOOKUP END");
884                         *O_dqpp = dqp;
885                         ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
886                         return (0);
887                 }
888         }
889
890         *O_dqpp = NULL;
891         ASSERT(XFS_DQ_IS_HASH_LOCKED(qh));
892         return (1);
893 }
894
895 /*
896  * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
897  * a locked dquot, doing an allocation (if requested) as needed.
898  * When both an inode and an id are given, the inode's id takes precedence.
899  * That is, if the id changes while we don't hold the ilock inside this
900  * function, the new dquot is returned, not necessarily the one requested
901  * in the id argument.
902  */
903 int
904 xfs_qm_dqget(
905         xfs_mount_t     *mp,
906         xfs_inode_t     *ip,      /* locked inode (optional) */
907         xfs_dqid_t      id,       /* uid/projid/gid depending on type */
908         uint            type,     /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */
909         uint            flags,    /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
910         xfs_dquot_t     **O_dqpp) /* OUT : locked incore dquot */
911 {
912         xfs_dquot_t     *dqp;
913         xfs_dqhash_t    *h;
914         uint            version;
915         int             error;
916
917         ASSERT(XFS_IS_QUOTA_RUNNING(mp));
918         if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
919             (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
920             (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
921                 return (ESRCH);
922         }
923         h = XFS_DQ_HASH(mp, id, type);
924
925 #ifdef DEBUG
926         if (xfs_do_dqerror) {
927                 if ((xfs_dqerror_target == mp->m_ddev_targp) &&
928                     (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
929                         cmn_err(CE_DEBUG, "Returning error in dqget");
930                         return (EIO);
931                 }
932         }
933 #endif
934
935  again:
936
937 #ifdef DEBUG
938         ASSERT(type == XFS_DQ_USER ||
939                type == XFS_DQ_PROJ ||
940                type == XFS_DQ_GROUP);
941         if (ip) {
942                 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
943                 if (type == XFS_DQ_USER)
944                         ASSERT(ip->i_udquot == NULL);
945                 else
946                         ASSERT(ip->i_gdquot == NULL);
947         }
948 #endif
949         XFS_DQ_HASH_LOCK(h);
950
951         /*
952          * Look in the cache (hashtable).
953          * The chain is kept locked during lookup.
954          */
955         if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) {
956                 XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
957                 /*
958                  * The dquot was found, moved to the front of the chain,
959                  * taken off the freelist if it was on it, and locked
960                  * at this point. Just unlock the hashchain and return.
961                  */
962                 ASSERT(*O_dqpp);
963                 ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
964                 XFS_DQ_HASH_UNLOCK(h);
965                 xfs_dqtrace_entry(*O_dqpp, "DQGET DONE (FROM CACHE)");
966                 return (0);     /* success */
967         }
968         XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
969
970         /*
971          * Dquot cache miss. We don't want to keep the inode lock across
972          * a (potential) disk read. Also we don't want to deal with the lock
973          * ordering between quotainode and this inode. OTOH, dropping the inode
974          * lock here means dealing with a chown that can happen before
975          * we re-acquire the lock.
976          */
977         if (ip)
978                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
979         /*
980          * Save the hashchain version stamp, and unlock the chain, so that
981          * we don't keep the lock across a disk read
982          */
983         version = h->qh_version;
984         XFS_DQ_HASH_UNLOCK(h);
985
986         /*
987          * Allocate the dquot on the kernel heap, and read the ondisk
988          * portion off the disk. Also, do all the necessary initialization
989          * This can return ENOENT if dquot didn't exist on disk and we didn't
990          * ask it to allocate; ESRCH if quotas got turned off suddenly.
991          */
992         if ((error = xfs_qm_idtodq(mp, id, type,
993                                   flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR|
994                                            XFS_QMOPT_DOWARN),
995                                   &dqp))) {
996                 if (ip)
997                         xfs_ilock(ip, XFS_ILOCK_EXCL);
998                 return (error);
999         }
1000
1001         /*
1002          * See if this is mount code calling to look at the overall quota limits
1003          * which are stored in the id == 0 user or group's dquot.
1004          * Since we may not have done a quotacheck by this point, just return
1005          * the dquot without attaching it to any hashtables, lists, etc, or even
1006          * taking a reference.
1007          * The caller must dqdestroy this once done.
1008          */
1009         if (flags & XFS_QMOPT_DQSUSER) {
1010                 ASSERT(id == 0);
1011                 ASSERT(! ip);
1012                 goto dqret;
1013         }
1014
1015         /*
1016          * Dquot lock comes after hashlock in the lock ordering
1017          */
1018         if (ip) {
1019                 xfs_ilock(ip, XFS_ILOCK_EXCL);
1020                 if (! XFS_IS_DQTYPE_ON(mp, type)) {
1021                         /* inode stays locked on return */
1022                         xfs_qm_dqdestroy(dqp);
1023                         return XFS_ERROR(ESRCH);
1024                 }
1025                 /*
1026                  * A dquot could be attached to this inode by now, since
1027                  * we had dropped the ilock.
1028                  */
1029                 if (type == XFS_DQ_USER) {
1030                         if (ip->i_udquot) {
1031                                 xfs_qm_dqdestroy(dqp);
1032                                 dqp = ip->i_udquot;
1033                                 xfs_dqlock(dqp);
1034                                 goto dqret;
1035                         }
1036                 } else {
1037                         if (ip->i_gdquot) {
1038                                 xfs_qm_dqdestroy(dqp);
1039                                 dqp = ip->i_gdquot;
1040                                 xfs_dqlock(dqp);
1041                                 goto dqret;
1042                         }
1043                 }
1044         }
1045
1046         /*
1047          * Hashlock comes after ilock in lock order
1048          */
1049         XFS_DQ_HASH_LOCK(h);
1050         if (version != h->qh_version) {
1051                 xfs_dquot_t *tmpdqp;
1052                 /*
1053                  * Now, see if somebody else put the dquot in the
1054                  * hashtable before us. This can happen because we didn't
1055                  * keep the hashchain lock. We don't have to worry about
1056                  * lock order between the two dquots here since dqp isn't
1057                  * on any findable lists yet.
1058                  */
1059                 if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) {
1060                         /*
1061                          * Duplicate found. Just throw away the new dquot
1062                          * and start over.
1063                          */
1064                         xfs_qm_dqput(tmpdqp);
1065                         XFS_DQ_HASH_UNLOCK(h);
1066                         xfs_qm_dqdestroy(dqp);
1067                         XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
1068                         goto again;
1069                 }
1070         }
1071
1072         /*
1073          * Put the dquot at the beginning of the hash-chain and mp's list
1074          * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
1075          */
1076         ASSERT(XFS_DQ_IS_HASH_LOCKED(h));
1077         dqp->q_hash = h;
1078         XQM_HASHLIST_INSERT(h, dqp);
1079
1080         /*
1081          * Attach this dquot to this filesystem's list of all dquots,
1082          * kept inside the mount structure in m_quotainfo field
1083          */
1084         xfs_qm_mplist_lock(mp);
1085
1086         /*
1087          * We return a locked dquot to the caller, with a reference taken
1088          */
1089         xfs_dqlock(dqp);
1090         dqp->q_nrefs = 1;
1091
1092         XQM_MPLIST_INSERT(&(XFS_QI_MPL_LIST(mp)), dqp);
1093
1094         xfs_qm_mplist_unlock(mp);
1095         XFS_DQ_HASH_UNLOCK(h);
1096  dqret:
1097         ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
1098         xfs_dqtrace_entry(dqp, "DQGET DONE");
1099         *O_dqpp = dqp;
1100         return (0);
1101 }
1102
1103
1104 /*
1105  * Release a reference to the dquot (decrement ref-count)
1106  * and unlock it. If there is a group quota attached to this
1107  * dquot, carefully release that too without tripping over
1108  * deadlocks'n'stuff.
1109  */
1110 void
1111 xfs_qm_dqput(
1112         xfs_dquot_t     *dqp)
1113 {
1114         xfs_dquot_t     *gdqp;
1115
1116         ASSERT(dqp->q_nrefs > 0);
1117         ASSERT(XFS_DQ_IS_LOCKED(dqp));
1118         xfs_dqtrace_entry(dqp, "DQPUT");
1119
1120         if (dqp->q_nrefs != 1) {
1121                 dqp->q_nrefs--;
1122                 xfs_dqunlock(dqp);
1123                 return;
1124         }
1125
1126         /*
1127          * drop the dqlock and acquire the freelist and dqlock
1128          * in the right order; but try to get it out-of-order first
1129          */
1130         if (! xfs_qm_freelist_lock_nowait(xfs_Gqm)) {
1131                 xfs_dqtrace_entry(dqp, "DQPUT: FLLOCK-WAIT");
1132                 xfs_dqunlock(dqp);
1133                 xfs_qm_freelist_lock(xfs_Gqm);
1134                 xfs_dqlock(dqp);
1135         }
1136
1137         while (1) {
1138                 gdqp = NULL;
1139
1140                 /* We can't depend on nrefs being == 1 here */
1141                 if (--dqp->q_nrefs == 0) {
1142                         xfs_dqtrace_entry(dqp, "DQPUT: ON FREELIST");
1143                         /*
1144                          * insert at end of the freelist.
1145                          */
1146                         XQM_FREELIST_INSERT(&(xfs_Gqm->qm_dqfreelist), dqp);
1147
1148                         /*
1149                          * If we just added a udquot to the freelist, then
1150                          * we want to release the gdquot reference that
1151                          * it (probably) has. Otherwise it'll keep the
1152                          * gdquot from getting reclaimed.
1153                          */
1154                         if ((gdqp = dqp->q_gdquot)) {
1155                                 /*
1156                                  * Avoid a recursive dqput call
1157                                  */
1158                                 xfs_dqlock(gdqp);
1159                                 dqp->q_gdquot = NULL;
1160                         }
1161
1162                         /* xfs_qm_freelist_print(&(xfs_Gqm->qm_dqfreelist),
1163                            "@@@@@++ Free list (after append) @@@@@+");
1164                            */
1165                 }
1166                 xfs_dqunlock(dqp);
1167
1168                 /*
1169                  * If we had a group quota inside the user quota as a hint,
1170                  * release it now.
1171                  */
1172                 if (! gdqp)
1173                         break;
1174                 dqp = gdqp;
1175         }
1176         xfs_qm_freelist_unlock(xfs_Gqm);
1177 }
1178
1179 /*
1180  * Release a dquot. Flush it if dirty, then dqput() it.
1181  * dquot must not be locked.
1182  */
1183 void
1184 xfs_qm_dqrele(
1185         xfs_dquot_t     *dqp)
1186 {
1187         ASSERT(dqp);
1188         xfs_dqtrace_entry(dqp, "DQRELE");
1189
1190         xfs_dqlock(dqp);
1191         /*
1192          * We don't care to flush it if the dquot is dirty here.
1193          * That will create stutters that we want to avoid.
1194          * Instead we do a delayed write when we try to reclaim
1195          * a dirty dquot. Also xfs_sync will take part of the burden...
1196          */
1197         xfs_qm_dqput(dqp);
1198 }
1199
1200
1201 /*
1202  * Write a modified dquot to disk.
1203  * The dquot must be locked and the flush lock too taken by caller.
1204  * The flush lock will not be unlocked until the dquot reaches the disk,
1205  * but the dquot is free to be unlocked and modified by the caller
1206  * in the interim. Dquot is still locked on return. This behavior is
1207  * identical to that of inodes.
1208  */
1209 int
1210 xfs_qm_dqflush(
1211         xfs_dquot_t             *dqp,
1212         uint                    flags)
1213 {
1214         xfs_mount_t             *mp;
1215         xfs_buf_t               *bp;
1216         xfs_disk_dquot_t        *ddqp;
1217         int                     error;
1218
1219         ASSERT(XFS_DQ_IS_LOCKED(dqp));
1220         ASSERT(!completion_done(&dqp->q_flush));
1221         xfs_dqtrace_entry(dqp, "DQFLUSH");
1222
1223         /*
1224          * If not dirty, nada.
1225          */
1226         if (!XFS_DQ_IS_DIRTY(dqp)) {
1227                 xfs_dqfunlock(dqp);
1228                 return (0);
1229         }
1230
1231         /*
1232          * Cant flush a pinned dquot. Wait for it.
1233          */
1234         xfs_qm_dqunpin_wait(dqp);
1235
1236         /*
1237          * This may have been unpinned because the filesystem is shutting
1238          * down forcibly. If that's the case we must not write this dquot
1239          * to disk, because the log record didn't make it to disk!
1240          */
1241         if (XFS_FORCED_SHUTDOWN(dqp->q_mount)) {
1242                 dqp->dq_flags &= ~(XFS_DQ_DIRTY);
1243                 xfs_dqfunlock(dqp);
1244                 return XFS_ERROR(EIO);
1245         }
1246
1247         /*
1248          * Get the buffer containing the on-disk dquot
1249          * We don't need a transaction envelope because we know that the
1250          * the ondisk-dquot has already been allocated for.
1251          */
1252         if ((error = xfs_qm_dqtobp(NULL, dqp, &ddqp, &bp, XFS_QMOPT_DOWARN))) {
1253                 xfs_dqtrace_entry(dqp, "DQTOBP FAIL");
1254                 ASSERT(error != ENOENT);
1255                 /*
1256                  * Quotas could have gotten turned off (ESRCH)
1257                  */
1258                 xfs_dqfunlock(dqp);
1259                 return (error);
1260         }
1261
1262         if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id),
1263                            0, XFS_QMOPT_DOWARN, "dqflush (incore copy)")) {
1264                 xfs_force_shutdown(dqp->q_mount, SHUTDOWN_CORRUPT_INCORE);
1265                 return XFS_ERROR(EIO);
1266         }
1267
1268         /* This is the only portion of data that needs to persist */
1269         memcpy(ddqp, &(dqp->q_core), sizeof(xfs_disk_dquot_t));
1270
1271         /*
1272          * Clear the dirty field and remember the flush lsn for later use.
1273          */
1274         dqp->dq_flags &= ~(XFS_DQ_DIRTY);
1275         mp = dqp->q_mount;
1276
1277         /* lsn is 64 bits */
1278         spin_lock(&mp->m_ail_lock);
1279         dqp->q_logitem.qli_flush_lsn = dqp->q_logitem.qli_item.li_lsn;
1280         spin_unlock(&mp->m_ail_lock);
1281
1282         /*
1283          * Attach an iodone routine so that we can remove this dquot from the
1284          * AIL and release the flush lock once the dquot is synced to disk.
1285          */
1286         xfs_buf_attach_iodone(bp, (void(*)(xfs_buf_t *, xfs_log_item_t *))
1287                               xfs_qm_dqflush_done, &(dqp->q_logitem.qli_item));
1288         /*
1289          * If the buffer is pinned then push on the log so we won't
1290          * get stuck waiting in the write for too long.
1291          */
1292         if (XFS_BUF_ISPINNED(bp)) {
1293                 xfs_dqtrace_entry(dqp, "DQFLUSH LOG FORCE");
1294                 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE);
1295         }
1296
1297         if (flags & XFS_QMOPT_DELWRI) {
1298                 xfs_bdwrite(mp, bp);
1299         } else if (flags & XFS_QMOPT_ASYNC) {
1300                 error = xfs_bawrite(mp, bp);
1301         } else {
1302                 error = xfs_bwrite(mp, bp);
1303         }
1304         xfs_dqtrace_entry(dqp, "DQFLUSH END");
1305         /*
1306          * dqp is still locked, but caller is free to unlock it now.
1307          */
1308         return (error);
1309
1310 }
1311
1312 /*
1313  * This is the dquot flushing I/O completion routine.  It is called
1314  * from interrupt level when the buffer containing the dquot is
1315  * flushed to disk.  It is responsible for removing the dquot logitem
1316  * from the AIL if it has not been re-logged, and unlocking the dquot's
1317  * flush lock. This behavior is very similar to that of inodes..
1318  */
1319 /*ARGSUSED*/
1320 STATIC void
1321 xfs_qm_dqflush_done(
1322         xfs_buf_t               *bp,
1323         xfs_dq_logitem_t        *qip)
1324 {
1325         xfs_dquot_t             *dqp;
1326
1327         dqp = qip->qli_dquot;
1328
1329         /*
1330          * We only want to pull the item from the AIL if its
1331          * location in the log has not changed since we started the flush.
1332          * Thus, we only bother if the dquot's lsn has
1333          * not changed. First we check the lsn outside the lock
1334          * since it's cheaper, and then we recheck while
1335          * holding the lock before removing the dquot from the AIL.
1336          */
1337         if ((qip->qli_item.li_flags & XFS_LI_IN_AIL) &&
1338             qip->qli_item.li_lsn == qip->qli_flush_lsn) {
1339
1340                 spin_lock(&dqp->q_mount->m_ail_lock);
1341                 /*
1342                  * xfs_trans_delete_ail() drops the AIL lock.
1343                  */
1344                 if (qip->qli_item.li_lsn == qip->qli_flush_lsn)
1345                         xfs_trans_delete_ail(dqp->q_mount,
1346                                              (xfs_log_item_t*)qip);
1347                 else
1348                         spin_unlock(&dqp->q_mount->m_ail_lock);
1349         }
1350
1351         /*
1352          * Release the dq's flush lock since we're done with it.
1353          */
1354         xfs_dqfunlock(dqp);
1355 }
1356
1357 int
1358 xfs_qm_dqlock_nowait(
1359         xfs_dquot_t *dqp)
1360 {
1361         return mutex_trylock(&dqp->q_qlock);
1362 }
1363
1364 void
1365 xfs_dqlock(
1366         xfs_dquot_t *dqp)
1367 {
1368         mutex_lock(&dqp->q_qlock);
1369 }
1370
1371 void
1372 xfs_dqunlock(
1373         xfs_dquot_t *dqp)
1374 {
1375         mutex_unlock(&(dqp->q_qlock));
1376         if (dqp->q_logitem.qli_dquot == dqp) {
1377                 /* Once was dqp->q_mount, but might just have been cleared */
1378                 xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_mountp,
1379                                         (xfs_log_item_t*)&(dqp->q_logitem));
1380         }
1381 }
1382
1383
1384 void
1385 xfs_dqunlock_nonotify(
1386         xfs_dquot_t *dqp)
1387 {
1388         mutex_unlock(&(dqp->q_qlock));
1389 }
1390
1391 void
1392 xfs_dqlock2(
1393         xfs_dquot_t     *d1,
1394         xfs_dquot_t     *d2)
1395 {
1396         if (d1 && d2) {
1397                 ASSERT(d1 != d2);
1398                 if (be32_to_cpu(d1->q_core.d_id) >
1399                     be32_to_cpu(d2->q_core.d_id)) {
1400                         xfs_dqlock(d2);
1401                         xfs_dqlock(d1);
1402                 } else {
1403                         xfs_dqlock(d1);
1404                         xfs_dqlock(d2);
1405                 }
1406         } else {
1407                 if (d1) {
1408                         xfs_dqlock(d1);
1409                 } else if (d2) {
1410                         xfs_dqlock(d2);
1411                 }
1412         }
1413 }
1414
1415
1416 /*
1417  * Take a dquot out of the mount's dqlist as well as the hashlist.
1418  * This is called via unmount as well as quotaoff, and the purge
1419  * will always succeed unless there are soft (temp) references
1420  * outstanding.
1421  *
1422  * This returns 0 if it was purged, 1 if it wasn't. It's not an error code
1423  * that we're returning! XXXsup - not cool.
1424  */
1425 /* ARGSUSED */
1426 int
1427 xfs_qm_dqpurge(
1428         xfs_dquot_t     *dqp)
1429 {
1430         xfs_dqhash_t    *thishash;
1431         xfs_mount_t     *mp = dqp->q_mount;
1432
1433         ASSERT(XFS_QM_IS_MPLIST_LOCKED(mp));
1434         ASSERT(XFS_DQ_IS_HASH_LOCKED(dqp->q_hash));
1435
1436         xfs_dqlock(dqp);
1437         /*
1438          * We really can't afford to purge a dquot that is
1439          * referenced, because these are hard refs.
1440          * It shouldn't happen in general because we went thru _all_ inodes in
1441          * dqrele_all_inodes before calling this and didn't let the mountlock go.
1442          * However it is possible that we have dquots with temporary
1443          * references that are not attached to an inode. e.g. see xfs_setattr().
1444          */
1445         if (dqp->q_nrefs != 0) {
1446                 xfs_dqunlock(dqp);
1447                 XFS_DQ_HASH_UNLOCK(dqp->q_hash);
1448                 return (1);
1449         }
1450
1451         ASSERT(XFS_DQ_IS_ON_FREELIST(dqp));
1452
1453         /*
1454          * If we're turning off quotas, we have to make sure that, for
1455          * example, we don't delete quota disk blocks while dquots are
1456          * in the process of getting written to those disk blocks.
1457          * This dquot might well be on AIL, and we can't leave it there
1458          * if we're turning off quotas. Basically, we need this flush
1459          * lock, and are willing to block on it.
1460          */
1461         if (!xfs_dqflock_nowait(dqp)) {
1462                 /*
1463                  * Block on the flush lock after nudging dquot buffer,
1464                  * if it is incore.
1465                  */
1466                 xfs_qm_dqflock_pushbuf_wait(dqp);
1467         }
1468
1469         /*
1470          * XXXIf we're turning this type of quotas off, we don't care
1471          * about the dirty metadata sitting in this dquot. OTOH, if
1472          * we're unmounting, we do care, so we flush it and wait.
1473          */
1474         if (XFS_DQ_IS_DIRTY(dqp)) {
1475                 int     error;
1476                 xfs_dqtrace_entry(dqp, "DQPURGE ->DQFLUSH: DQDIRTY");
1477                 /* dqflush unlocks dqflock */
1478                 /*
1479                  * Given that dqpurge is a very rare occurrence, it is OK
1480                  * that we're holding the hashlist and mplist locks
1481                  * across the disk write. But, ... XXXsup
1482                  *
1483                  * We don't care about getting disk errors here. We need
1484                  * to purge this dquot anyway, so we go ahead regardless.
1485                  */
1486                 error = xfs_qm_dqflush(dqp, XFS_QMOPT_SYNC);
1487                 if (error)
1488                         xfs_fs_cmn_err(CE_WARN, mp,
1489                                 "xfs_qm_dqpurge: dquot %p flush failed", dqp);
1490                 xfs_dqflock(dqp);
1491         }
1492         ASSERT(dqp->q_pincount == 0);
1493         ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
1494                !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
1495
1496         thishash = dqp->q_hash;
1497         XQM_HASHLIST_REMOVE(thishash, dqp);
1498         XQM_MPLIST_REMOVE(&(XFS_QI_MPL_LIST(mp)), dqp);
1499         /*
1500          * XXX Move this to the front of the freelist, if we can get the
1501          * freelist lock.
1502          */
1503         ASSERT(XFS_DQ_IS_ON_FREELIST(dqp));
1504
1505         dqp->q_mount = NULL;
1506         dqp->q_hash = NULL;
1507         dqp->dq_flags = XFS_DQ_INACTIVE;
1508         memset(&dqp->q_core, 0, sizeof(dqp->q_core));
1509         xfs_dqfunlock(dqp);
1510         xfs_dqunlock(dqp);
1511         XFS_DQ_HASH_UNLOCK(thishash);
1512         return (0);
1513 }
1514
1515
#ifdef QUOTADEBUG
/*
 * Debug helper: dump an incore dquot (id, type, mount, disk location,
 * limits, counts and timers) through cmn_err() at CE_DEBUG level.
 */
void
xfs_qm_dqprint(xfs_dquot_t *dqp)
{
	xfs_disk_dquot_t	*core = &dqp->q_core;

	cmn_err(CE_DEBUG, "-----------KERNEL DQUOT----------------");
	cmn_err(CE_DEBUG, "---- dquotID =  %d",
		(int)be32_to_cpu(core->d_id));
	cmn_err(CE_DEBUG, "---- type    =  %s", DQFLAGTO_TYPESTR(dqp));
	cmn_err(CE_DEBUG, "---- fs      =  0x%p", dqp->q_mount);
	cmn_err(CE_DEBUG, "---- blkno   =  0x%x", (int) dqp->q_blkno);
	cmn_err(CE_DEBUG, "---- boffset =  0x%x", (int) dqp->q_bufoffset);

	/* block and inode limits: full value, then truncated to int in hex */
	cmn_err(CE_DEBUG, "---- blkhlimit =  %Lu (0x%x)",
		be64_to_cpu(core->d_blk_hardlimit),
		(int)be64_to_cpu(core->d_blk_hardlimit));
	cmn_err(CE_DEBUG, "---- blkslimit =  %Lu (0x%x)",
		be64_to_cpu(core->d_blk_softlimit),
		(int)be64_to_cpu(core->d_blk_softlimit));
	cmn_err(CE_DEBUG, "---- inohlimit =  %Lu (0x%x)",
		be64_to_cpu(core->d_ino_hardlimit),
		(int)be64_to_cpu(core->d_ino_hardlimit));
	cmn_err(CE_DEBUG, "---- inoslimit =  %Lu (0x%x)",
		be64_to_cpu(core->d_ino_softlimit),
		(int)be64_to_cpu(core->d_ino_softlimit));

	/* current usage */
	cmn_err(CE_DEBUG, "---- bcount  =  %Lu (0x%x)",
		be64_to_cpu(core->d_bcount),
		(int)be64_to_cpu(core->d_bcount));
	cmn_err(CE_DEBUG, "---- icount  =  %Lu (0x%x)",
		be64_to_cpu(core->d_icount),
		(int)be64_to_cpu(core->d_icount));

	/* timers */
	cmn_err(CE_DEBUG, "---- btimer  =  %d",
		(int)be32_to_cpu(core->d_btimer));
	cmn_err(CE_DEBUG, "---- itimer  =  %d",
		(int)be32_to_cpu(core->d_itimer));
	cmn_err(CE_DEBUG, "---------------------------");
}
#endif
1552
1553 /*
1554  * Give the buffer a little push if it is incore and
1555  * wait on the flush lock.
1556  */
1557 void
1558 xfs_qm_dqflock_pushbuf_wait(
1559         xfs_dquot_t     *dqp)
1560 {
1561         xfs_buf_t       *bp;
1562
1563         /*
1564          * Check to see if the dquot has been flushed delayed
1565          * write.  If so, grab its buffer and send it
1566          * out immediately.  We'll be able to acquire
1567          * the flush lock when the I/O completes.
1568          */
1569         bp = xfs_incore(dqp->q_mount->m_ddev_targp, dqp->q_blkno,
1570                     XFS_QI_DQCHUNKLEN(dqp->q_mount),
1571                     XFS_INCORE_TRYLOCK);
1572         if (bp != NULL) {
1573                 if (XFS_BUF_ISDELAYWRITE(bp)) {
1574                         int     error;
1575                         if (XFS_BUF_ISPINNED(bp)) {
1576                                 xfs_log_force(dqp->q_mount,
1577                                               (xfs_lsn_t)0,
1578                                               XFS_LOG_FORCE);
1579                         }
1580                         error = xfs_bawrite(dqp->q_mount, bp);
1581                         if (error)
1582                                 xfs_fs_cmn_err(CE_WARN, dqp->q_mount,
1583                                         "xfs_qm_dqflock_pushbuf_wait: "
1584                                         "pushbuf error %d on dqp %p, bp %p",
1585                                         error, dqp, bp);
1586                 } else {
1587                         xfs_buf_relse(bp);
1588                 }
1589         }
1590         xfs_dqflock(dqp);
1591 }