ftrace: do not trace scheduler functions
[linux-2.6] / fs / xfs / xfs_attr.c
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18
#include <linux/capability.h>
#include <linux/stddef.h>

#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_dir2_sf.h"
#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_acl.h"
#include "xfs_rw.h"
#include "xfs_vnodeops.h"
53
54 /*
55  * xfs_attr.c
56  *
57  * Provide the external interfaces to manage attribute lists.
58  */
59
60 #define ATTR_SYSCOUNT   2
61 static struct attrnames posix_acl_access;
62 static struct attrnames posix_acl_default;
63 static struct attrnames *attr_system_names[ATTR_SYSCOUNT];
64
65 /*========================================================================
66  * Function prototypes for the kernel.
67  *========================================================================*/
68
69 /*
70  * Internal routines when attribute list fits inside the inode.
71  */
72 STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
73
74 /*
75  * Internal routines when attribute list is one block.
76  */
77 STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
78 STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args);
79 STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
80 STATIC int xfs_attr_leaf_list(xfs_attr_list_context_t *context);
81
82 /*
83  * Internal routines when attribute list is more than one block.
84  */
85 STATIC int xfs_attr_node_get(xfs_da_args_t *args);
86 STATIC int xfs_attr_node_addname(xfs_da_args_t *args);
87 STATIC int xfs_attr_node_removename(xfs_da_args_t *args);
88 STATIC int xfs_attr_node_list(xfs_attr_list_context_t *context);
89 STATIC int xfs_attr_fillstate(xfs_da_state_t *state);
90 STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
91
92 /*
93  * Routines to manipulate out-of-line attribute values.
94  */
95 STATIC int xfs_attr_rmtval_set(xfs_da_args_t *args);
96 STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args);
97
98 #define ATTR_RMTVALUE_MAPSIZE   1       /* # of map entries at once */
99
100 #if defined(XFS_ATTR_TRACE)
101 ktrace_t *xfs_attr_trace_buf;
102 #endif
103
104 STATIC int
105 xfs_attr_name_to_xname(
106         struct xfs_name *xname,
107         const char      *aname)
108 {
109         if (!aname)
110                 return EINVAL;
111         xname->name = aname;
112         xname->len = strlen(aname);
113         if (xname->len >= MAXNAMELEN)
114                 return EFAULT;          /* match IRIX behaviour */
115
116         return 0;
117 }
118
119 /*========================================================================
120  * Overall external interface routines.
121  *========================================================================*/
122
123 int
124 xfs_attr_fetch(xfs_inode_t *ip, struct xfs_name *name,
125                 char *value, int *valuelenp, int flags)
126 {
127         xfs_da_args_t   args;
128         int             error;
129
130         if ((XFS_IFORK_Q(ip) == 0) ||
131             (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
132              ip->i_d.di_anextents == 0))
133                 return(ENOATTR);
134
135         /*
136          * Fill in the arg structure for this request.
137          */
138         memset((char *)&args, 0, sizeof(args));
139         args.name = name->name;
140         args.namelen = name->len;
141         args.value = value;
142         args.valuelen = *valuelenp;
143         args.flags = flags;
144         args.hashval = xfs_da_hashname(args.name, args.namelen);
145         args.dp = ip;
146         args.whichfork = XFS_ATTR_FORK;
147
148         /*
149          * Decide on what work routines to call based on the inode size.
150          */
151         if (XFS_IFORK_Q(ip) == 0 ||
152             (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
153              ip->i_d.di_anextents == 0)) {
154                 error = XFS_ERROR(ENOATTR);
155         } else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
156                 error = xfs_attr_shortform_getvalue(&args);
157         } else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) {
158                 error = xfs_attr_leaf_get(&args);
159         } else {
160                 error = xfs_attr_node_get(&args);
161         }
162
163         /*
164          * Return the number of bytes in the value to the caller.
165          */
166         *valuelenp = args.valuelen;
167
168         if (error == EEXIST)
169                 error = 0;
170         return(error);
171 }
172
173 int
174 xfs_attr_get(
175         xfs_inode_t     *ip,
176         const char      *name,
177         char            *value,
178         int             *valuelenp,
179         int             flags)
180 {
181         int             error;
182         struct xfs_name xname;
183
184         XFS_STATS_INC(xs_attr_get);
185
186         if (XFS_FORCED_SHUTDOWN(ip->i_mount))
187                 return(EIO);
188
189         error = xfs_attr_name_to_xname(&xname, name);
190         if (error)
191                 return error;
192
193         xfs_ilock(ip, XFS_ILOCK_SHARED);
194         error = xfs_attr_fetch(ip, &xname, value, valuelenp, flags);
195         xfs_iunlock(ip, XFS_ILOCK_SHARED);
196         return(error);
197 }
198
199 STATIC int
200 xfs_attr_set_int(xfs_inode_t *dp, struct xfs_name *name,
201                 char *value, int valuelen, int flags)
202 {
203         xfs_da_args_t   args;
204         xfs_fsblock_t   firstblock;
205         xfs_bmap_free_t flist;
206         int             error, err2, committed;
207         int             local, size;
208         uint            nblks;
209         xfs_mount_t     *mp = dp->i_mount;
210         int             rsvd = (flags & ATTR_ROOT) != 0;
211
212         /*
213          * Attach the dquots to the inode.
214          */
215         if ((error = XFS_QM_DQATTACH(mp, dp, 0)))
216                 return (error);
217
218         /*
219          * If the inode doesn't have an attribute fork, add one.
220          * (inode must not be locked when we call this routine)
221          */
222         if (XFS_IFORK_Q(dp) == 0) {
223                 int sf_size = sizeof(xfs_attr_sf_hdr_t) +
224                               XFS_ATTR_SF_ENTSIZE_BYNAME(name->len, valuelen);
225
226                 if ((error = xfs_bmap_add_attrfork(dp, sf_size, rsvd)))
227                         return(error);
228         }
229
230         /*
231          * Fill in the arg structure for this request.
232          */
233         memset((char *)&args, 0, sizeof(args));
234         args.name = name->name;
235         args.namelen = name->len;
236         args.value = value;
237         args.valuelen = valuelen;
238         args.flags = flags;
239         args.hashval = xfs_da_hashname(args.name, args.namelen);
240         args.dp = dp;
241         args.firstblock = &firstblock;
242         args.flist = &flist;
243         args.whichfork = XFS_ATTR_FORK;
244         args.addname = 1;
245         args.oknoent = 1;
246
247         /*
248          * Determine space new attribute will use, and if it would be
249          * "local" or "remote" (note: local != inline).
250          */
251         size = xfs_attr_leaf_newentsize(name->len, valuelen,
252                                         mp->m_sb.sb_blocksize, &local);
253
254         nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
255         if (local) {
256                 if (size > (mp->m_sb.sb_blocksize >> 1)) {
257                         /* Double split possible */
258                         nblks <<= 1;
259                 }
260         } else {
261                 uint    dblocks = XFS_B_TO_FSB(mp, valuelen);
262                 /* Out of line attribute, cannot double split, but make
263                  * room for the attribute value itself.
264                  */
265                 nblks += dblocks;
266                 nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
267         }
268
269         /* Size is now blocks for attribute data */
270         args.total = nblks;
271
272         /*
273          * Start our first transaction of the day.
274          *
275          * All future transactions during this code must be "chained" off
276          * this one via the trans_dup() call.  All transactions will contain
277          * the inode, and the inode will always be marked with trans_ihold().
278          * Since the inode will be locked in all transactions, we must log
279          * the inode in every transaction to let it float upward through
280          * the log.
281          */
282         args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_SET);
283
284         /*
285          * Root fork attributes can use reserved data blocks for this
286          * operation if necessary
287          */
288
289         if (rsvd)
290                 args.trans->t_flags |= XFS_TRANS_RESERVE;
291
292         if ((error = xfs_trans_reserve(args.trans, (uint) nblks,
293                                       XFS_ATTRSET_LOG_RES(mp, nblks),
294                                       0, XFS_TRANS_PERM_LOG_RES,
295                                       XFS_ATTRSET_LOG_COUNT))) {
296                 xfs_trans_cancel(args.trans, 0);
297                 return(error);
298         }
299         xfs_ilock(dp, XFS_ILOCK_EXCL);
300
301         error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, nblks, 0,
302                          rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
303                                 XFS_QMOPT_RES_REGBLKS);
304         if (error) {
305                 xfs_iunlock(dp, XFS_ILOCK_EXCL);
306                 xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES);
307                 return (error);
308         }
309
310         xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL);
311         xfs_trans_ihold(args.trans, dp);
312
313         /*
314          * If the attribute list is non-existent or a shortform list,
315          * upgrade it to a single-leaf-block attribute list.
316          */
317         if ((dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
318             ((dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) &&
319              (dp->i_d.di_anextents == 0))) {
320
321                 /*
322                  * Build initial attribute list (if required).
323                  */
324                 if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS)
325                         xfs_attr_shortform_create(&args);
326
327                 /*
328                  * Try to add the attr to the attribute list in
329                  * the inode.
330                  */
331                 error = xfs_attr_shortform_addname(&args);
332                 if (error != ENOSPC) {
333                         /*
334                          * Commit the shortform mods, and we're done.
335                          * NOTE: this is also the error path (EEXIST, etc).
336                          */
337                         ASSERT(args.trans != NULL);
338
339                         /*
340                          * If this is a synchronous mount, make sure that
341                          * the transaction goes to disk before returning
342                          * to the user.
343                          */
344                         if (mp->m_flags & XFS_MOUNT_WSYNC) {
345                                 xfs_trans_set_sync(args.trans);
346                         }
347                         err2 = xfs_trans_commit(args.trans,
348                                                  XFS_TRANS_RELEASE_LOG_RES);
349                         xfs_iunlock(dp, XFS_ILOCK_EXCL);
350
351                         /*
352                          * Hit the inode change time.
353                          */
354                         if (!error && (flags & ATTR_KERNOTIME) == 0) {
355                                 xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
356                         }
357                         return(error == 0 ? err2 : error);
358                 }
359
360                 /*
361                  * It won't fit in the shortform, transform to a leaf block.
362                  * GROT: another possible req'mt for a double-split btree op.
363                  */
364                 XFS_BMAP_INIT(args.flist, args.firstblock);
365                 error = xfs_attr_shortform_to_leaf(&args);
366                 if (!error) {
367                         error = xfs_bmap_finish(&args.trans, args.flist,
368                                                 &committed);
369                 }
370                 if (error) {
371                         ASSERT(committed);
372                         args.trans = NULL;
373                         xfs_bmap_cancel(&flist);
374                         goto out;
375                 }
376
377                 /*
378                  * bmap_finish() may have committed the last trans and started
379                  * a new one.  We need the inode to be in all transactions.
380                  */
381                 if (committed) {
382                         xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL);
383                         xfs_trans_ihold(args.trans, dp);
384                 }
385
386                 /*
387                  * Commit the leaf transformation.  We'll need another (linked)
388                  * transaction to add the new attribute to the leaf.
389                  */
390                 if ((error = xfs_attr_rolltrans(&args.trans, dp)))
391                         goto out;
392
393         }
394
395         if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
396                 error = xfs_attr_leaf_addname(&args);
397         } else {
398                 error = xfs_attr_node_addname(&args);
399         }
400         if (error) {
401                 goto out;
402         }
403
404         /*
405          * If this is a synchronous mount, make sure that the
406          * transaction goes to disk before returning to the user.
407          */
408         if (mp->m_flags & XFS_MOUNT_WSYNC) {
409                 xfs_trans_set_sync(args.trans);
410         }
411
412         /*
413          * Commit the last in the sequence of transactions.
414          */
415         xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
416         error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
417         xfs_iunlock(dp, XFS_ILOCK_EXCL);
418
419         /*
420          * Hit the inode change time.
421          */
422         if (!error && (flags & ATTR_KERNOTIME) == 0) {
423                 xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
424         }
425
426         return(error);
427
428 out:
429         if (args.trans)
430                 xfs_trans_cancel(args.trans,
431                         XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
432         xfs_iunlock(dp, XFS_ILOCK_EXCL);
433         return(error);
434 }
435
436 int
437 xfs_attr_set(
438         xfs_inode_t     *dp,
439         const char      *name,
440         char            *value,
441         int             valuelen,
442         int             flags)
443 {
444         int             error;
445         struct xfs_name xname;
446
447         XFS_STATS_INC(xs_attr_set);
448
449         if (XFS_FORCED_SHUTDOWN(dp->i_mount))
450                 return (EIO);
451
452         error = xfs_attr_name_to_xname(&xname, name);
453         if (error)
454                 return error;
455
456         return xfs_attr_set_int(dp, &xname, value, valuelen, flags);
457 }
458
459 /*
460  * Generic handler routine to remove a name from an attribute list.
461  * Transitions attribute list from Btree to shortform as necessary.
462  */
463 STATIC int
464 xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
465 {
466         xfs_da_args_t   args;
467         xfs_fsblock_t   firstblock;
468         xfs_bmap_free_t flist;
469         int             error;
470         xfs_mount_t     *mp = dp->i_mount;
471
472         /*
473          * Fill in the arg structure for this request.
474          */
475         memset((char *)&args, 0, sizeof(args));
476         args.name = name->name;
477         args.namelen = name->len;
478         args.flags = flags;
479         args.hashval = xfs_da_hashname(args.name, args.namelen);
480         args.dp = dp;
481         args.firstblock = &firstblock;
482         args.flist = &flist;
483         args.total = 0;
484         args.whichfork = XFS_ATTR_FORK;
485
486         /*
487          * Attach the dquots to the inode.
488          */
489         if ((error = XFS_QM_DQATTACH(mp, dp, 0)))
490                 return (error);
491
492         /*
493          * Start our first transaction of the day.
494          *
495          * All future transactions during this code must be "chained" off
496          * this one via the trans_dup() call.  All transactions will contain
497          * the inode, and the inode will always be marked with trans_ihold().
498          * Since the inode will be locked in all transactions, we must log
499          * the inode in every transaction to let it float upward through
500          * the log.
501          */
502         args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_RM);
503
504         /*
505          * Root fork attributes can use reserved data blocks for this
506          * operation if necessary
507          */
508
509         if (flags & ATTR_ROOT)
510                 args.trans->t_flags |= XFS_TRANS_RESERVE;
511
512         if ((error = xfs_trans_reserve(args.trans,
513                                       XFS_ATTRRM_SPACE_RES(mp),
514                                       XFS_ATTRRM_LOG_RES(mp),
515                                       0, XFS_TRANS_PERM_LOG_RES,
516                                       XFS_ATTRRM_LOG_COUNT))) {
517                 xfs_trans_cancel(args.trans, 0);
518                 return(error);
519         }
520
521         xfs_ilock(dp, XFS_ILOCK_EXCL);
522         /*
523          * No need to make quota reservations here. We expect to release some
524          * blocks not allocate in the common case.
525          */
526         xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL);
527         xfs_trans_ihold(args.trans, dp);
528
529         /*
530          * Decide on what work routines to call based on the inode size.
531          */
532         if (XFS_IFORK_Q(dp) == 0 ||
533             (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
534              dp->i_d.di_anextents == 0)) {
535                 error = XFS_ERROR(ENOATTR);
536                 goto out;
537         }
538         if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
539                 ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
540                 error = xfs_attr_shortform_remove(&args);
541                 if (error) {
542                         goto out;
543                 }
544         } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
545                 error = xfs_attr_leaf_removename(&args);
546         } else {
547                 error = xfs_attr_node_removename(&args);
548         }
549         if (error) {
550                 goto out;
551         }
552
553         /*
554          * If this is a synchronous mount, make sure that the
555          * transaction goes to disk before returning to the user.
556          */
557         if (mp->m_flags & XFS_MOUNT_WSYNC) {
558                 xfs_trans_set_sync(args.trans);
559         }
560
561         /*
562          * Commit the last in the sequence of transactions.
563          */
564         xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
565         error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
566         xfs_iunlock(dp, XFS_ILOCK_EXCL);
567
568         /*
569          * Hit the inode change time.
570          */
571         if (!error && (flags & ATTR_KERNOTIME) == 0) {
572                 xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
573         }
574
575         return(error);
576
577 out:
578         if (args.trans)
579                 xfs_trans_cancel(args.trans,
580                         XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
581         xfs_iunlock(dp, XFS_ILOCK_EXCL);
582         return(error);
583 }
584
585 int
586 xfs_attr_remove(
587         xfs_inode_t     *dp,
588         const char      *name,
589         int             flags)
590 {
591         int             error;
592         struct xfs_name xname;
593
594         XFS_STATS_INC(xs_attr_remove);
595
596         if (XFS_FORCED_SHUTDOWN(dp->i_mount))
597                 return (EIO);
598
599         error = xfs_attr_name_to_xname(&xname, name);
600         if (error)
601                 return error;
602
603         xfs_ilock(dp, XFS_ILOCK_SHARED);
604         if (XFS_IFORK_Q(dp) == 0 ||
605                    (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
606                     dp->i_d.di_anextents == 0)) {
607                 xfs_iunlock(dp, XFS_ILOCK_SHARED);
608                 return(XFS_ERROR(ENOATTR));
609         }
610         xfs_iunlock(dp, XFS_ILOCK_SHARED);
611
612         return xfs_attr_remove_int(dp, &xname, flags);
613 }
614
615 STATIC int
616 xfs_attr_list_int(xfs_attr_list_context_t *context)
617 {
618         int error;
619         xfs_inode_t *dp = context->dp;
620
621         /*
622          * Decide on what work routines to call based on the inode size.
623          */
624         if (XFS_IFORK_Q(dp) == 0 ||
625             (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
626              dp->i_d.di_anextents == 0)) {
627                 error = 0;
628         } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
629                 error = xfs_attr_shortform_list(context);
630         } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
631                 error = xfs_attr_leaf_list(context);
632         } else {
633                 error = xfs_attr_node_list(context);
634         }
635         return error;
636 }
637
/*
 * Sizing of struct attrlist_ent records.
 *
 * ATTR_ENTBASESIZE is the offset of the a_name member, i.e. the bytes
 * in front of the name.  It is taken with offsetof() rather than by
 * subtracting null pointers, which is undefined behaviour in C.
 *
 * ATTR_ENTSIZE(namelen) is the size of a record holding a name of
 * "namelen" bytes plus its terminating NUL, rounded up to a multiple
 * of sizeof(u_int32_t).
 */
#define ATTR_ENTBASESIZE		/* minimum bytes used by an attr */ \
	(offsetof(struct attrlist_ent, a_name))
#define ATTR_ENTSIZE(namelen)		/* actual bytes used by an attr */ \
	((ATTR_ENTBASESIZE + (namelen) + 1 + sizeof(u_int32_t)-1) \
	 & ~(sizeof(u_int32_t)-1))
643
644 /*
645  * Format an attribute and copy it out to the user's buffer.
646  * Take care to check values and protect against them changing later,
647  * we may be reading them directly out of a user buffer.
648  */
649 /*ARGSUSED*/
650 STATIC int
651 xfs_attr_put_listent(xfs_attr_list_context_t *context, attrnames_t *namesp,
652                      char *name, int namelen,
653                      int valuelen, char *value)
654 {
655         attrlist_ent_t *aep;
656         int arraytop;
657
658         ASSERT(!(context->flags & ATTR_KERNOVAL));
659         ASSERT(context->count >= 0);
660         ASSERT(context->count < (ATTR_MAX_VALUELEN/8));
661         ASSERT(context->firstu >= sizeof(*context->alist));
662         ASSERT(context->firstu <= context->bufsize);
663
664         arraytop = sizeof(*context->alist) +
665                         context->count * sizeof(context->alist->al_offset[0]);
666         context->firstu -= ATTR_ENTSIZE(namelen);
667         if (context->firstu < arraytop) {
668                 xfs_attr_trace_l_c("buffer full", context);
669                 context->alist->al_more = 1;
670                 context->seen_enough = 1;
671                 return 1;
672         }
673
674         aep = (attrlist_ent_t *)&(((char *)context->alist)[ context->firstu ]);
675         aep->a_valuelen = valuelen;
676         memcpy(aep->a_name, name, namelen);
677         aep->a_name[ namelen ] = 0;
678         context->alist->al_offset[ context->count++ ] = context->firstu;
679         context->alist->al_count = context->count;
680         xfs_attr_trace_l_c("add", context);
681         return 0;
682 }
683
684 STATIC int
685 xfs_attr_kern_list(xfs_attr_list_context_t *context, attrnames_t *namesp,
686                      char *name, int namelen,
687                      int valuelen, char *value)
688 {
689         char *offset;
690         int arraytop;
691
692         ASSERT(context->count >= 0);
693
694         arraytop = context->count + namesp->attr_namelen + namelen + 1;
695         if (arraytop > context->firstu) {
696                 context->count = -1;    /* insufficient space */
697                 return 1;
698         }
699         offset = (char *)context->alist + context->count;
700         strncpy(offset, namesp->attr_name, namesp->attr_namelen);
701         offset += namesp->attr_namelen;
702         strncpy(offset, name, namelen);                 /* real name */
703         offset += namelen;
704         *offset = '\0';
705         context->count += namesp->attr_namelen + namelen + 1;
706         return 0;
707 }
708
709 /*ARGSUSED*/
710 STATIC int
711 xfs_attr_kern_list_sizes(xfs_attr_list_context_t *context, attrnames_t *namesp,
712                      char *name, int namelen,
713                      int valuelen, char *value)
714 {
715         context->count += namesp->attr_namelen + namelen + 1;
716         return 0;
717 }
718
719 /*
720  * Generate a list of extended attribute names and optionally
721  * also value lengths.  Positive return value follows the XFS
722  * convention of being an error, zero or negative return code
723  * is the length of the buffer returned (negated), indicating
724  * success.
725  */
726 int
727 xfs_attr_list(
728         xfs_inode_t     *dp,
729         char            *buffer,
730         int             bufsize,
731         int             flags,
732         attrlist_cursor_kern_t *cursor)
733 {
734         xfs_attr_list_context_t context;
735         int error;
736
737         XFS_STATS_INC(xs_attr_list);
738
739         /*
740          * Validate the cursor.
741          */
742         if (cursor->pad1 || cursor->pad2)
743                 return(XFS_ERROR(EINVAL));
744         if ((cursor->initted == 0) &&
745             (cursor->hashval || cursor->blkno || cursor->offset))
746                 return XFS_ERROR(EINVAL);
747
748         /*
749          * Check for a properly aligned buffer.
750          */
751         if (((long)buffer) & (sizeof(int)-1))
752                 return XFS_ERROR(EFAULT);
753         if (flags & ATTR_KERNOVAL)
754                 bufsize = 0;
755
756         /*
757          * Initialize the output buffer.
758          */
759         context.dp = dp;
760         context.cursor = cursor;
761         context.count = 0;
762         context.dupcnt = 0;
763         context.resynch = 1;
764         context.flags = flags;
765         context.seen_enough = 0;
766         context.alist = (attrlist_t *)buffer;
767         context.put_value = 0;
768
769         if (flags & ATTR_KERNAMELS) {
770                 context.bufsize = bufsize;
771                 context.firstu = context.bufsize;
772                 if (flags & ATTR_KERNOVAL)
773                         context.put_listent = xfs_attr_kern_list_sizes;
774                 else
775                         context.put_listent = xfs_attr_kern_list;
776         } else {
777                 context.bufsize = (bufsize & ~(sizeof(int)-1));  /* align */
778                 context.firstu = context.bufsize;
779                 context.alist->al_count = 0;
780                 context.alist->al_more = 0;
781                 context.alist->al_offset[0] = context.bufsize;
782                 context.put_listent = xfs_attr_put_listent;
783         }
784
785         if (XFS_FORCED_SHUTDOWN(dp->i_mount))
786                 return EIO;
787
788         xfs_ilock(dp, XFS_ILOCK_SHARED);
789         xfs_attr_trace_l_c("syscall start", &context);
790
791         error = xfs_attr_list_int(&context);
792
793         xfs_iunlock(dp, XFS_ILOCK_SHARED);
794         xfs_attr_trace_l_c("syscall end", &context);
795
796         if (context.flags & (ATTR_KERNOVAL|ATTR_KERNAMELS)) {
797                 /* must return negated buffer size or the error */
798                 if (context.count < 0)
799                         error = XFS_ERROR(ERANGE);
800                 else
801                         error = -context.count;
802         } else
803                 ASSERT(error >= 0);
804
805         return error;
806 }
807
808 int                                                             /* error */
809 xfs_attr_inactive(xfs_inode_t *dp)
810 {
811         xfs_trans_t *trans;
812         xfs_mount_t *mp;
813         int error;
814
815         mp = dp->i_mount;
816         ASSERT(! XFS_NOT_DQATTACHED(mp, dp));
817
818         xfs_ilock(dp, XFS_ILOCK_SHARED);
819         if ((XFS_IFORK_Q(dp) == 0) ||
820             (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
821             (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
822              dp->i_d.di_anextents == 0)) {
823                 xfs_iunlock(dp, XFS_ILOCK_SHARED);
824                 return(0);
825         }
826         xfs_iunlock(dp, XFS_ILOCK_SHARED);
827
828         /*
829          * Start our first transaction of the day.
830          *
831          * All future transactions during this code must be "chained" off
832          * this one via the trans_dup() call.  All transactions will contain
833          * the inode, and the inode will always be marked with trans_ihold().
834          * Since the inode will be locked in all transactions, we must log
835          * the inode in every transaction to let it float upward through
836          * the log.
837          */
838         trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL);
839         if ((error = xfs_trans_reserve(trans, 0, XFS_ATTRINVAL_LOG_RES(mp), 0,
840                                       XFS_TRANS_PERM_LOG_RES,
841                                       XFS_ATTRINVAL_LOG_COUNT))) {
842                 xfs_trans_cancel(trans, 0);
843                 return(error);
844         }
845         xfs_ilock(dp, XFS_ILOCK_EXCL);
846
847         /*
848          * No need to make quota reservations here. We expect to release some
849          * blocks, not allocate, in the common case.
850          */
851         xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL);
852         xfs_trans_ihold(trans, dp);
853
854         /*
855          * Decide on what work routines to call based on the inode size.
856          */
857         if ((XFS_IFORK_Q(dp) == 0) ||
858             (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
859             (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
860              dp->i_d.di_anextents == 0)) {
861                 error = 0;
862                 goto out;
863         }
864         error = xfs_attr_root_inactive(&trans, dp);
865         if (error)
866                 goto out;
867         /*
868          * signal synchronous inactive transactions unless this
869          * is a synchronous mount filesystem in which case we
870          * know that we're here because we've been called out of
871          * xfs_inactive which means that the last reference is gone
872          * and the unlink transaction has already hit the disk so
873          * async inactive transactions are safe.
874          */
875         if ((error = xfs_itruncate_finish(&trans, dp, 0LL, XFS_ATTR_FORK,
876                                 (!(mp->m_flags & XFS_MOUNT_WSYNC)
877                                  ? 1 : 0))))
878                 goto out;
879
880         /*
881          * Commit the last in the sequence of transactions.
882          */
883         xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
884         error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
885         xfs_iunlock(dp, XFS_ILOCK_EXCL);
886
887         return(error);
888
889 out:
890         xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
891         xfs_iunlock(dp, XFS_ILOCK_EXCL);
892         return(error);
893 }
894
895
896
897 /*========================================================================
898  * External routines when attribute list is inside the inode
899  *========================================================================*/
900
901 /*
902  * Add a name to the shortform attribute list structure
903  * This is the external routine.
904  */
905 STATIC int
906 xfs_attr_shortform_addname(xfs_da_args_t *args)
907 {
908         int newsize, forkoff, retval;
909
910         retval = xfs_attr_shortform_lookup(args);
911         if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
912                 return(retval);
913         } else if (retval == EEXIST) {
914                 if (args->flags & ATTR_CREATE)
915                         return(retval);
916                 retval = xfs_attr_shortform_remove(args);
917                 ASSERT(retval == 0);
918         }
919
920         if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
921             args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX)
922                 return(XFS_ERROR(ENOSPC));
923
924         newsize = XFS_ATTR_SF_TOTSIZE(args->dp);
925         newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen);
926
927         forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize);
928         if (!forkoff)
929                 return(XFS_ERROR(ENOSPC));
930
931         xfs_attr_shortform_add(args, forkoff);
932         return(0);
933 }
934
935
936 /*========================================================================
937  * External routines when attribute list is one block
938  *========================================================================*/
939
940 /*
941  * Add a name to the leaf attribute list structure
942  *
943  * This leaf block cannot have a "remote" value, we only call this routine
944  * if bmap_one_block() says there is only one block (ie: no remote blks).
945  */
946 STATIC int
947 xfs_attr_leaf_addname(xfs_da_args_t *args)
948 {
949         xfs_inode_t *dp;
950         xfs_dabuf_t *bp;
951         int retval, error, committed, forkoff;
952
953         /*
954          * Read the (only) block in the attribute list in.
955          */
956         dp = args->dp;
957         args->blkno = 0;
958         error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
959                                              XFS_ATTR_FORK);
960         if (error)
961                 return(error);
962         ASSERT(bp != NULL);
963
964         /*
965          * Look up the given attribute in the leaf block.  Figure out if
966          * the given flags produce an error or call for an atomic rename.
967          */
968         retval = xfs_attr_leaf_lookup_int(bp, args);
969         if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
970                 xfs_da_brelse(args->trans, bp);
971                 return(retval);
972         } else if (retval == EEXIST) {
973                 if (args->flags & ATTR_CREATE) {        /* pure create op */
974                         xfs_da_brelse(args->trans, bp);
975                         return(retval);
976                 }
977                 args->rename = 1;                       /* an atomic rename */
978                 args->blkno2 = args->blkno;             /* set 2nd entry info*/
979                 args->index2 = args->index;
980                 args->rmtblkno2 = args->rmtblkno;
981                 args->rmtblkcnt2 = args->rmtblkcnt;
982         }
983
984         /*
985          * Add the attribute to the leaf block, transitioning to a Btree
986          * if required.
987          */
988         retval = xfs_attr_leaf_add(bp, args);
989         xfs_da_buf_done(bp);
990         if (retval == ENOSPC) {
991                 /*
992                  * Promote the attribute list to the Btree format, then
993                  * Commit that transaction so that the node_addname() call
994                  * can manage its own transactions.
995                  */
996                 XFS_BMAP_INIT(args->flist, args->firstblock);
997                 error = xfs_attr_leaf_to_node(args);
998                 if (!error) {
999                         error = xfs_bmap_finish(&args->trans, args->flist,
1000                                                 &committed);
1001                 }
1002                 if (error) {
1003                         ASSERT(committed);
1004                         args->trans = NULL;
1005                         xfs_bmap_cancel(args->flist);
1006                         return(error);
1007                 }
1008
1009                 /*
1010                  * bmap_finish() may have committed the last trans and started
1011                  * a new one.  We need the inode to be in all transactions.
1012                  */
1013                 if (committed) {
1014                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1015                         xfs_trans_ihold(args->trans, dp);
1016                 }
1017
1018                 /*
1019                  * Commit the current trans (including the inode) and start
1020                  * a new one.
1021                  */
1022                 if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1023                         return (error);
1024
1025                 /*
1026                  * Fob the whole rest of the problem off on the Btree code.
1027                  */
1028                 error = xfs_attr_node_addname(args);
1029                 return(error);
1030         }
1031
1032         /*
1033          * Commit the transaction that added the attr name so that
1034          * later routines can manage their own transactions.
1035          */
1036         if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1037                 return (error);
1038
1039         /*
1040          * If there was an out-of-line value, allocate the blocks we
1041          * identified for its storage and copy the value.  This is done
1042          * after we create the attribute so that we don't overflow the
1043          * maximum size of a transaction and/or hit a deadlock.
1044          */
1045         if (args->rmtblkno > 0) {
1046                 error = xfs_attr_rmtval_set(args);
1047                 if (error)
1048                         return(error);
1049         }
1050
1051         /*
1052          * If this is an atomic rename operation, we must "flip" the
1053          * incomplete flags on the "new" and "old" attribute/value pairs
1054          * so that one disappears and one appears atomically.  Then we
1055          * must remove the "old" attribute/value pair.
1056          */
1057         if (args->rename) {
1058                 /*
1059                  * In a separate transaction, set the incomplete flag on the
1060                  * "old" attr and clear the incomplete flag on the "new" attr.
1061                  */
1062                 error = xfs_attr_leaf_flipflags(args);
1063                 if (error)
1064                         return(error);
1065
1066                 /*
1067                  * Dismantle the "old" attribute/value pair by removing
1068                  * a "remote" value (if it exists).
1069                  */
1070                 args->index = args->index2;
1071                 args->blkno = args->blkno2;
1072                 args->rmtblkno = args->rmtblkno2;
1073                 args->rmtblkcnt = args->rmtblkcnt2;
1074                 if (args->rmtblkno) {
1075                         error = xfs_attr_rmtval_remove(args);
1076                         if (error)
1077                                 return(error);
1078                 }
1079
1080                 /*
1081                  * Read in the block containing the "old" attr, then
1082                  * remove the "old" attr from that block (neat, huh!)
1083                  */
1084                 error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1,
1085                                                      &bp, XFS_ATTR_FORK);
1086                 if (error)
1087                         return(error);
1088                 ASSERT(bp != NULL);
1089                 (void)xfs_attr_leaf_remove(bp, args);
1090
1091                 /*
1092                  * If the result is small enough, shrink it all into the inode.
1093                  */
1094                 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1095                         XFS_BMAP_INIT(args->flist, args->firstblock);
1096                         error = xfs_attr_leaf_to_shortform(bp, args, forkoff);
1097                         /* bp is gone due to xfs_da_shrink_inode */
1098                         if (!error) {
1099                                 error = xfs_bmap_finish(&args->trans,
1100                                                         args->flist,
1101                                                         &committed);
1102                         }
1103                         if (error) {
1104                                 ASSERT(committed);
1105                                 args->trans = NULL;
1106                                 xfs_bmap_cancel(args->flist);
1107                                 return(error);
1108                         }
1109
1110                         /*
1111                          * bmap_finish() may have committed the last trans
1112                          * and started a new one.  We need the inode to be
1113                          * in all transactions.
1114                          */
1115                         if (committed) {
1116                                 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1117                                 xfs_trans_ihold(args->trans, dp);
1118                         }
1119                 } else
1120                         xfs_da_buf_done(bp);
1121
1122                 /*
1123                  * Commit the remove and start the next trans in series.
1124                  */
1125                 error = xfs_attr_rolltrans(&args->trans, dp);
1126
1127         } else if (args->rmtblkno > 0) {
1128                 /*
1129                  * Added a "remote" value, just clear the incomplete flag.
1130                  */
1131                 error = xfs_attr_leaf_clearflag(args);
1132         }
1133         return(error);
1134 }
1135
1136 /*
1137  * Remove a name from the leaf attribute list structure
1138  *
1139  * This leaf block cannot have a "remote" value, we only call this routine
1140  * if bmap_one_block() says there is only one block (ie: no remote blks).
1141  */
1142 STATIC int
1143 xfs_attr_leaf_removename(xfs_da_args_t *args)
1144 {
1145         xfs_inode_t *dp;
1146         xfs_dabuf_t *bp;
1147         int error, committed, forkoff;
1148
1149         /*
1150          * Remove the attribute.
1151          */
1152         dp = args->dp;
1153         args->blkno = 0;
1154         error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
1155                                              XFS_ATTR_FORK);
1156         if (error) {
1157                 return(error);
1158         }
1159
1160         ASSERT(bp != NULL);
1161         error = xfs_attr_leaf_lookup_int(bp, args);
1162         if (error == ENOATTR) {
1163                 xfs_da_brelse(args->trans, bp);
1164                 return(error);
1165         }
1166
1167         (void)xfs_attr_leaf_remove(bp, args);
1168
1169         /*
1170          * If the result is small enough, shrink it all into the inode.
1171          */
1172         if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1173                 XFS_BMAP_INIT(args->flist, args->firstblock);
1174                 error = xfs_attr_leaf_to_shortform(bp, args, forkoff);
1175                 /* bp is gone due to xfs_da_shrink_inode */
1176                 if (!error) {
1177                         error = xfs_bmap_finish(&args->trans, args->flist,
1178                                                 &committed);
1179                 }
1180                 if (error) {
1181                         ASSERT(committed);
1182                         args->trans = NULL;
1183                         xfs_bmap_cancel(args->flist);
1184                         return(error);
1185                 }
1186
1187                 /*
1188                  * bmap_finish() may have committed the last trans and started
1189                  * a new one.  We need the inode to be in all transactions.
1190                  */
1191                 if (committed) {
1192                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1193                         xfs_trans_ihold(args->trans, dp);
1194                 }
1195         } else
1196                 xfs_da_buf_done(bp);
1197         return(0);
1198 }
1199
1200 /*
1201  * Look up a name in a leaf attribute list structure.
1202  *
1203  * This leaf block cannot have a "remote" value, we only call this routine
1204  * if bmap_one_block() says there is only one block (ie: no remote blks).
1205  */
1206 STATIC int
1207 xfs_attr_leaf_get(xfs_da_args_t *args)
1208 {
1209         xfs_dabuf_t *bp;
1210         int error;
1211
1212         args->blkno = 0;
1213         error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
1214                                              XFS_ATTR_FORK);
1215         if (error)
1216                 return(error);
1217         ASSERT(bp != NULL);
1218
1219         error = xfs_attr_leaf_lookup_int(bp, args);
1220         if (error != EEXIST)  {
1221                 xfs_da_brelse(args->trans, bp);
1222                 return(error);
1223         }
1224         error = xfs_attr_leaf_getvalue(bp, args);
1225         xfs_da_brelse(args->trans, bp);
1226         if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) {
1227                 error = xfs_attr_rmtval_get(args);
1228         }
1229         return(error);
1230 }
1231
1232 /*
1233  * Copy out attribute entries for attr_list(), for leaf attribute lists.
1234  */
1235 STATIC int
1236 xfs_attr_leaf_list(xfs_attr_list_context_t *context)
1237 {
1238         xfs_attr_leafblock_t *leaf;
1239         int error;
1240         xfs_dabuf_t *bp;
1241
1242         context->cursor->blkno = 0;
1243         error = xfs_da_read_buf(NULL, context->dp, 0, -1, &bp, XFS_ATTR_FORK);
1244         if (error)
1245                 return XFS_ERROR(error);
1246         ASSERT(bp != NULL);
1247         leaf = bp->data;
1248         if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC)) {
1249                 XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW,
1250                                      context->dp->i_mount, leaf);
1251                 xfs_da_brelse(NULL, bp);
1252                 return XFS_ERROR(EFSCORRUPTED);
1253         }
1254
1255         error = xfs_attr_leaf_list_int(bp, context);
1256         xfs_da_brelse(NULL, bp);
1257         return XFS_ERROR(error);
1258 }
1259
1260
1261 /*========================================================================
1262  * External routines when attribute list size > XFS_LBSIZE(mp).
1263  *========================================================================*/
1264
1265 /*
1266  * Add a name to a Btree-format attribute list.
1267  *
1268  * This will involve walking down the Btree, and may involve splitting
1269  * leaf nodes and even splitting intermediate nodes up to and including
1270  * the root node (a special case of an intermediate node).
1271  *
1272  * "Remote" attribute values confuse the issue and atomic rename operations
1273  * add a whole extra layer of confusion on top of that.
1274  */
1275 STATIC int
1276 xfs_attr_node_addname(xfs_da_args_t *args)
1277 {
1278         xfs_da_state_t *state;
1279         xfs_da_state_blk_t *blk;
1280         xfs_inode_t *dp;
1281         xfs_mount_t *mp;
1282         int committed, retval, error;
1283
1284         /*
1285          * Fill in bucket of arguments/results/context to carry around.
1286          */
1287         dp = args->dp;
1288         mp = dp->i_mount;
1289 restart:
1290         state = xfs_da_state_alloc();
1291         state->args = args;
1292         state->mp = mp;
1293         state->blocksize = state->mp->m_sb.sb_blocksize;
1294         state->node_ents = state->mp->m_attr_node_ents;
1295
1296         /*
1297          * Search to see if name already exists, and get back a pointer
1298          * to where it should go.
1299          */
1300         error = xfs_da_node_lookup_int(state, &retval);
1301         if (error)
1302                 goto out;
1303         blk = &state->path.blk[ state->path.active-1 ];
1304         ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1305         if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
1306                 goto out;
1307         } else if (retval == EEXIST) {
1308                 if (args->flags & ATTR_CREATE)
1309                         goto out;
1310                 args->rename = 1;                       /* atomic rename op */
1311                 args->blkno2 = args->blkno;             /* set 2nd entry info*/
1312                 args->index2 = args->index;
1313                 args->rmtblkno2 = args->rmtblkno;
1314                 args->rmtblkcnt2 = args->rmtblkcnt;
1315                 args->rmtblkno = 0;
1316                 args->rmtblkcnt = 0;
1317         }
1318
1319         retval = xfs_attr_leaf_add(blk->bp, state->args);
1320         if (retval == ENOSPC) {
1321                 if (state->path.active == 1) {
1322                         /*
1323                          * Its really a single leaf node, but it had
1324                          * out-of-line values so it looked like it *might*
1325                          * have been a b-tree.
1326                          */
1327                         xfs_da_state_free(state);
1328                         XFS_BMAP_INIT(args->flist, args->firstblock);
1329                         error = xfs_attr_leaf_to_node(args);
1330                         if (!error) {
1331                                 error = xfs_bmap_finish(&args->trans,
1332                                                         args->flist,
1333                                                         &committed);
1334                         }
1335                         if (error) {
1336                                 ASSERT(committed);
1337                                 args->trans = NULL;
1338                                 xfs_bmap_cancel(args->flist);
1339                                 goto out;
1340                         }
1341
1342                         /*
1343                          * bmap_finish() may have committed the last trans
1344                          * and started a new one.  We need the inode to be
1345                          * in all transactions.
1346                          */
1347                         if (committed) {
1348                                 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1349                                 xfs_trans_ihold(args->trans, dp);
1350                         }
1351
1352                         /*
1353                          * Commit the node conversion and start the next
1354                          * trans in the chain.
1355                          */
1356                         if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1357                                 goto out;
1358
1359                         goto restart;
1360                 }
1361
1362                 /*
1363                  * Split as many Btree elements as required.
1364                  * This code tracks the new and old attr's location
1365                  * in the index/blkno/rmtblkno/rmtblkcnt fields and
1366                  * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
1367                  */
1368                 XFS_BMAP_INIT(args->flist, args->firstblock);
1369                 error = xfs_da_split(state);
1370                 if (!error) {
1371                         error = xfs_bmap_finish(&args->trans, args->flist,
1372                                                 &committed);
1373                 }
1374                 if (error) {
1375                         ASSERT(committed);
1376                         args->trans = NULL;
1377                         xfs_bmap_cancel(args->flist);
1378                         goto out;
1379                 }
1380
1381                 /*
1382                  * bmap_finish() may have committed the last trans and started
1383                  * a new one.  We need the inode to be in all transactions.
1384                  */
1385                 if (committed) {
1386                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1387                         xfs_trans_ihold(args->trans, dp);
1388                 }
1389         } else {
1390                 /*
1391                  * Addition succeeded, update Btree hashvals.
1392                  */
1393                 xfs_da_fixhashpath(state, &state->path);
1394         }
1395
1396         /*
1397          * Kill the state structure, we're done with it and need to
1398          * allow the buffers to come back later.
1399          */
1400         xfs_da_state_free(state);
1401         state = NULL;
1402
1403         /*
1404          * Commit the leaf addition or btree split and start the next
1405          * trans in the chain.
1406          */
1407         if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1408                 goto out;
1409
1410         /*
1411          * If there was an out-of-line value, allocate the blocks we
1412          * identified for its storage and copy the value.  This is done
1413          * after we create the attribute so that we don't overflow the
1414          * maximum size of a transaction and/or hit a deadlock.
1415          */
1416         if (args->rmtblkno > 0) {
1417                 error = xfs_attr_rmtval_set(args);
1418                 if (error)
1419                         return(error);
1420         }
1421
1422         /*
1423          * If this is an atomic rename operation, we must "flip" the
1424          * incomplete flags on the "new" and "old" attribute/value pairs
1425          * so that one disappears and one appears atomically.  Then we
1426          * must remove the "old" attribute/value pair.
1427          */
1428         if (args->rename) {
1429                 /*
1430                  * In a separate transaction, set the incomplete flag on the
1431                  * "old" attr and clear the incomplete flag on the "new" attr.
1432                  */
1433                 error = xfs_attr_leaf_flipflags(args);
1434                 if (error)
1435                         goto out;
1436
1437                 /*
1438                  * Dismantle the "old" attribute/value pair by removing
1439                  * a "remote" value (if it exists).
1440                  */
1441                 args->index = args->index2;
1442                 args->blkno = args->blkno2;
1443                 args->rmtblkno = args->rmtblkno2;
1444                 args->rmtblkcnt = args->rmtblkcnt2;
1445                 if (args->rmtblkno) {
1446                         error = xfs_attr_rmtval_remove(args);
1447                         if (error)
1448                                 return(error);
1449                 }
1450
1451                 /*
1452                  * Re-find the "old" attribute entry after any split ops.
1453                  * The INCOMPLETE flag means that we will find the "old"
1454                  * attr, not the "new" one.
1455                  */
1456                 args->flags |= XFS_ATTR_INCOMPLETE;
1457                 state = xfs_da_state_alloc();
1458                 state->args = args;
1459                 state->mp = mp;
1460                 state->blocksize = state->mp->m_sb.sb_blocksize;
1461                 state->node_ents = state->mp->m_attr_node_ents;
1462                 state->inleaf = 0;
1463                 error = xfs_da_node_lookup_int(state, &retval);
1464                 if (error)
1465                         goto out;
1466
1467                 /*
1468                  * Remove the name and update the hashvals in the tree.
1469                  */
1470                 blk = &state->path.blk[ state->path.active-1 ];
1471                 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1472                 error = xfs_attr_leaf_remove(blk->bp, args);
1473                 xfs_da_fixhashpath(state, &state->path);
1474
1475                 /*
1476                  * Check to see if the tree needs to be collapsed.
1477                  */
1478                 if (retval && (state->path.active > 1)) {
1479                         XFS_BMAP_INIT(args->flist, args->firstblock);
1480                         error = xfs_da_join(state);
1481                         if (!error) {
1482                                 error = xfs_bmap_finish(&args->trans,
1483                                                         args->flist,
1484                                                         &committed);
1485                         }
1486                         if (error) {
1487                                 ASSERT(committed);
1488                                 args->trans = NULL;
1489                                 xfs_bmap_cancel(args->flist);
1490                                 goto out;
1491                         }
1492
1493                         /*
1494                          * bmap_finish() may have committed the last trans
1495                          * and started a new one.  We need the inode to be
1496                          * in all transactions.
1497                          */
1498                         if (committed) {
1499                                 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1500                                 xfs_trans_ihold(args->trans, dp);
1501                         }
1502                 }
1503
1504                 /*
1505                  * Commit and start the next trans in the chain.
1506                  */
1507                 if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1508                         goto out;
1509
1510         } else if (args->rmtblkno > 0) {
1511                 /*
1512                  * Added a "remote" value, just clear the incomplete flag.
1513                  */
1514                 error = xfs_attr_leaf_clearflag(args);
1515                 if (error)
1516                         goto out;
1517         }
1518         retval = error = 0;
1519
1520 out:
1521         if (state)
1522                 xfs_da_state_free(state);
1523         if (error)
1524                 return(error);
1525         return(retval);
1526 }
1527
1528 /*
1529  * Remove a name from a B-tree attribute list.
1530  *
1531  * This will involve walking down the Btree, and may involve joining
1532  * leaf nodes and even joining intermediate nodes up to and including
1533  * the root node (a special case of an intermediate node).
1534  */
1535 STATIC int
1536 xfs_attr_node_removename(xfs_da_args_t *args)
1537 {
1538         xfs_da_state_t *state;
1539         xfs_da_state_blk_t *blk;
1540         xfs_inode_t *dp;
1541         xfs_dabuf_t *bp;
1542         int retval, error, committed, forkoff;
1543
1544         /*
1545          * Tie a string around our finger to remind us where we are.
1546          */
1547         dp = args->dp;
1548         state = xfs_da_state_alloc();
1549         state->args = args;
1550         state->mp = dp->i_mount;
1551         state->blocksize = state->mp->m_sb.sb_blocksize;
1552         state->node_ents = state->mp->m_attr_node_ents;
1553
1554         /*
1555          * Search to see if name exists, and get back a pointer to it.
1556          */
1557         error = xfs_da_node_lookup_int(state, &retval);
1558         if (error || (retval != EEXIST)) {
1559                 if (error == 0)
1560                         error = retval;
1561                 goto out;
1562         }
1563
1564         /*
1565          * If there is an out-of-line value, de-allocate the blocks.
1566          * This is done before we remove the attribute so that we don't
1567          * overflow the maximum size of a transaction and/or hit a deadlock.
1568          */
1569         blk = &state->path.blk[ state->path.active-1 ];
1570         ASSERT(blk->bp != NULL);
1571         ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1572         if (args->rmtblkno > 0) {
1573                 /*
1574                  * Fill in disk block numbers in the state structure
1575                  * so that we can get the buffers back after we commit
1576                  * several transactions in the following calls.
1577                  */
1578                 error = xfs_attr_fillstate(state);
1579                 if (error)
1580                         goto out;
1581
1582                 /*
1583                  * Mark the attribute as INCOMPLETE, then bunmapi() the
1584                  * remote value.
1585                  */
1586                 error = xfs_attr_leaf_setflag(args);
1587                 if (error)
1588                         goto out;
1589                 error = xfs_attr_rmtval_remove(args);
1590                 if (error)
1591                         goto out;
1592
1593                 /*
1594                  * Refill the state structure with buffers, the prior calls
1595                  * released our buffers.
1596                  */
1597                 error = xfs_attr_refillstate(state);
1598                 if (error)
1599                         goto out;
1600         }
1601
1602         /*
1603          * Remove the name and update the hashvals in the tree.
1604          */
1605         blk = &state->path.blk[ state->path.active-1 ];
1606         ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1607         retval = xfs_attr_leaf_remove(blk->bp, args);
1608         xfs_da_fixhashpath(state, &state->path);
1609
1610         /*
1611          * Check to see if the tree needs to be collapsed.
1612          */
1613         if (retval && (state->path.active > 1)) {
1614                 XFS_BMAP_INIT(args->flist, args->firstblock);
1615                 error = xfs_da_join(state);
1616                 if (!error) {
1617                         error = xfs_bmap_finish(&args->trans, args->flist,
1618                                                 &committed);
1619                 }
1620                 if (error) {
1621                         ASSERT(committed);
1622                         args->trans = NULL;
1623                         xfs_bmap_cancel(args->flist);
1624                         goto out;
1625                 }
1626
1627                 /*
1628                  * bmap_finish() may have committed the last trans and started
1629                  * a new one.  We need the inode to be in all transactions.
1630                  */
1631                 if (committed) {
1632                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1633                         xfs_trans_ihold(args->trans, dp);
1634                 }
1635
1636                 /*
1637                  * Commit the Btree join operation and start a new trans.
1638                  */
1639                 if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1640                         goto out;
1641         }
1642
1643         /*
1644          * If the result is small enough, push it all into the inode.
1645          */
1646         if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
1647                 /*
1648                  * Have to get rid of the copy of this dabuf in the state.
1649                  */
1650                 ASSERT(state->path.active == 1);
1651                 ASSERT(state->path.blk[0].bp);
1652                 xfs_da_buf_done(state->path.blk[0].bp);
1653                 state->path.blk[0].bp = NULL;
1654
1655                 error = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp,
1656                                                      XFS_ATTR_FORK);
1657                 if (error)
1658                         goto out;
1659                 ASSERT(be16_to_cpu(((xfs_attr_leafblock_t *)
1660                                       bp->data)->hdr.info.magic)
1661                                                        == XFS_ATTR_LEAF_MAGIC);
1662
1663                 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1664                         XFS_BMAP_INIT(args->flist, args->firstblock);
1665                         error = xfs_attr_leaf_to_shortform(bp, args, forkoff);
1666                         /* bp is gone due to xfs_da_shrink_inode */
1667                         if (!error) {
1668                                 error = xfs_bmap_finish(&args->trans,
1669                                                         args->flist,
1670                                                         &committed);
1671                         }
1672                         if (error) {
1673                                 ASSERT(committed);
1674                                 args->trans = NULL;
1675                                 xfs_bmap_cancel(args->flist);
1676                                 goto out;
1677                         }
1678
1679                         /*
1680                          * bmap_finish() may have committed the last trans
1681                          * and started a new one.  We need the inode to be
1682                          * in all transactions.
1683                          */
1684                         if (committed) {
1685                                 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1686                                 xfs_trans_ihold(args->trans, dp);
1687                         }
1688                 } else
1689                         xfs_da_brelse(args->trans, bp);
1690         }
1691         error = 0;
1692
1693 out:
1694         xfs_da_state_free(state);
1695         return(error);
1696 }
1697
1698 /*
1699  * Fill in the disk block numbers in the state structure for the buffers
1700  * that are attached to the state structure.
1701  * This is done so that we can quickly reattach ourselves to those buffers
1702  * after some set of transaction commits have released these buffers.
1703  */
1704 STATIC int
1705 xfs_attr_fillstate(xfs_da_state_t *state)
1706 {
1707         xfs_da_state_path_t *path;
1708         xfs_da_state_blk_t *blk;
1709         int level;
1710
1711         /*
1712          * Roll down the "path" in the state structure, storing the on-disk
1713          * block number for those buffers in the "path".
1714          */
1715         path = &state->path;
1716         ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1717         for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1718                 if (blk->bp) {
1719                         blk->disk_blkno = xfs_da_blkno(blk->bp);
1720                         xfs_da_buf_done(blk->bp);
1721                         blk->bp = NULL;
1722                 } else {
1723                         blk->disk_blkno = 0;
1724                 }
1725         }
1726
1727         /*
1728          * Roll down the "altpath" in the state structure, storing the on-disk
1729          * block number for those buffers in the "altpath".
1730          */
1731         path = &state->altpath;
1732         ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1733         for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1734                 if (blk->bp) {
1735                         blk->disk_blkno = xfs_da_blkno(blk->bp);
1736                         xfs_da_buf_done(blk->bp);
1737                         blk->bp = NULL;
1738                 } else {
1739                         blk->disk_blkno = 0;
1740                 }
1741         }
1742
1743         return(0);
1744 }
1745
1746 /*
1747  * Reattach the buffers to the state structure based on the disk block
1748  * numbers stored in the state structure.
1749  * This is done after some set of transaction commits have released those
1750  * buffers from our grip.
1751  */
1752 STATIC int
1753 xfs_attr_refillstate(xfs_da_state_t *state)
1754 {
1755         xfs_da_state_path_t *path;
1756         xfs_da_state_blk_t *blk;
1757         int level, error;
1758
1759         /*
1760          * Roll down the "path" in the state structure, storing the on-disk
1761          * block number for those buffers in the "path".
1762          */
1763         path = &state->path;
1764         ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1765         for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1766                 if (blk->disk_blkno) {
1767                         error = xfs_da_read_buf(state->args->trans,
1768                                                 state->args->dp,
1769                                                 blk->blkno, blk->disk_blkno,
1770                                                 &blk->bp, XFS_ATTR_FORK);
1771                         if (error)
1772                                 return(error);
1773                 } else {
1774                         blk->bp = NULL;
1775                 }
1776         }
1777
1778         /*
1779          * Roll down the "altpath" in the state structure, storing the on-disk
1780          * block number for those buffers in the "altpath".
1781          */
1782         path = &state->altpath;
1783         ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1784         for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1785                 if (blk->disk_blkno) {
1786                         error = xfs_da_read_buf(state->args->trans,
1787                                                 state->args->dp,
1788                                                 blk->blkno, blk->disk_blkno,
1789                                                 &blk->bp, XFS_ATTR_FORK);
1790                         if (error)
1791                                 return(error);
1792                 } else {
1793                         blk->bp = NULL;
1794                 }
1795         }
1796
1797         return(0);
1798 }
1799
1800 /*
1801  * Look up a filename in a node attribute list.
1802  *
1803  * This routine gets called for any attribute fork that has more than one
1804  * block, ie: both true Btree attr lists and for single-leaf-blocks with
1805  * "remote" values taking up more blocks.
1806  */
1807 STATIC int
1808 xfs_attr_node_get(xfs_da_args_t *args)
1809 {
1810         xfs_da_state_t *state;
1811         xfs_da_state_blk_t *blk;
1812         int error, retval;
1813         int i;
1814
1815         state = xfs_da_state_alloc();
1816         state->args = args;
1817         state->mp = args->dp->i_mount;
1818         state->blocksize = state->mp->m_sb.sb_blocksize;
1819         state->node_ents = state->mp->m_attr_node_ents;
1820
1821         /*
1822          * Search to see if name exists, and get back a pointer to it.
1823          */
1824         error = xfs_da_node_lookup_int(state, &retval);
1825         if (error) {
1826                 retval = error;
1827         } else if (retval == EEXIST) {
1828                 blk = &state->path.blk[ state->path.active-1 ];
1829                 ASSERT(blk->bp != NULL);
1830                 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1831
1832                 /*
1833                  * Get the value, local or "remote"
1834                  */
1835                 retval = xfs_attr_leaf_getvalue(blk->bp, args);
1836                 if (!retval && (args->rmtblkno > 0)
1837                     && !(args->flags & ATTR_KERNOVAL)) {
1838                         retval = xfs_attr_rmtval_get(args);
1839                 }
1840         }
1841
1842         /*
1843          * If not in a transaction, we have to release all the buffers.
1844          */
1845         for (i = 0; i < state->path.active; i++) {
1846                 xfs_da_brelse(args->trans, state->path.blk[i].bp);
1847                 state->path.blk[i].bp = NULL;
1848         }
1849
1850         xfs_da_state_free(state);
1851         return(retval);
1852 }
1853
1854 STATIC int                                                      /* error */
1855 xfs_attr_node_list(xfs_attr_list_context_t *context)
1856 {
1857         attrlist_cursor_kern_t *cursor;
1858         xfs_attr_leafblock_t *leaf;
1859         xfs_da_intnode_t *node;
1860         xfs_da_node_entry_t *btree;
1861         int error, i;
1862         xfs_dabuf_t *bp;
1863
1864         cursor = context->cursor;
1865         cursor->initted = 1;
1866
1867         /*
1868          * Do all sorts of validation on the passed-in cursor structure.
1869          * If anything is amiss, ignore the cursor and look up the hashval
1870          * starting from the btree root.
1871          */
1872         bp = NULL;
1873         if (cursor->blkno > 0) {
1874                 error = xfs_da_read_buf(NULL, context->dp, cursor->blkno, -1,
1875                                               &bp, XFS_ATTR_FORK);
1876                 if ((error != 0) && (error != EFSCORRUPTED))
1877                         return(error);
1878                 if (bp) {
1879                         node = bp->data;
1880                         switch (be16_to_cpu(node->hdr.info.magic)) {
1881                         case XFS_DA_NODE_MAGIC:
1882                                 xfs_attr_trace_l_cn("wrong blk", context, node);
1883                                 xfs_da_brelse(NULL, bp);
1884                                 bp = NULL;
1885                                 break;
1886                         case XFS_ATTR_LEAF_MAGIC:
1887                                 leaf = bp->data;
1888                                 if (cursor->hashval > be32_to_cpu(leaf->entries[
1889                                     be16_to_cpu(leaf->hdr.count)-1].hashval)) {
1890                                         xfs_attr_trace_l_cl("wrong blk",
1891                                                            context, leaf);
1892                                         xfs_da_brelse(NULL, bp);
1893                                         bp = NULL;
1894                                 } else if (cursor->hashval <=
1895                                              be32_to_cpu(leaf->entries[0].hashval)) {
1896                                         xfs_attr_trace_l_cl("maybe wrong blk",
1897                                                            context, leaf);
1898                                         xfs_da_brelse(NULL, bp);
1899                                         bp = NULL;
1900                                 }
1901                                 break;
1902                         default:
1903                                 xfs_attr_trace_l_c("wrong blk - ??", context);
1904                                 xfs_da_brelse(NULL, bp);
1905                                 bp = NULL;
1906                         }
1907                 }
1908         }
1909
1910         /*
1911          * We did not find what we expected given the cursor's contents,
1912          * so we start from the top and work down based on the hash value.
1913          * Note that start of node block is same as start of leaf block.
1914          */
1915         if (bp == NULL) {
1916                 cursor->blkno = 0;
1917                 for (;;) {
1918                         error = xfs_da_read_buf(NULL, context->dp,
1919                                                       cursor->blkno, -1, &bp,
1920                                                       XFS_ATTR_FORK);
1921                         if (error)
1922                                 return(error);
1923                         if (unlikely(bp == NULL)) {
1924                                 XFS_ERROR_REPORT("xfs_attr_node_list(2)",
1925                                                  XFS_ERRLEVEL_LOW,
1926                                                  context->dp->i_mount);
1927                                 return(XFS_ERROR(EFSCORRUPTED));
1928                         }
1929                         node = bp->data;
1930                         if (be16_to_cpu(node->hdr.info.magic)
1931                                                         == XFS_ATTR_LEAF_MAGIC)
1932                                 break;
1933                         if (unlikely(be16_to_cpu(node->hdr.info.magic)
1934                                                         != XFS_DA_NODE_MAGIC)) {
1935                                 XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)",
1936                                                      XFS_ERRLEVEL_LOW,
1937                                                      context->dp->i_mount,
1938                                                      node);
1939                                 xfs_da_brelse(NULL, bp);
1940                                 return(XFS_ERROR(EFSCORRUPTED));
1941                         }
1942                         btree = node->btree;
1943                         for (i = 0; i < be16_to_cpu(node->hdr.count);
1944                                                                 btree++, i++) {
1945                                 if (cursor->hashval
1946                                                 <= be32_to_cpu(btree->hashval)) {
1947                                         cursor->blkno = be32_to_cpu(btree->before);
1948                                         xfs_attr_trace_l_cb("descending",
1949                                                             context, btree);
1950                                         break;
1951                                 }
1952                         }
1953                         if (i == be16_to_cpu(node->hdr.count)) {
1954                                 xfs_da_brelse(NULL, bp);
1955                                 return(0);
1956                         }
1957                         xfs_da_brelse(NULL, bp);
1958                 }
1959         }
1960         ASSERT(bp != NULL);
1961
1962         /*
1963          * Roll upward through the blocks, processing each leaf block in
1964          * order.  As long as there is space in the result buffer, keep
1965          * adding the information.
1966          */
1967         for (;;) {
1968                 leaf = bp->data;
1969                 if (unlikely(be16_to_cpu(leaf->hdr.info.magic)
1970                                                 != XFS_ATTR_LEAF_MAGIC)) {
1971                         XFS_CORRUPTION_ERROR("xfs_attr_node_list(4)",
1972                                              XFS_ERRLEVEL_LOW,
1973                                              context->dp->i_mount, leaf);
1974                         xfs_da_brelse(NULL, bp);
1975                         return(XFS_ERROR(EFSCORRUPTED));
1976                 }
1977                 error = xfs_attr_leaf_list_int(bp, context);
1978                 if (error) {
1979                         xfs_da_brelse(NULL, bp);
1980                         return error;
1981                 }
1982                 if (context->seen_enough || leaf->hdr.info.forw == 0)
1983                         break;
1984                 cursor->blkno = be32_to_cpu(leaf->hdr.info.forw);
1985                 xfs_da_brelse(NULL, bp);
1986                 error = xfs_da_read_buf(NULL, context->dp, cursor->blkno, -1,
1987                                               &bp, XFS_ATTR_FORK);
1988                 if (error)
1989                         return(error);
1990                 if (unlikely((bp == NULL))) {
1991                         XFS_ERROR_REPORT("xfs_attr_node_list(5)",
1992                                          XFS_ERRLEVEL_LOW,
1993                                          context->dp->i_mount);
1994                         return(XFS_ERROR(EFSCORRUPTED));
1995                 }
1996         }
1997         xfs_da_brelse(NULL, bp);
1998         return(0);
1999 }
2000
2001
2002 /*========================================================================
2003  * External routines for manipulating out-of-line attribute values.
2004  *========================================================================*/
2005
2006 /*
2007  * Read the value associated with an attribute from the out-of-line buffer
2008  * that we stored it in.
2009  */
2010 int
2011 xfs_attr_rmtval_get(xfs_da_args_t *args)
2012 {
2013         xfs_bmbt_irec_t map[ATTR_RMTVALUE_MAPSIZE];
2014         xfs_mount_t *mp;
2015         xfs_daddr_t dblkno;
2016         xfs_caddr_t dst;
2017         xfs_buf_t *bp;
2018         int nmap, error, tmp, valuelen, blkcnt, i;
2019         xfs_dablk_t lblkno;
2020
2021         ASSERT(!(args->flags & ATTR_KERNOVAL));
2022
2023         mp = args->dp->i_mount;
2024         dst = args->value;
2025         valuelen = args->valuelen;
2026         lblkno = args->rmtblkno;
2027         while (valuelen > 0) {
2028                 nmap = ATTR_RMTVALUE_MAPSIZE;
2029                 error = xfs_bmapi(args->trans, args->dp, (xfs_fileoff_t)lblkno,
2030                                   args->rmtblkcnt,
2031                                   XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2032                                   NULL, 0, map, &nmap, NULL, NULL);
2033                 if (error)
2034                         return(error);
2035                 ASSERT(nmap >= 1);
2036
2037                 for (i = 0; (i < nmap) && (valuelen > 0); i++) {
2038                         ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
2039                                (map[i].br_startblock != HOLESTARTBLOCK));
2040                         dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
2041                         blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
2042                         error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno,
2043                                              blkcnt, XFS_BUF_LOCK, &bp);
2044                         if (error)
2045                                 return(error);
2046
2047                         tmp = (valuelen < XFS_BUF_SIZE(bp))
2048                                 ? valuelen : XFS_BUF_SIZE(bp);
2049                         xfs_biomove(bp, 0, tmp, dst, XFS_B_READ);
2050                         xfs_buf_relse(bp);
2051                         dst += tmp;
2052                         valuelen -= tmp;
2053
2054                         lblkno += map[i].br_blockcount;
2055                 }
2056         }
2057         ASSERT(valuelen == 0);
2058         return(0);
2059 }
2060
2061 /*
2062  * Write the value associated with an attribute into the out-of-line buffer
2063  * that we have defined for it.
2064  */
2065 STATIC int
2066 xfs_attr_rmtval_set(xfs_da_args_t *args)
2067 {
2068         xfs_mount_t *mp;
2069         xfs_fileoff_t lfileoff;
2070         xfs_inode_t *dp;
2071         xfs_bmbt_irec_t map;
2072         xfs_daddr_t dblkno;
2073         xfs_caddr_t src;
2074         xfs_buf_t *bp;
2075         xfs_dablk_t lblkno;
2076         int blkcnt, valuelen, nmap, error, tmp, committed;
2077
2078         dp = args->dp;
2079         mp = dp->i_mount;
2080         src = args->value;
2081
2082         /*
2083          * Find a "hole" in the attribute address space large enough for
2084          * us to drop the new attribute's value into.
2085          */
2086         blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
2087         lfileoff = 0;
2088         error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
2089                                                    XFS_ATTR_FORK);
2090         if (error) {
2091                 return(error);
2092         }
2093         args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
2094         args->rmtblkcnt = blkcnt;
2095
2096         /*
2097          * Roll through the "value", allocating blocks on disk as required.
2098          */
2099         while (blkcnt > 0) {
2100                 /*
2101                  * Allocate a single extent, up to the size of the value.
2102                  */
2103                 XFS_BMAP_INIT(args->flist, args->firstblock);
2104                 nmap = 1;
2105                 error = xfs_bmapi(args->trans, dp, (xfs_fileoff_t)lblkno,
2106                                   blkcnt,
2107                                   XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA |
2108                                                         XFS_BMAPI_WRITE,
2109                                   args->firstblock, args->total, &map, &nmap,
2110                                   args->flist, NULL);
2111                 if (!error) {
2112                         error = xfs_bmap_finish(&args->trans, args->flist,
2113                                                 &committed);
2114                 }
2115                 if (error) {
2116                         ASSERT(committed);
2117                         args->trans = NULL;
2118                         xfs_bmap_cancel(args->flist);
2119                         return(error);
2120                 }
2121
2122                 /*
2123                  * bmap_finish() may have committed the last trans and started
2124                  * a new one.  We need the inode to be in all transactions.
2125                  */
2126                 if (committed) {
2127                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
2128                         xfs_trans_ihold(args->trans, dp);
2129                 }
2130
2131                 ASSERT(nmap == 1);
2132                 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
2133                        (map.br_startblock != HOLESTARTBLOCK));
2134                 lblkno += map.br_blockcount;
2135                 blkcnt -= map.br_blockcount;
2136
2137                 /*
2138                  * Start the next trans in the chain.
2139                  */
2140                 if ((error = xfs_attr_rolltrans(&args->trans, dp)))
2141                         return (error);
2142         }
2143
2144         /*
2145          * Roll through the "value", copying the attribute value to the
2146          * already-allocated blocks.  Blocks are written synchronously
2147          * so that we can know they are all on disk before we turn off
2148          * the INCOMPLETE flag.
2149          */
2150         lblkno = args->rmtblkno;
2151         valuelen = args->valuelen;
2152         while (valuelen > 0) {
2153                 /*
2154                  * Try to remember where we decided to put the value.
2155                  */
2156                 XFS_BMAP_INIT(args->flist, args->firstblock);
2157                 nmap = 1;
2158                 error = xfs_bmapi(NULL, dp, (xfs_fileoff_t)lblkno,
2159                                   args->rmtblkcnt,
2160                                   XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2161                                   args->firstblock, 0, &map, &nmap,
2162                                   NULL, NULL);
2163                 if (error) {
2164                         return(error);
2165                 }
2166                 ASSERT(nmap == 1);
2167                 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
2168                        (map.br_startblock != HOLESTARTBLOCK));
2169
2170                 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
2171                 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
2172
2173                 bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno,
2174                                                         blkcnt, XFS_BUF_LOCK);
2175                 ASSERT(bp);
2176                 ASSERT(!XFS_BUF_GETERROR(bp));
2177
2178                 tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen :
2179                                                         XFS_BUF_SIZE(bp);
2180                 xfs_biomove(bp, 0, tmp, src, XFS_B_WRITE);
2181                 if (tmp < XFS_BUF_SIZE(bp))
2182                         xfs_biozero(bp, tmp, XFS_BUF_SIZE(bp) - tmp);
2183                 if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */
2184                         return (error);
2185                 }
2186                 src += tmp;
2187                 valuelen -= tmp;
2188
2189                 lblkno += map.br_blockcount;
2190         }
2191         ASSERT(valuelen == 0);
2192         return(0);
2193 }
2194
2195 /*
2196  * Remove the value associated with an attribute by deleting the
2197  * out-of-line buffer that it is stored on.
2198  */
2199 STATIC int
2200 xfs_attr_rmtval_remove(xfs_da_args_t *args)
2201 {
2202         xfs_mount_t *mp;
2203         xfs_bmbt_irec_t map;
2204         xfs_buf_t *bp;
2205         xfs_daddr_t dblkno;
2206         xfs_dablk_t lblkno;
2207         int valuelen, blkcnt, nmap, error, done, committed;
2208
2209         mp = args->dp->i_mount;
2210
2211         /*
2212          * Roll through the "value", invalidating the attribute value's
2213          * blocks.
2214          */
2215         lblkno = args->rmtblkno;
2216         valuelen = args->rmtblkcnt;
2217         while (valuelen > 0) {
2218                 /*
2219                  * Try to remember where we decided to put the value.
2220                  */
2221                 XFS_BMAP_INIT(args->flist, args->firstblock);
2222                 nmap = 1;
2223                 error = xfs_bmapi(NULL, args->dp, (xfs_fileoff_t)lblkno,
2224                                         args->rmtblkcnt,
2225                                         XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2226                                         args->firstblock, 0, &map, &nmap,
2227                                         args->flist, NULL);
2228                 if (error) {
2229                         return(error);
2230                 }
2231                 ASSERT(nmap == 1);
2232                 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
2233                        (map.br_startblock != HOLESTARTBLOCK));
2234
2235                 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
2236                 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
2237
2238                 /*
2239                  * If the "remote" value is in the cache, remove it.
2240                  */
2241                 bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt,
2242                                 XFS_INCORE_TRYLOCK);
2243                 if (bp) {
2244                         XFS_BUF_STALE(bp);
2245                         XFS_BUF_UNDELAYWRITE(bp);
2246                         xfs_buf_relse(bp);
2247                         bp = NULL;
2248                 }
2249
2250                 valuelen -= map.br_blockcount;
2251
2252                 lblkno += map.br_blockcount;
2253         }
2254
2255         /*
2256          * Keep de-allocating extents until the remote-value region is gone.
2257          */
2258         lblkno = args->rmtblkno;
2259         blkcnt = args->rmtblkcnt;
2260         done = 0;
2261         while (!done) {
2262                 XFS_BMAP_INIT(args->flist, args->firstblock);
2263                 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
2264                                     XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2265                                     1, args->firstblock, args->flist,
2266                                     NULL, &done);
2267                 if (!error) {
2268                         error = xfs_bmap_finish(&args->trans, args->flist,
2269                                                 &committed);
2270                 }
2271                 if (error) {
2272                         ASSERT(committed);
2273                         args->trans = NULL;
2274                         xfs_bmap_cancel(args->flist);
2275                         return(error);
2276                 }
2277
2278                 /*
2279                  * bmap_finish() may have committed the last trans and started
2280                  * a new one.  We need the inode to be in all transactions.
2281                  */
2282                 if (committed) {
2283                         xfs_trans_ijoin(args->trans, args->dp, XFS_ILOCK_EXCL);
2284                         xfs_trans_ihold(args->trans, args->dp);
2285                 }
2286
2287                 /*
2288                  * Close out trans and start the next one in the chain.
2289                  */
2290                 if ((error = xfs_attr_rolltrans(&args->trans, args->dp)))
2291                         return (error);
2292         }
2293         return(0);
2294 }
2295
2296 #if defined(XFS_ATTR_TRACE)
2297 /*
2298  * Add a trace buffer entry for an attr_list context structure.
2299  */
2300 void
2301 xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context)
2302 {
2303         xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where,
2304                 (__psunsigned_t)context->dp,
2305                 (__psunsigned_t)context->cursor->hashval,
2306                 (__psunsigned_t)context->cursor->blkno,
2307                 (__psunsigned_t)context->cursor->offset,
2308                 (__psunsigned_t)context->alist,
2309                 (__psunsigned_t)context->bufsize,
2310                 (__psunsigned_t)context->count,
2311                 (__psunsigned_t)context->firstu,
2312                 (__psunsigned_t)
2313                         ((context->count > 0) &&
2314                         !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2315                                 ? (ATTR_ENTRY(context->alist,
2316                                               context->count-1)->a_valuelen)
2317                                 : 0,
2318                 (__psunsigned_t)context->dupcnt,
2319                 (__psunsigned_t)context->flags,
2320                 (__psunsigned_t)NULL,
2321                 (__psunsigned_t)NULL,
2322                 (__psunsigned_t)NULL);
2323 }
2324
2325 /*
2326  * Add a trace buffer entry for a context structure and a Btree node.
2327  */
2328 void
2329 xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context,
2330                          struct xfs_da_intnode *node)
2331 {
2332         xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where,
2333                 (__psunsigned_t)context->dp,
2334                 (__psunsigned_t)context->cursor->hashval,
2335                 (__psunsigned_t)context->cursor->blkno,
2336                 (__psunsigned_t)context->cursor->offset,
2337                 (__psunsigned_t)context->alist,
2338                 (__psunsigned_t)context->bufsize,
2339                 (__psunsigned_t)context->count,
2340                 (__psunsigned_t)context->firstu,
2341                 (__psunsigned_t)
2342                         ((context->count > 0) &&
2343                         !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2344                                 ? (ATTR_ENTRY(context->alist,
2345                                               context->count-1)->a_valuelen)
2346                                 : 0,
2347                 (__psunsigned_t)context->dupcnt,
2348                 (__psunsigned_t)context->flags,
2349                 (__psunsigned_t)be16_to_cpu(node->hdr.count),
2350                 (__psunsigned_t)be32_to_cpu(node->btree[0].hashval),
2351                 (__psunsigned_t)be32_to_cpu(node->btree[
2352                                     be16_to_cpu(node->hdr.count)-1].hashval));
2353 }
2354
2355 /*
2356  * Add a trace buffer entry for a context structure and a Btree element.
2357  */
2358 void
2359 xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
2360                           struct xfs_da_node_entry *btree)
2361 {
2362         xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where,
2363                 (__psunsigned_t)context->dp,
2364                 (__psunsigned_t)context->cursor->hashval,
2365                 (__psunsigned_t)context->cursor->blkno,
2366                 (__psunsigned_t)context->cursor->offset,
2367                 (__psunsigned_t)context->alist,
2368                 (__psunsigned_t)context->bufsize,
2369                 (__psunsigned_t)context->count,
2370                 (__psunsigned_t)context->firstu,
2371                 (__psunsigned_t)
2372                         ((context->count > 0) &&
2373                         !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2374                                 ? (ATTR_ENTRY(context->alist,
2375                                               context->count-1)->a_valuelen)
2376                                 : 0,
2377                 (__psunsigned_t)context->dupcnt,
2378                 (__psunsigned_t)context->flags,
2379                 (__psunsigned_t)be32_to_cpu(btree->hashval),
2380                 (__psunsigned_t)be32_to_cpu(btree->before),
2381                 (__psunsigned_t)NULL);
2382 }
2383
2384 /*
2385  * Add a trace buffer entry for a context structure and a leaf block.
2386  */
2387 void
2388 xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
2389                               struct xfs_attr_leafblock *leaf)
2390 {
2391         xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where,
2392                 (__psunsigned_t)context->dp,
2393                 (__psunsigned_t)context->cursor->hashval,
2394                 (__psunsigned_t)context->cursor->blkno,
2395                 (__psunsigned_t)context->cursor->offset,
2396                 (__psunsigned_t)context->alist,
2397                 (__psunsigned_t)context->bufsize,
2398                 (__psunsigned_t)context->count,
2399                 (__psunsigned_t)context->firstu,
2400                 (__psunsigned_t)
2401                         ((context->count > 0) &&
2402                         !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2403                                 ? (ATTR_ENTRY(context->alist,
2404                                               context->count-1)->a_valuelen)
2405                                 : 0,
2406                 (__psunsigned_t)context->dupcnt,
2407                 (__psunsigned_t)context->flags,
2408                 (__psunsigned_t)be16_to_cpu(leaf->hdr.count),
2409                 (__psunsigned_t)be32_to_cpu(leaf->entries[0].hashval),
2410                 (__psunsigned_t)be32_to_cpu(leaf->entries[
2411                                 be16_to_cpu(leaf->hdr.count)-1].hashval));
2412 }
2413
2414 /*
2415  * Add a trace buffer entry for the arguments given to the routine,
2416  * generic form.
2417  */
2418 void
2419 xfs_attr_trace_enter(int type, char *where,
2420                          __psunsigned_t a2, __psunsigned_t a3,
2421                          __psunsigned_t a4, __psunsigned_t a5,
2422                          __psunsigned_t a6, __psunsigned_t a7,
2423                          __psunsigned_t a8, __psunsigned_t a9,
2424                          __psunsigned_t a10, __psunsigned_t a11,
2425                          __psunsigned_t a12, __psunsigned_t a13,
2426                          __psunsigned_t a14, __psunsigned_t a15)
2427 {
2428         ASSERT(xfs_attr_trace_buf);
2429         ktrace_enter(xfs_attr_trace_buf, (void *)((__psunsigned_t)type),
2430                                          (void *)where,
2431                                          (void *)a2,  (void *)a3,  (void *)a4,
2432                                          (void *)a5,  (void *)a6,  (void *)a7,
2433                                          (void *)a8,  (void *)a9,  (void *)a10,
2434                                          (void *)a11, (void *)a12, (void *)a13,
2435                                          (void *)a14, (void *)a15);
2436 }
2437 #endif  /* XFS_ATTR_TRACE */
2438
2439
2440 /*========================================================================
2441  * System (pseudo) namespace attribute interface routines.
2442  *========================================================================*/
2443
2444 STATIC int
2445 posix_acl_access_set(
2446         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2447 {
2448         return xfs_acl_vset(vp, data, size, _ACL_TYPE_ACCESS);
2449 }
2450
2451 STATIC int
2452 posix_acl_access_remove(
2453         bhv_vnode_t *vp, char *name, int xflags)
2454 {
2455         return xfs_acl_vremove(vp, _ACL_TYPE_ACCESS);
2456 }
2457
2458 STATIC int
2459 posix_acl_access_get(
2460         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2461 {
2462         return xfs_acl_vget(vp, data, size, _ACL_TYPE_ACCESS);
2463 }
2464
2465 STATIC int
2466 posix_acl_access_exists(
2467         bhv_vnode_t *vp)
2468 {
2469         return xfs_acl_vhasacl_access(vp);
2470 }
2471
2472 STATIC int
2473 posix_acl_default_set(
2474         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2475 {
2476         return xfs_acl_vset(vp, data, size, _ACL_TYPE_DEFAULT);
2477 }
2478
2479 STATIC int
2480 posix_acl_default_get(
2481         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2482 {
2483         return xfs_acl_vget(vp, data, size, _ACL_TYPE_DEFAULT);
2484 }
2485
2486 STATIC int
2487 posix_acl_default_remove(
2488         bhv_vnode_t *vp, char *name, int xflags)
2489 {
2490         return xfs_acl_vremove(vp, _ACL_TYPE_DEFAULT);
2491 }
2492
2493 STATIC int
2494 posix_acl_default_exists(
2495         bhv_vnode_t *vp)
2496 {
2497         return xfs_acl_vhasacl_default(vp);
2498 }
2499
2500 static struct attrnames posix_acl_access = {
2501         .attr_name      = "posix_acl_access",
2502         .attr_namelen   = sizeof("posix_acl_access") - 1,
2503         .attr_get       = posix_acl_access_get,
2504         .attr_set       = posix_acl_access_set,
2505         .attr_remove    = posix_acl_access_remove,
2506         .attr_exists    = posix_acl_access_exists,
2507 };
2508
2509 static struct attrnames posix_acl_default = {
2510         .attr_name      = "posix_acl_default",
2511         .attr_namelen   = sizeof("posix_acl_default") - 1,
2512         .attr_get       = posix_acl_default_get,
2513         .attr_set       = posix_acl_default_set,
2514         .attr_remove    = posix_acl_default_remove,
2515         .attr_exists    = posix_acl_default_exists,
2516 };
2517
2518 static struct attrnames *attr_system_names[] =
2519         { &posix_acl_access, &posix_acl_default };
2520
2521
2522 /*========================================================================
2523  * Namespace-prefix-style attribute name interface routines.
2524  *========================================================================*/
2525
2526 STATIC int
2527 attr_generic_set(
2528         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2529 {
2530         return -xfs_attr_set(xfs_vtoi(vp), name, data, size, xflags);
2531 }
2532
2533 STATIC int
2534 attr_generic_get(
2535         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2536 {
2537         int     error, asize = size;
2538
2539         error = xfs_attr_get(xfs_vtoi(vp), name, data, &asize, xflags);
2540         if (!error)
2541                 return asize;
2542         return -error;
2543 }
2544
2545 STATIC int
2546 attr_generic_remove(
2547         bhv_vnode_t *vp, char *name, int xflags)
2548 {
2549         return -xfs_attr_remove(xfs_vtoi(vp), name, xflags);
2550 }
2551
2552 STATIC int
2553 attr_generic_listadd(
2554         attrnames_t             *prefix,
2555         attrnames_t             *namesp,
2556         void                    *data,
2557         size_t                  size,
2558         ssize_t                 *result)
2559 {
2560         char                    *p = data + *result;
2561
2562         *result += prefix->attr_namelen;
2563         *result += namesp->attr_namelen + 1;
2564         if (!size)
2565                 return 0;
2566         if (*result > size)
2567                 return -ERANGE;
2568         strcpy(p, prefix->attr_name);
2569         p += prefix->attr_namelen;
2570         strcpy(p, namesp->attr_name);
2571         p += namesp->attr_namelen + 1;
2572         return 0;
2573 }
2574
2575 STATIC int
2576 attr_system_list(
2577         bhv_vnode_t             *vp,
2578         void                    *data,
2579         size_t                  size,
2580         ssize_t                 *result)
2581 {
2582         attrnames_t             *namesp;
2583         int                     i, error = 0;
2584
2585         for (i = 0; i < ATTR_SYSCOUNT; i++) {
2586                 namesp = attr_system_names[i];
2587                 if (!namesp->attr_exists || !namesp->attr_exists(vp))
2588                         continue;
2589                 error = attr_generic_listadd(&attr_system, namesp,
2590                                                 data, size, result);
2591                 if (error)
2592                         break;
2593         }
2594         return error;
2595 }
2596
2597 int
2598 attr_generic_list(
2599         bhv_vnode_t *vp, void *data, size_t size, int xflags, ssize_t *result)
2600 {
2601         attrlist_cursor_kern_t  cursor = { 0 };
2602         int                     error;
2603
2604         error = xfs_attr_list(xfs_vtoi(vp), data, size, xflags, &cursor);
2605         if (error > 0)
2606                 return -error;
2607         *result = -error;
2608         return attr_system_list(vp, data, size, result);
2609 }
2610
2611 attrnames_t *
2612 attr_lookup_namespace(
2613         char                    *name,
2614         struct attrnames        **names,
2615         int                     nnames)
2616 {
2617         int                     i;
2618
2619         for (i = 0; i < nnames; i++)
2620                 if (!strncmp(name, names[i]->attr_name, names[i]->attr_namelen))
2621                         return names[i];
2622         return NULL;
2623 }
2624
2625 /*
2626  * Some checks to prevent people abusing EAs to get over quota:
2627  * - Don't allow modifying user EAs on devices/symlinks;
2628  * - Don't allow modifying user EAs if sticky bit set;
2629  */
2630 STATIC int
2631 attr_user_capable(
2632         bhv_vnode_t     *vp,
2633         cred_t          *cred)
2634 {
2635         struct inode    *inode = vn_to_inode(vp);
2636
2637         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
2638                 return -EPERM;
2639         if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
2640             !capable(CAP_SYS_ADMIN))
2641                 return -EPERM;
2642         if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
2643             (current_fsuid(cred) != inode->i_uid) && !capable(CAP_FOWNER))
2644                 return -EPERM;
2645         return 0;
2646 }
2647
2648 STATIC int
2649 attr_trusted_capable(
2650         bhv_vnode_t     *vp,
2651         cred_t          *cred)
2652 {
2653         struct inode    *inode = vn_to_inode(vp);
2654
2655         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
2656                 return -EPERM;
2657         if (!capable(CAP_SYS_ADMIN))
2658                 return -EPERM;
2659         return 0;
2660 }
2661
2662 STATIC int
2663 attr_system_set(
2664         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2665 {
2666         attrnames_t     *namesp;
2667         int             error;
2668
2669         if (xflags & ATTR_CREATE)
2670                 return -EINVAL;
2671
2672         namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
2673         if (!namesp)
2674                 return -EOPNOTSUPP;
2675         error = namesp->attr_set(vp, name, data, size, xflags);
2676         if (!error)
2677                 error = vn_revalidate(vp);
2678         return error;
2679 }
2680
2681 STATIC int
2682 attr_system_get(
2683         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2684 {
2685         attrnames_t     *namesp;
2686
2687         namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
2688         if (!namesp)
2689                 return -EOPNOTSUPP;
2690         return namesp->attr_get(vp, name, data, size, xflags);
2691 }
2692
2693 STATIC int
2694 attr_system_remove(
2695         bhv_vnode_t *vp, char *name, int xflags)
2696 {
2697         attrnames_t     *namesp;
2698
2699         namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
2700         if (!namesp)
2701                 return -EOPNOTSUPP;
2702         return namesp->attr_remove(vp, name, xflags);
2703 }
2704
2705 struct attrnames attr_system = {
2706         .attr_name      = "system.",
2707         .attr_namelen   = sizeof("system.") - 1,
2708         .attr_flag      = ATTR_SYSTEM,
2709         .attr_get       = attr_system_get,
2710         .attr_set       = attr_system_set,
2711         .attr_remove    = attr_system_remove,
2712         .attr_capable   = (attrcapable_t)fs_noerr,
2713 };
2714
2715 struct attrnames attr_trusted = {
2716         .attr_name      = "trusted.",
2717         .attr_namelen   = sizeof("trusted.") - 1,
2718         .attr_flag      = ATTR_ROOT,
2719         .attr_get       = attr_generic_get,
2720         .attr_set       = attr_generic_set,
2721         .attr_remove    = attr_generic_remove,
2722         .attr_capable   = attr_trusted_capable,
2723 };
2724
2725 struct attrnames attr_secure = {
2726         .attr_name      = "security.",
2727         .attr_namelen   = sizeof("security.") - 1,
2728         .attr_flag      = ATTR_SECURE,
2729         .attr_get       = attr_generic_get,
2730         .attr_set       = attr_generic_set,
2731         .attr_remove    = attr_generic_remove,
2732         .attr_capable   = (attrcapable_t)fs_noerr,
2733 };
2734
2735 struct attrnames attr_user = {
2736         .attr_name      = "user.",
2737         .attr_namelen   = sizeof("user.") - 1,
2738         .attr_get       = attr_generic_get,
2739         .attr_set       = attr_generic_set,
2740         .attr_remove    = attr_generic_remove,
2741         .attr_capable   = attr_user_capable,
2742 };
2743
2744 struct attrnames *attr_namespaces[] =
2745         { &attr_system, &attr_trusted, &attr_secure, &attr_user };