stop_machine() now uses hard_irq_disable
[linux-2.6] / fs / xfs / xfs_attr.c
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18
#include <linux/capability.h>
#include <linux/stddef.h>

#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_dir2_sf.h"
#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_acl.h"
#include "xfs_rw.h"
52
53 /*
54  * xfs_attr.c
55  *
56  * Provide the external interfaces to manage attribute lists.
57  */
58
59 #define ATTR_SYSCOUNT   2
60 static struct attrnames posix_acl_access;
61 static struct attrnames posix_acl_default;
62 static struct attrnames *attr_system_names[ATTR_SYSCOUNT];
63
64 /*========================================================================
65  * Function prototypes for the kernel.
66  *========================================================================*/
67
68 /*
69  * Internal routines when attribute list fits inside the inode.
70  */
71 STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
72
73 /*
74  * Internal routines when attribute list is one block.
75  */
76 STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
77 STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args);
78 STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
79 STATIC int xfs_attr_leaf_list(xfs_attr_list_context_t *context);
80
81 /*
82  * Internal routines when attribute list is more than one block.
83  */
84 STATIC int xfs_attr_node_get(xfs_da_args_t *args);
85 STATIC int xfs_attr_node_addname(xfs_da_args_t *args);
86 STATIC int xfs_attr_node_removename(xfs_da_args_t *args);
87 STATIC int xfs_attr_node_list(xfs_attr_list_context_t *context);
88 STATIC int xfs_attr_fillstate(xfs_da_state_t *state);
89 STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
90
91 /*
92  * Routines to manipulate out-of-line attribute values.
93  */
94 STATIC int xfs_attr_rmtval_set(xfs_da_args_t *args);
95 STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args);
96
97 #define ATTR_RMTVALUE_MAPSIZE   1       /* # of map entries at once */
98
99 #if defined(XFS_ATTR_TRACE)
100 ktrace_t *xfs_attr_trace_buf;
101 #endif
102
103
104 /*========================================================================
105  * Overall external interface routines.
106  *========================================================================*/
107
108 int
109 xfs_attr_fetch(xfs_inode_t *ip, const char *name, int namelen,
110                char *value, int *valuelenp, int flags, struct cred *cred)
111 {
112         xfs_da_args_t   args;
113         int             error;
114
115         if ((XFS_IFORK_Q(ip) == 0) ||
116             (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
117              ip->i_d.di_anextents == 0))
118                 return(ENOATTR);
119
120         /*
121          * Fill in the arg structure for this request.
122          */
123         memset((char *)&args, 0, sizeof(args));
124         args.name = name;
125         args.namelen = namelen;
126         args.value = value;
127         args.valuelen = *valuelenp;
128         args.flags = flags;
129         args.hashval = xfs_da_hashname(args.name, args.namelen);
130         args.dp = ip;
131         args.whichfork = XFS_ATTR_FORK;
132
133         /*
134          * Decide on what work routines to call based on the inode size.
135          */
136         if (XFS_IFORK_Q(ip) == 0 ||
137             (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
138              ip->i_d.di_anextents == 0)) {
139                 error = XFS_ERROR(ENOATTR);
140         } else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
141                 error = xfs_attr_shortform_getvalue(&args);
142         } else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) {
143                 error = xfs_attr_leaf_get(&args);
144         } else {
145                 error = xfs_attr_node_get(&args);
146         }
147
148         /*
149          * Return the number of bytes in the value to the caller.
150          */
151         *valuelenp = args.valuelen;
152
153         if (error == EEXIST)
154                 error = 0;
155         return(error);
156 }
157
158 int
159 xfs_attr_get(bhv_desc_t *bdp, const char *name, char *value, int *valuelenp,
160              int flags, struct cred *cred)
161 {
162         xfs_inode_t     *ip = XFS_BHVTOI(bdp);
163         int             error, namelen;
164
165         XFS_STATS_INC(xs_attr_get);
166
167         if (!name)
168                 return(EINVAL);
169         namelen = strlen(name);
170         if (namelen >= MAXNAMELEN)
171                 return(EFAULT);         /* match IRIX behaviour */
172
173         if (XFS_FORCED_SHUTDOWN(ip->i_mount))
174                 return(EIO);
175
176         xfs_ilock(ip, XFS_ILOCK_SHARED);
177         error = xfs_attr_fetch(ip, name, namelen, value, valuelenp, flags, cred);
178         xfs_iunlock(ip, XFS_ILOCK_SHARED);
179         return(error);
180 }
181
182 int
183 xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
184                  char *value, int valuelen, int flags)
185 {
186         xfs_da_args_t   args;
187         xfs_fsblock_t   firstblock;
188         xfs_bmap_free_t flist;
189         int             error, err2, committed;
190         int             local, size;
191         uint            nblks;
192         xfs_mount_t     *mp = dp->i_mount;
193         int             rsvd = (flags & ATTR_ROOT) != 0;
194
195         /*
196          * Attach the dquots to the inode.
197          */
198         if ((error = XFS_QM_DQATTACH(mp, dp, 0)))
199                 return (error);
200
201         /*
202          * If the inode doesn't have an attribute fork, add one.
203          * (inode must not be locked when we call this routine)
204          */
205         if (XFS_IFORK_Q(dp) == 0) {
206                 int sf_size = sizeof(xfs_attr_sf_hdr_t) +
207                               XFS_ATTR_SF_ENTSIZE_BYNAME(namelen, valuelen);
208
209                 if ((error = xfs_bmap_add_attrfork(dp, sf_size, rsvd)))
210                         return(error);
211         }
212
213         /*
214          * Fill in the arg structure for this request.
215          */
216         memset((char *)&args, 0, sizeof(args));
217         args.name = name;
218         args.namelen = namelen;
219         args.value = value;
220         args.valuelen = valuelen;
221         args.flags = flags;
222         args.hashval = xfs_da_hashname(args.name, args.namelen);
223         args.dp = dp;
224         args.firstblock = &firstblock;
225         args.flist = &flist;
226         args.whichfork = XFS_ATTR_FORK;
227         args.addname = 1;
228         args.oknoent = 1;
229
230         /*
231          * Determine space new attribute will use, and if it would be
232          * "local" or "remote" (note: local != inline).
233          */
234         size = xfs_attr_leaf_newentsize(namelen, valuelen,
235                                         mp->m_sb.sb_blocksize, &local);
236
237         nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
238         if (local) {
239                 if (size > (mp->m_sb.sb_blocksize >> 1)) {
240                         /* Double split possible */
241                         nblks <<= 1;
242                 }
243         } else {
244                 uint    dblocks = XFS_B_TO_FSB(mp, valuelen);
245                 /* Out of line attribute, cannot double split, but make
246                  * room for the attribute value itself.
247                  */
248                 nblks += dblocks;
249                 nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
250         }
251
252         /* Size is now blocks for attribute data */
253         args.total = nblks;
254
255         /*
256          * Start our first transaction of the day.
257          *
258          * All future transactions during this code must be "chained" off
259          * this one via the trans_dup() call.  All transactions will contain
260          * the inode, and the inode will always be marked with trans_ihold().
261          * Since the inode will be locked in all transactions, we must log
262          * the inode in every transaction to let it float upward through
263          * the log.
264          */
265         args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_SET);
266
267         /*
268          * Root fork attributes can use reserved data blocks for this
269          * operation if necessary
270          */
271
272         if (rsvd)
273                 args.trans->t_flags |= XFS_TRANS_RESERVE;
274
275         if ((error = xfs_trans_reserve(args.trans, (uint) nblks,
276                                       XFS_ATTRSET_LOG_RES(mp, nblks),
277                                       0, XFS_TRANS_PERM_LOG_RES,
278                                       XFS_ATTRSET_LOG_COUNT))) {
279                 xfs_trans_cancel(args.trans, 0);
280                 return(error);
281         }
282         xfs_ilock(dp, XFS_ILOCK_EXCL);
283
284         error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, nblks, 0,
285                          rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
286                                 XFS_QMOPT_RES_REGBLKS);
287         if (error) {
288                 xfs_iunlock(dp, XFS_ILOCK_EXCL);
289                 xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES);
290                 return (error);
291         }
292
293         xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL);
294         xfs_trans_ihold(args.trans, dp);
295
296         /*
297          * If the attribute list is non-existent or a shortform list,
298          * upgrade it to a single-leaf-block attribute list.
299          */
300         if ((dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
301             ((dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) &&
302              (dp->i_d.di_anextents == 0))) {
303
304                 /*
305                  * Build initial attribute list (if required).
306                  */
307                 if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS)
308                         xfs_attr_shortform_create(&args);
309
310                 /*
311                  * Try to add the attr to the attribute list in
312                  * the inode.
313                  */
314                 error = xfs_attr_shortform_addname(&args);
315                 if (error != ENOSPC) {
316                         /*
317                          * Commit the shortform mods, and we're done.
318                          * NOTE: this is also the error path (EEXIST, etc).
319                          */
320                         ASSERT(args.trans != NULL);
321
322                         /*
323                          * If this is a synchronous mount, make sure that
324                          * the transaction goes to disk before returning
325                          * to the user.
326                          */
327                         if (mp->m_flags & XFS_MOUNT_WSYNC) {
328                                 xfs_trans_set_sync(args.trans);
329                         }
330                         err2 = xfs_trans_commit(args.trans,
331                                                  XFS_TRANS_RELEASE_LOG_RES);
332                         xfs_iunlock(dp, XFS_ILOCK_EXCL);
333
334                         /*
335                          * Hit the inode change time.
336                          */
337                         if (!error && (flags & ATTR_KERNOTIME) == 0) {
338                                 xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
339                         }
340                         return(error == 0 ? err2 : error);
341                 }
342
343                 /*
344                  * It won't fit in the shortform, transform to a leaf block.
345                  * GROT: another possible req'mt for a double-split btree op.
346                  */
347                 XFS_BMAP_INIT(args.flist, args.firstblock);
348                 error = xfs_attr_shortform_to_leaf(&args);
349                 if (!error) {
350                         error = xfs_bmap_finish(&args.trans, args.flist,
351                                                 &committed);
352                 }
353                 if (error) {
354                         ASSERT(committed);
355                         args.trans = NULL;
356                         xfs_bmap_cancel(&flist);
357                         goto out;
358                 }
359
360                 /*
361                  * bmap_finish() may have committed the last trans and started
362                  * a new one.  We need the inode to be in all transactions.
363                  */
364                 if (committed) {
365                         xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL);
366                         xfs_trans_ihold(args.trans, dp);
367                 }
368
369                 /*
370                  * Commit the leaf transformation.  We'll need another (linked)
371                  * transaction to add the new attribute to the leaf.
372                  */
373                 if ((error = xfs_attr_rolltrans(&args.trans, dp)))
374                         goto out;
375
376         }
377
378         if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
379                 error = xfs_attr_leaf_addname(&args);
380         } else {
381                 error = xfs_attr_node_addname(&args);
382         }
383         if (error) {
384                 goto out;
385         }
386
387         /*
388          * If this is a synchronous mount, make sure that the
389          * transaction goes to disk before returning to the user.
390          */
391         if (mp->m_flags & XFS_MOUNT_WSYNC) {
392                 xfs_trans_set_sync(args.trans);
393         }
394
395         /*
396          * Commit the last in the sequence of transactions.
397          */
398         xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
399         error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
400         xfs_iunlock(dp, XFS_ILOCK_EXCL);
401
402         /*
403          * Hit the inode change time.
404          */
405         if (!error && (flags & ATTR_KERNOTIME) == 0) {
406                 xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
407         }
408
409         return(error);
410
411 out:
412         if (args.trans)
413                 xfs_trans_cancel(args.trans,
414                         XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
415         xfs_iunlock(dp, XFS_ILOCK_EXCL);
416         return(error);
417 }
418
419 int
420 xfs_attr_set(bhv_desc_t *bdp, const char *name, char *value, int valuelen, int flags,
421              struct cred *cred)
422 {
423         xfs_inode_t     *dp;
424         int             namelen;
425
426         namelen = strlen(name);
427         if (namelen >= MAXNAMELEN)
428                 return EFAULT;          /* match IRIX behaviour */
429
430         XFS_STATS_INC(xs_attr_set);
431
432         dp = XFS_BHVTOI(bdp);
433         if (XFS_FORCED_SHUTDOWN(dp->i_mount))
434                 return (EIO);
435
436         return xfs_attr_set_int(dp, name, namelen, value, valuelen, flags);
437 }
438
439 /*
440  * Generic handler routine to remove a name from an attribute list.
441  * Transitions attribute list from Btree to shortform as necessary.
442  */
443 int
444 xfs_attr_remove_int(xfs_inode_t *dp, const char *name, int namelen, int flags)
445 {
446         xfs_da_args_t   args;
447         xfs_fsblock_t   firstblock;
448         xfs_bmap_free_t flist;
449         int             error;
450         xfs_mount_t     *mp = dp->i_mount;
451
452         /*
453          * Fill in the arg structure for this request.
454          */
455         memset((char *)&args, 0, sizeof(args));
456         args.name = name;
457         args.namelen = namelen;
458         args.flags = flags;
459         args.hashval = xfs_da_hashname(args.name, args.namelen);
460         args.dp = dp;
461         args.firstblock = &firstblock;
462         args.flist = &flist;
463         args.total = 0;
464         args.whichfork = XFS_ATTR_FORK;
465
466         /*
467          * Attach the dquots to the inode.
468          */
469         if ((error = XFS_QM_DQATTACH(mp, dp, 0)))
470                 return (error);
471
472         /*
473          * Start our first transaction of the day.
474          *
475          * All future transactions during this code must be "chained" off
476          * this one via the trans_dup() call.  All transactions will contain
477          * the inode, and the inode will always be marked with trans_ihold().
478          * Since the inode will be locked in all transactions, we must log
479          * the inode in every transaction to let it float upward through
480          * the log.
481          */
482         args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_RM);
483
484         /*
485          * Root fork attributes can use reserved data blocks for this
486          * operation if necessary
487          */
488
489         if (flags & ATTR_ROOT)
490                 args.trans->t_flags |= XFS_TRANS_RESERVE;
491
492         if ((error = xfs_trans_reserve(args.trans,
493                                       XFS_ATTRRM_SPACE_RES(mp),
494                                       XFS_ATTRRM_LOG_RES(mp),
495                                       0, XFS_TRANS_PERM_LOG_RES,
496                                       XFS_ATTRRM_LOG_COUNT))) {
497                 xfs_trans_cancel(args.trans, 0);
498                 return(error);
499         }
500
501         xfs_ilock(dp, XFS_ILOCK_EXCL);
502         /*
503          * No need to make quota reservations here. We expect to release some
504          * blocks not allocate in the common case.
505          */
506         xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL);
507         xfs_trans_ihold(args.trans, dp);
508
509         /*
510          * Decide on what work routines to call based on the inode size.
511          */
512         if (XFS_IFORK_Q(dp) == 0 ||
513             (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
514              dp->i_d.di_anextents == 0)) {
515                 error = XFS_ERROR(ENOATTR);
516                 goto out;
517         }
518         if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
519                 ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
520                 error = xfs_attr_shortform_remove(&args);
521                 if (error) {
522                         goto out;
523                 }
524         } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
525                 error = xfs_attr_leaf_removename(&args);
526         } else {
527                 error = xfs_attr_node_removename(&args);
528         }
529         if (error) {
530                 goto out;
531         }
532
533         /*
534          * If this is a synchronous mount, make sure that the
535          * transaction goes to disk before returning to the user.
536          */
537         if (mp->m_flags & XFS_MOUNT_WSYNC) {
538                 xfs_trans_set_sync(args.trans);
539         }
540
541         /*
542          * Commit the last in the sequence of transactions.
543          */
544         xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
545         error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
546         xfs_iunlock(dp, XFS_ILOCK_EXCL);
547
548         /*
549          * Hit the inode change time.
550          */
551         if (!error && (flags & ATTR_KERNOTIME) == 0) {
552                 xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
553         }
554
555         return(error);
556
557 out:
558         if (args.trans)
559                 xfs_trans_cancel(args.trans,
560                         XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
561         xfs_iunlock(dp, XFS_ILOCK_EXCL);
562         return(error);
563 }
564
565 int
566 xfs_attr_remove(bhv_desc_t *bdp, const char *name, int flags, struct cred *cred)
567 {
568         xfs_inode_t         *dp;
569         int                 namelen;
570
571         namelen = strlen(name);
572         if (namelen >= MAXNAMELEN)
573                 return EFAULT;          /* match IRIX behaviour */
574
575         XFS_STATS_INC(xs_attr_remove);
576
577         dp = XFS_BHVTOI(bdp);
578         if (XFS_FORCED_SHUTDOWN(dp->i_mount))
579                 return (EIO);
580
581         xfs_ilock(dp, XFS_ILOCK_SHARED);
582         if (XFS_IFORK_Q(dp) == 0 ||
583                    (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
584                     dp->i_d.di_anextents == 0)) {
585                 xfs_iunlock(dp, XFS_ILOCK_SHARED);
586                 return(XFS_ERROR(ENOATTR));
587         }
588         xfs_iunlock(dp, XFS_ILOCK_SHARED);
589
590         return xfs_attr_remove_int(dp, name, namelen, flags);
591 }
592
593 int                                                             /* error */
594 xfs_attr_list_int(xfs_attr_list_context_t *context)
595 {
596         int error;
597         xfs_inode_t *dp = context->dp;
598
599         /*
600          * Decide on what work routines to call based on the inode size.
601          */
602         if (XFS_IFORK_Q(dp) == 0 ||
603             (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
604              dp->i_d.di_anextents == 0)) {
605                 error = 0;
606         } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
607                 error = xfs_attr_shortform_list(context);
608         } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
609                 error = xfs_attr_leaf_list(context);
610         } else {
611                 error = xfs_attr_node_list(context);
612         }
613         return error;
614 }
615
/*
 * Sizing helpers for entries written into an attrlist buffer.
 *
 * ATTR_ENTBASESIZE is the byte offset of a_name inside struct
 * attrlist_ent, i.e. the fixed part that precedes the name.  It is
 * computed with offsetof() rather than by doing arithmetic on a null
 * pointer, which the C standard leaves undefined.
 *
 * ATTR_ENTSIZE(namelen) is the space one entry occupies: fixed part,
 * the name, one byte for the terminating NUL, rounded up to a multiple
 * of sizeof(u_int32_t).
 */
#define ATTR_ENTBASESIZE		/* minimum bytes used by an attr */ \
	offsetof(struct attrlist_ent, a_name)
#define ATTR_ENTSIZE(namelen)		/* actual bytes used by an attr */ \
	((ATTR_ENTBASESIZE + (namelen) + 1 + sizeof(u_int32_t)-1) \
	 & ~(sizeof(u_int32_t)-1))
621
622 /*
623  * Format an attribute and copy it out to the user's buffer.
624  * Take care to check values and protect against them changing later,
625  * we may be reading them directly out of a user buffer.
626  */
627 /*ARGSUSED*/
628 STATIC int
629 xfs_attr_put_listent(xfs_attr_list_context_t *context, attrnames_t *namesp,
630                      char *name, int namelen,
631                      int valuelen, char *value)
632 {
633         attrlist_ent_t *aep;
634         int arraytop;
635
636         ASSERT(!(context->flags & ATTR_KERNOVAL));
637         ASSERT(context->count >= 0);
638         ASSERT(context->count < (ATTR_MAX_VALUELEN/8));
639         ASSERT(context->firstu >= sizeof(*context->alist));
640         ASSERT(context->firstu <= context->bufsize);
641
642         arraytop = sizeof(*context->alist) +
643                         context->count * sizeof(context->alist->al_offset[0]);
644         context->firstu -= ATTR_ENTSIZE(namelen);
645         if (context->firstu < arraytop) {
646                 xfs_attr_trace_l_c("buffer full", context);
647                 context->alist->al_more = 1;
648                 context->seen_enough = 1;
649                 return 1;
650         }
651
652         aep = (attrlist_ent_t *)&(((char *)context->alist)[ context->firstu ]);
653         aep->a_valuelen = valuelen;
654         memcpy(aep->a_name, name, namelen);
655         aep->a_name[ namelen ] = 0;
656         context->alist->al_offset[ context->count++ ] = context->firstu;
657         context->alist->al_count = context->count;
658         xfs_attr_trace_l_c("add", context);
659         return 0;
660 }
661
662 STATIC int
663 xfs_attr_kern_list(xfs_attr_list_context_t *context, attrnames_t *namesp,
664                      char *name, int namelen,
665                      int valuelen, char *value)
666 {
667         char *offset;
668         int arraytop;
669
670         ASSERT(context->count >= 0);
671
672         arraytop = context->count + namesp->attr_namelen + namelen + 1;
673         if (arraytop > context->firstu) {
674                 context->count = -1;    /* insufficient space */
675                 return 1;
676         }
677         offset = (char *)context->alist + context->count;
678         strncpy(offset, namesp->attr_name, namesp->attr_namelen);
679         offset += namesp->attr_namelen;
680         strncpy(offset, name, namelen);                 /* real name */
681         offset += namelen;
682         *offset = '\0';
683         context->count += namesp->attr_namelen + namelen + 1;
684         return 0;
685 }
686
687 /*ARGSUSED*/
688 STATIC int
689 xfs_attr_kern_list_sizes(xfs_attr_list_context_t *context, attrnames_t *namesp,
690                      char *name, int namelen,
691                      int valuelen, char *value)
692 {
693         context->count += namesp->attr_namelen + namelen + 1;
694         return 0;
695 }
696
697 /*
698  * Generate a list of extended attribute names and optionally
699  * also value lengths.  Positive return value follows the XFS
700  * convention of being an error, zero or negative return code
701  * is the length of the buffer returned (negated), indicating
702  * success.
703  */
704 int
705 xfs_attr_list(bhv_desc_t *bdp, char *buffer, int bufsize, int flags,
706                       attrlist_cursor_kern_t *cursor, struct cred *cred)
707 {
708         xfs_attr_list_context_t context;
709         xfs_inode_t *dp;
710         int error;
711
712         XFS_STATS_INC(xs_attr_list);
713
714         /*
715          * Validate the cursor.
716          */
717         if (cursor->pad1 || cursor->pad2)
718                 return(XFS_ERROR(EINVAL));
719         if ((cursor->initted == 0) &&
720             (cursor->hashval || cursor->blkno || cursor->offset))
721                 return XFS_ERROR(EINVAL);
722
723         /*
724          * Check for a properly aligned buffer.
725          */
726         if (((long)buffer) & (sizeof(int)-1))
727                 return XFS_ERROR(EFAULT);
728         if (flags & ATTR_KERNOVAL)
729                 bufsize = 0;
730
731         /*
732          * Initialize the output buffer.
733          */
734         context.dp = dp = XFS_BHVTOI(bdp);
735         context.cursor = cursor;
736         context.count = 0;
737         context.dupcnt = 0;
738         context.resynch = 1;
739         context.flags = flags;
740         context.seen_enough = 0;
741         context.alist = (attrlist_t *)buffer;
742         context.put_value = 0;
743
744         if (flags & ATTR_KERNAMELS) {
745                 context.bufsize = bufsize;
746                 context.firstu = context.bufsize;
747                 if (flags & ATTR_KERNOVAL)
748                         context.put_listent = xfs_attr_kern_list_sizes;
749                 else
750                         context.put_listent = xfs_attr_kern_list;
751         } else {
752                 context.bufsize = (bufsize & ~(sizeof(int)-1));  /* align */
753                 context.firstu = context.bufsize;
754                 context.alist->al_count = 0;
755                 context.alist->al_more = 0;
756                 context.alist->al_offset[0] = context.bufsize;
757                 context.put_listent = xfs_attr_put_listent;
758         }
759
760         if (XFS_FORCED_SHUTDOWN(dp->i_mount))
761                 return EIO;
762
763         xfs_ilock(dp, XFS_ILOCK_SHARED);
764         xfs_attr_trace_l_c("syscall start", &context);
765
766         error = xfs_attr_list_int(&context);
767
768         xfs_iunlock(dp, XFS_ILOCK_SHARED);
769         xfs_attr_trace_l_c("syscall end", &context);
770
771         if (context.flags & (ATTR_KERNOVAL|ATTR_KERNAMELS)) {
772                 /* must return negated buffer size or the error */
773                 if (context.count < 0)
774                         error = XFS_ERROR(ERANGE);
775                 else
776                         error = -context.count;
777         } else
778                 ASSERT(error >= 0);
779
780         return error;
781 }
782
783 int                                                             /* error */
784 xfs_attr_inactive(xfs_inode_t *dp)
785 {
786         xfs_trans_t *trans;
787         xfs_mount_t *mp;
788         int error;
789
790         mp = dp->i_mount;
791         ASSERT(! XFS_NOT_DQATTACHED(mp, dp));
792
793         xfs_ilock(dp, XFS_ILOCK_SHARED);
794         if ((XFS_IFORK_Q(dp) == 0) ||
795             (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
796             (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
797              dp->i_d.di_anextents == 0)) {
798                 xfs_iunlock(dp, XFS_ILOCK_SHARED);
799                 return(0);
800         }
801         xfs_iunlock(dp, XFS_ILOCK_SHARED);
802
803         /*
804          * Start our first transaction of the day.
805          *
806          * All future transactions during this code must be "chained" off
807          * this one via the trans_dup() call.  All transactions will contain
808          * the inode, and the inode will always be marked with trans_ihold().
809          * Since the inode will be locked in all transactions, we must log
810          * the inode in every transaction to let it float upward through
811          * the log.
812          */
813         trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL);
814         if ((error = xfs_trans_reserve(trans, 0, XFS_ATTRINVAL_LOG_RES(mp), 0,
815                                       XFS_TRANS_PERM_LOG_RES,
816                                       XFS_ATTRINVAL_LOG_COUNT))) {
817                 xfs_trans_cancel(trans, 0);
818                 return(error);
819         }
820         xfs_ilock(dp, XFS_ILOCK_EXCL);
821
822         /*
823          * No need to make quota reservations here. We expect to release some
824          * blocks, not allocate, in the common case.
825          */
826         xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL);
827         xfs_trans_ihold(trans, dp);
828
829         /*
830          * Decide on what work routines to call based on the inode size.
831          */
832         if ((XFS_IFORK_Q(dp) == 0) ||
833             (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
834             (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
835              dp->i_d.di_anextents == 0)) {
836                 error = 0;
837                 goto out;
838         }
839         error = xfs_attr_root_inactive(&trans, dp);
840         if (error)
841                 goto out;
842         /*
843          * signal synchronous inactive transactions unless this
844          * is a synchronous mount filesystem in which case we
845          * know that we're here because we've been called out of
846          * xfs_inactive which means that the last reference is gone
847          * and the unlink transaction has already hit the disk so
848          * async inactive transactions are safe.
849          */
850         if ((error = xfs_itruncate_finish(&trans, dp, 0LL, XFS_ATTR_FORK,
851                                 (!(mp->m_flags & XFS_MOUNT_WSYNC)
852                                  ? 1 : 0))))
853                 goto out;
854
855         /*
856          * Commit the last in the sequence of transactions.
857          */
858         xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
859         error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES);
860         xfs_iunlock(dp, XFS_ILOCK_EXCL);
861
862         return(error);
863
864 out:
865         xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
866         xfs_iunlock(dp, XFS_ILOCK_EXCL);
867         return(error);
868 }
869
870
871
872 /*========================================================================
873  * External routines when attribute list is inside the inode
874  *========================================================================*/
875
876 /*
877  * Add a name to the shortform attribute list structure
878  * This is the external routine.
879  */
880 STATIC int
881 xfs_attr_shortform_addname(xfs_da_args_t *args)
882 {
883         int newsize, forkoff, retval;
884
885         retval = xfs_attr_shortform_lookup(args);
886         if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
887                 return(retval);
888         } else if (retval == EEXIST) {
889                 if (args->flags & ATTR_CREATE)
890                         return(retval);
891                 retval = xfs_attr_shortform_remove(args);
892                 ASSERT(retval == 0);
893         }
894
895         if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
896             args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX)
897                 return(XFS_ERROR(ENOSPC));
898
899         newsize = XFS_ATTR_SF_TOTSIZE(args->dp);
900         newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen);
901
902         forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize);
903         if (!forkoff)
904                 return(XFS_ERROR(ENOSPC));
905
906         xfs_attr_shortform_add(args, forkoff);
907         return(0);
908 }
909
910
911 /*========================================================================
912  * External routines when attribute list is one block
913  *========================================================================*/
914
915 /*
916  * Add a name to the leaf attribute list structure
917  *
918  * This leaf block cannot have a "remote" value, we only call this routine
919  * if bmap_one_block() says there is only one block (ie: no remote blks).
920  */
921 int
922 xfs_attr_leaf_addname(xfs_da_args_t *args)
923 {
924         xfs_inode_t *dp;
925         xfs_dabuf_t *bp;
926         int retval, error, committed, forkoff;
927
928         /*
929          * Read the (only) block in the attribute list in.
930          */
931         dp = args->dp;
932         args->blkno = 0;
933         error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
934                                              XFS_ATTR_FORK);
935         if (error)
936                 return(error);
937         ASSERT(bp != NULL);
938
939         /*
940          * Look up the given attribute in the leaf block.  Figure out if
941          * the given flags produce an error or call for an atomic rename.
942          */
943         retval = xfs_attr_leaf_lookup_int(bp, args);
944         if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
945                 xfs_da_brelse(args->trans, bp);
946                 return(retval);
947         } else if (retval == EEXIST) {
948                 if (args->flags & ATTR_CREATE) {        /* pure create op */
949                         xfs_da_brelse(args->trans, bp);
950                         return(retval);
951                 }
952                 args->rename = 1;                       /* an atomic rename */
953                 args->blkno2 = args->blkno;             /* set 2nd entry info*/
954                 args->index2 = args->index;
955                 args->rmtblkno2 = args->rmtblkno;
956                 args->rmtblkcnt2 = args->rmtblkcnt;
957         }
958
959         /*
960          * Add the attribute to the leaf block, transitioning to a Btree
961          * if required.
962          */
963         retval = xfs_attr_leaf_add(bp, args);
964         xfs_da_buf_done(bp);
965         if (retval == ENOSPC) {
966                 /*
967                  * Promote the attribute list to the Btree format, then
968                  * Commit that transaction so that the node_addname() call
969                  * can manage its own transactions.
970                  */
971                 XFS_BMAP_INIT(args->flist, args->firstblock);
972                 error = xfs_attr_leaf_to_node(args);
973                 if (!error) {
974                         error = xfs_bmap_finish(&args->trans, args->flist,
975                                                 &committed);
976                 }
977                 if (error) {
978                         ASSERT(committed);
979                         args->trans = NULL;
980                         xfs_bmap_cancel(args->flist);
981                         return(error);
982                 }
983
984                 /*
985                  * bmap_finish() may have committed the last trans and started
986                  * a new one.  We need the inode to be in all transactions.
987                  */
988                 if (committed) {
989                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
990                         xfs_trans_ihold(args->trans, dp);
991                 }
992
993                 /*
994                  * Commit the current trans (including the inode) and start
995                  * a new one.
996                  */
997                 if ((error = xfs_attr_rolltrans(&args->trans, dp)))
998                         return (error);
999
1000                 /*
1001                  * Fob the whole rest of the problem off on the Btree code.
1002                  */
1003                 error = xfs_attr_node_addname(args);
1004                 return(error);
1005         }
1006
1007         /*
1008          * Commit the transaction that added the attr name so that
1009          * later routines can manage their own transactions.
1010          */
1011         if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1012                 return (error);
1013
1014         /*
1015          * If there was an out-of-line value, allocate the blocks we
1016          * identified for its storage and copy the value.  This is done
1017          * after we create the attribute so that we don't overflow the
1018          * maximum size of a transaction and/or hit a deadlock.
1019          */
1020         if (args->rmtblkno > 0) {
1021                 error = xfs_attr_rmtval_set(args);
1022                 if (error)
1023                         return(error);
1024         }
1025
1026         /*
1027          * If this is an atomic rename operation, we must "flip" the
1028          * incomplete flags on the "new" and "old" attribute/value pairs
1029          * so that one disappears and one appears atomically.  Then we
1030          * must remove the "old" attribute/value pair.
1031          */
1032         if (args->rename) {
1033                 /*
1034                  * In a separate transaction, set the incomplete flag on the
1035                  * "old" attr and clear the incomplete flag on the "new" attr.
1036                  */
1037                 error = xfs_attr_leaf_flipflags(args);
1038                 if (error)
1039                         return(error);
1040
1041                 /*
1042                  * Dismantle the "old" attribute/value pair by removing
1043                  * a "remote" value (if it exists).
1044                  */
1045                 args->index = args->index2;
1046                 args->blkno = args->blkno2;
1047                 args->rmtblkno = args->rmtblkno2;
1048                 args->rmtblkcnt = args->rmtblkcnt2;
1049                 if (args->rmtblkno) {
1050                         error = xfs_attr_rmtval_remove(args);
1051                         if (error)
1052                                 return(error);
1053                 }
1054
1055                 /*
1056                  * Read in the block containing the "old" attr, then
1057                  * remove the "old" attr from that block (neat, huh!)
1058                  */
1059                 error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1,
1060                                                      &bp, XFS_ATTR_FORK);
1061                 if (error)
1062                         return(error);
1063                 ASSERT(bp != NULL);
1064                 (void)xfs_attr_leaf_remove(bp, args);
1065
1066                 /*
1067                  * If the result is small enough, shrink it all into the inode.
1068                  */
1069                 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1070                         XFS_BMAP_INIT(args->flist, args->firstblock);
1071                         error = xfs_attr_leaf_to_shortform(bp, args, forkoff);
1072                         /* bp is gone due to xfs_da_shrink_inode */
1073                         if (!error) {
1074                                 error = xfs_bmap_finish(&args->trans,
1075                                                         args->flist,
1076                                                         &committed);
1077                         }
1078                         if (error) {
1079                                 ASSERT(committed);
1080                                 args->trans = NULL;
1081                                 xfs_bmap_cancel(args->flist);
1082                                 return(error);
1083                         }
1084
1085                         /*
1086                          * bmap_finish() may have committed the last trans
1087                          * and started a new one.  We need the inode to be
1088                          * in all transactions.
1089                          */
1090                         if (committed) {
1091                                 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1092                                 xfs_trans_ihold(args->trans, dp);
1093                         }
1094                 } else
1095                         xfs_da_buf_done(bp);
1096
1097                 /*
1098                  * Commit the remove and start the next trans in series.
1099                  */
1100                 error = xfs_attr_rolltrans(&args->trans, dp);
1101
1102         } else if (args->rmtblkno > 0) {
1103                 /*
1104                  * Added a "remote" value, just clear the incomplete flag.
1105                  */
1106                 error = xfs_attr_leaf_clearflag(args);
1107         }
1108         return(error);
1109 }
1110
1111 /*
1112  * Remove a name from the leaf attribute list structure
1113  *
1114  * This leaf block cannot have a "remote" value, we only call this routine
1115  * if bmap_one_block() says there is only one block (ie: no remote blks).
1116  */
1117 STATIC int
1118 xfs_attr_leaf_removename(xfs_da_args_t *args)
1119 {
1120         xfs_inode_t *dp;
1121         xfs_dabuf_t *bp;
1122         int error, committed, forkoff;
1123
1124         /*
1125          * Remove the attribute.
1126          */
1127         dp = args->dp;
1128         args->blkno = 0;
1129         error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
1130                                              XFS_ATTR_FORK);
1131         if (error) {
1132                 return(error);
1133         }
1134
1135         ASSERT(bp != NULL);
1136         error = xfs_attr_leaf_lookup_int(bp, args);
1137         if (error == ENOATTR) {
1138                 xfs_da_brelse(args->trans, bp);
1139                 return(error);
1140         }
1141
1142         (void)xfs_attr_leaf_remove(bp, args);
1143
1144         /*
1145          * If the result is small enough, shrink it all into the inode.
1146          */
1147         if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1148                 XFS_BMAP_INIT(args->flist, args->firstblock);
1149                 error = xfs_attr_leaf_to_shortform(bp, args, forkoff);
1150                 /* bp is gone due to xfs_da_shrink_inode */
1151                 if (!error) {
1152                         error = xfs_bmap_finish(&args->trans, args->flist,
1153                                                 &committed);
1154                 }
1155                 if (error) {
1156                         ASSERT(committed);
1157                         args->trans = NULL;
1158                         xfs_bmap_cancel(args->flist);
1159                         return(error);
1160                 }
1161
1162                 /*
1163                  * bmap_finish() may have committed the last trans and started
1164                  * a new one.  We need the inode to be in all transactions.
1165                  */
1166                 if (committed) {
1167                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1168                         xfs_trans_ihold(args->trans, dp);
1169                 }
1170         } else
1171                 xfs_da_buf_done(bp);
1172         return(0);
1173 }
1174
1175 /*
1176  * Look up a name in a leaf attribute list structure.
1177  *
1178  * This leaf block cannot have a "remote" value, we only call this routine
1179  * if bmap_one_block() says there is only one block (ie: no remote blks).
1180  */
1181 STATIC int
1182 xfs_attr_leaf_get(xfs_da_args_t *args)
1183 {
1184         xfs_dabuf_t *bp;
1185         int error;
1186
1187         args->blkno = 0;
1188         error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
1189                                              XFS_ATTR_FORK);
1190         if (error)
1191                 return(error);
1192         ASSERT(bp != NULL);
1193
1194         error = xfs_attr_leaf_lookup_int(bp, args);
1195         if (error != EEXIST)  {
1196                 xfs_da_brelse(args->trans, bp);
1197                 return(error);
1198         }
1199         error = xfs_attr_leaf_getvalue(bp, args);
1200         xfs_da_brelse(args->trans, bp);
1201         if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) {
1202                 error = xfs_attr_rmtval_get(args);
1203         }
1204         return(error);
1205 }
1206
1207 /*
1208  * Copy out attribute entries for attr_list(), for leaf attribute lists.
1209  */
1210 STATIC int
1211 xfs_attr_leaf_list(xfs_attr_list_context_t *context)
1212 {
1213         xfs_attr_leafblock_t *leaf;
1214         int error;
1215         xfs_dabuf_t *bp;
1216
1217         context->cursor->blkno = 0;
1218         error = xfs_da_read_buf(NULL, context->dp, 0, -1, &bp, XFS_ATTR_FORK);
1219         if (error)
1220                 return XFS_ERROR(error);
1221         ASSERT(bp != NULL);
1222         leaf = bp->data;
1223         if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC)) {
1224                 XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW,
1225                                      context->dp->i_mount, leaf);
1226                 xfs_da_brelse(NULL, bp);
1227                 return XFS_ERROR(EFSCORRUPTED);
1228         }
1229
1230         error = xfs_attr_leaf_list_int(bp, context);
1231         xfs_da_brelse(NULL, bp);
1232         return XFS_ERROR(error);
1233 }
1234
1235
1236 /*========================================================================
1237  * External routines when attribute list size > XFS_LBSIZE(mp).
1238  *========================================================================*/
1239
1240 /*
1241  * Add a name to a Btree-format attribute list.
1242  *
1243  * This will involve walking down the Btree, and may involve splitting
1244  * leaf nodes and even splitting intermediate nodes up to and including
1245  * the root node (a special case of an intermediate node).
1246  *
1247  * "Remote" attribute values confuse the issue and atomic rename operations
1248  * add a whole extra layer of confusion on top of that.
1249  */
1250 STATIC int
1251 xfs_attr_node_addname(xfs_da_args_t *args)
1252 {
1253         xfs_da_state_t *state;
1254         xfs_da_state_blk_t *blk;
1255         xfs_inode_t *dp;
1256         xfs_mount_t *mp;
1257         int committed, retval, error;
1258
1259         /*
1260          * Fill in bucket of arguments/results/context to carry around.
1261          */
1262         dp = args->dp;
1263         mp = dp->i_mount;
1264 restart:
1265         state = xfs_da_state_alloc();
1266         state->args = args;
1267         state->mp = mp;
1268         state->blocksize = state->mp->m_sb.sb_blocksize;
1269         state->node_ents = state->mp->m_attr_node_ents;
1270
1271         /*
1272          * Search to see if name already exists, and get back a pointer
1273          * to where it should go.
1274          */
1275         error = xfs_da_node_lookup_int(state, &retval);
1276         if (error)
1277                 goto out;
1278         blk = &state->path.blk[ state->path.active-1 ];
1279         ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1280         if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
1281                 goto out;
1282         } else if (retval == EEXIST) {
1283                 if (args->flags & ATTR_CREATE)
1284                         goto out;
1285                 args->rename = 1;                       /* atomic rename op */
1286                 args->blkno2 = args->blkno;             /* set 2nd entry info*/
1287                 args->index2 = args->index;
1288                 args->rmtblkno2 = args->rmtblkno;
1289                 args->rmtblkcnt2 = args->rmtblkcnt;
1290                 args->rmtblkno = 0;
1291                 args->rmtblkcnt = 0;
1292         }
1293
1294         retval = xfs_attr_leaf_add(blk->bp, state->args);
1295         if (retval == ENOSPC) {
1296                 if (state->path.active == 1) {
1297                         /*
1298                          * Its really a single leaf node, but it had
1299                          * out-of-line values so it looked like it *might*
1300                          * have been a b-tree.
1301                          */
1302                         xfs_da_state_free(state);
1303                         XFS_BMAP_INIT(args->flist, args->firstblock);
1304                         error = xfs_attr_leaf_to_node(args);
1305                         if (!error) {
1306                                 error = xfs_bmap_finish(&args->trans,
1307                                                         args->flist,
1308                                                         &committed);
1309                         }
1310                         if (error) {
1311                                 ASSERT(committed);
1312                                 args->trans = NULL;
1313                                 xfs_bmap_cancel(args->flist);
1314                                 goto out;
1315                         }
1316
1317                         /*
1318                          * bmap_finish() may have committed the last trans
1319                          * and started a new one.  We need the inode to be
1320                          * in all transactions.
1321                          */
1322                         if (committed) {
1323                                 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1324                                 xfs_trans_ihold(args->trans, dp);
1325                         }
1326
1327                         /*
1328                          * Commit the node conversion and start the next
1329                          * trans in the chain.
1330                          */
1331                         if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1332                                 goto out;
1333
1334                         goto restart;
1335                 }
1336
1337                 /*
1338                  * Split as many Btree elements as required.
1339                  * This code tracks the new and old attr's location
1340                  * in the index/blkno/rmtblkno/rmtblkcnt fields and
1341                  * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
1342                  */
1343                 XFS_BMAP_INIT(args->flist, args->firstblock);
1344                 error = xfs_da_split(state);
1345                 if (!error) {
1346                         error = xfs_bmap_finish(&args->trans, args->flist,
1347                                                 &committed);
1348                 }
1349                 if (error) {
1350                         ASSERT(committed);
1351                         args->trans = NULL;
1352                         xfs_bmap_cancel(args->flist);
1353                         goto out;
1354                 }
1355
1356                 /*
1357                  * bmap_finish() may have committed the last trans and started
1358                  * a new one.  We need the inode to be in all transactions.
1359                  */
1360                 if (committed) {
1361                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1362                         xfs_trans_ihold(args->trans, dp);
1363                 }
1364         } else {
1365                 /*
1366                  * Addition succeeded, update Btree hashvals.
1367                  */
1368                 xfs_da_fixhashpath(state, &state->path);
1369         }
1370
1371         /*
1372          * Kill the state structure, we're done with it and need to
1373          * allow the buffers to come back later.
1374          */
1375         xfs_da_state_free(state);
1376         state = NULL;
1377
1378         /*
1379          * Commit the leaf addition or btree split and start the next
1380          * trans in the chain.
1381          */
1382         if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1383                 goto out;
1384
1385         /*
1386          * If there was an out-of-line value, allocate the blocks we
1387          * identified for its storage and copy the value.  This is done
1388          * after we create the attribute so that we don't overflow the
1389          * maximum size of a transaction and/or hit a deadlock.
1390          */
1391         if (args->rmtblkno > 0) {
1392                 error = xfs_attr_rmtval_set(args);
1393                 if (error)
1394                         return(error);
1395         }
1396
1397         /*
1398          * If this is an atomic rename operation, we must "flip" the
1399          * incomplete flags on the "new" and "old" attribute/value pairs
1400          * so that one disappears and one appears atomically.  Then we
1401          * must remove the "old" attribute/value pair.
1402          */
1403         if (args->rename) {
1404                 /*
1405                  * In a separate transaction, set the incomplete flag on the
1406                  * "old" attr and clear the incomplete flag on the "new" attr.
1407                  */
1408                 error = xfs_attr_leaf_flipflags(args);
1409                 if (error)
1410                         goto out;
1411
1412                 /*
1413                  * Dismantle the "old" attribute/value pair by removing
1414                  * a "remote" value (if it exists).
1415                  */
1416                 args->index = args->index2;
1417                 args->blkno = args->blkno2;
1418                 args->rmtblkno = args->rmtblkno2;
1419                 args->rmtblkcnt = args->rmtblkcnt2;
1420                 if (args->rmtblkno) {
1421                         error = xfs_attr_rmtval_remove(args);
1422                         if (error)
1423                                 return(error);
1424                 }
1425
1426                 /*
1427                  * Re-find the "old" attribute entry after any split ops.
1428                  * The INCOMPLETE flag means that we will find the "old"
1429                  * attr, not the "new" one.
1430                  */
1431                 args->flags |= XFS_ATTR_INCOMPLETE;
1432                 state = xfs_da_state_alloc();
1433                 state->args = args;
1434                 state->mp = mp;
1435                 state->blocksize = state->mp->m_sb.sb_blocksize;
1436                 state->node_ents = state->mp->m_attr_node_ents;
1437                 state->inleaf = 0;
1438                 error = xfs_da_node_lookup_int(state, &retval);
1439                 if (error)
1440                         goto out;
1441
1442                 /*
1443                  * Remove the name and update the hashvals in the tree.
1444                  */
1445                 blk = &state->path.blk[ state->path.active-1 ];
1446                 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1447                 error = xfs_attr_leaf_remove(blk->bp, args);
1448                 xfs_da_fixhashpath(state, &state->path);
1449
1450                 /*
1451                  * Check to see if the tree needs to be collapsed.
1452                  */
1453                 if (retval && (state->path.active > 1)) {
1454                         XFS_BMAP_INIT(args->flist, args->firstblock);
1455                         error = xfs_da_join(state);
1456                         if (!error) {
1457                                 error = xfs_bmap_finish(&args->trans,
1458                                                         args->flist,
1459                                                         &committed);
1460                         }
1461                         if (error) {
1462                                 ASSERT(committed);
1463                                 args->trans = NULL;
1464                                 xfs_bmap_cancel(args->flist);
1465                                 goto out;
1466                         }
1467
1468                         /*
1469                          * bmap_finish() may have committed the last trans
1470                          * and started a new one.  We need the inode to be
1471                          * in all transactions.
1472                          */
1473                         if (committed) {
1474                                 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1475                                 xfs_trans_ihold(args->trans, dp);
1476                         }
1477                 }
1478
1479                 /*
1480                  * Commit and start the next trans in the chain.
1481                  */
1482                 if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1483                         goto out;
1484
1485         } else if (args->rmtblkno > 0) {
1486                 /*
1487                  * Added a "remote" value, just clear the incomplete flag.
1488                  */
1489                 error = xfs_attr_leaf_clearflag(args);
1490                 if (error)
1491                         goto out;
1492         }
1493         retval = error = 0;
1494
1495 out:
1496         if (state)
1497                 xfs_da_state_free(state);
1498         if (error)
1499                 return(error);
1500         return(retval);
1501 }
1502
1503 /*
1504  * Remove a name from a B-tree attribute list.
1505  *
1506  * This will involve walking down the Btree, and may involve joining
1507  * leaf nodes and even joining intermediate nodes up to and including
1508  * the root node (a special case of an intermediate node).
1509  */
1510 STATIC int
1511 xfs_attr_node_removename(xfs_da_args_t *args)
1512 {
1513         xfs_da_state_t *state;
1514         xfs_da_state_blk_t *blk;
1515         xfs_inode_t *dp;
1516         xfs_dabuf_t *bp;
1517         int retval, error, committed, forkoff;
1518
1519         /*
1520          * Tie a string around our finger to remind us where we are.
1521          */
1522         dp = args->dp;
1523         state = xfs_da_state_alloc();
1524         state->args = args;
1525         state->mp = dp->i_mount;
1526         state->blocksize = state->mp->m_sb.sb_blocksize;
1527         state->node_ents = state->mp->m_attr_node_ents;
1528
1529         /*
1530          * Search to see if name exists, and get back a pointer to it.
1531          */
1532         error = xfs_da_node_lookup_int(state, &retval);
1533         if (error || (retval != EEXIST)) {
1534                 if (error == 0)
1535                         error = retval;
1536                 goto out;
1537         }
1538
1539         /*
1540          * If there is an out-of-line value, de-allocate the blocks.
1541          * This is done before we remove the attribute so that we don't
1542          * overflow the maximum size of a transaction and/or hit a deadlock.
1543          */
1544         blk = &state->path.blk[ state->path.active-1 ];
1545         ASSERT(blk->bp != NULL);
1546         ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1547         if (args->rmtblkno > 0) {
1548                 /*
1549                  * Fill in disk block numbers in the state structure
1550                  * so that we can get the buffers back after we commit
1551                  * several transactions in the following calls.
1552                  */
1553                 error = xfs_attr_fillstate(state);
1554                 if (error)
1555                         goto out;
1556
1557                 /*
1558                  * Mark the attribute as INCOMPLETE, then bunmapi() the
1559                  * remote value.
1560                  */
1561                 error = xfs_attr_leaf_setflag(args);
1562                 if (error)
1563                         goto out;
1564                 error = xfs_attr_rmtval_remove(args);
1565                 if (error)
1566                         goto out;
1567
1568                 /*
1569                  * Refill the state structure with buffers, the prior calls
1570                  * released our buffers.
1571                  */
1572                 error = xfs_attr_refillstate(state);
1573                 if (error)
1574                         goto out;
1575         }
1576
1577         /*
1578          * Remove the name and update the hashvals in the tree.
1579          */
1580         blk = &state->path.blk[ state->path.active-1 ];
1581         ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1582         retval = xfs_attr_leaf_remove(blk->bp, args);
1583         xfs_da_fixhashpath(state, &state->path);
1584
1585         /*
1586          * Check to see if the tree needs to be collapsed.
1587          */
1588         if (retval && (state->path.active > 1)) {
1589                 XFS_BMAP_INIT(args->flist, args->firstblock);
1590                 error = xfs_da_join(state);
1591                 if (!error) {
1592                         error = xfs_bmap_finish(&args->trans, args->flist,
1593                                                 &committed);
1594                 }
1595                 if (error) {
1596                         ASSERT(committed);
1597                         args->trans = NULL;
1598                         xfs_bmap_cancel(args->flist);
1599                         goto out;
1600                 }
1601
1602                 /*
1603                  * bmap_finish() may have committed the last trans and started
1604                  * a new one.  We need the inode to be in all transactions.
1605                  */
1606                 if (committed) {
1607                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1608                         xfs_trans_ihold(args->trans, dp);
1609                 }
1610
1611                 /*
1612                  * Commit the Btree join operation and start a new trans.
1613                  */
1614                 if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1615                         goto out;
1616         }
1617
1618         /*
1619          * If the result is small enough, push it all into the inode.
1620          */
1621         if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
1622                 /*
1623                  * Have to get rid of the copy of this dabuf in the state.
1624                  */
1625                 ASSERT(state->path.active == 1);
1626                 ASSERT(state->path.blk[0].bp);
1627                 xfs_da_buf_done(state->path.blk[0].bp);
1628                 state->path.blk[0].bp = NULL;
1629
1630                 error = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp,
1631                                                      XFS_ATTR_FORK);
1632                 if (error)
1633                         goto out;
1634                 ASSERT(be16_to_cpu(((xfs_attr_leafblock_t *)
1635                                       bp->data)->hdr.info.magic)
1636                                                        == XFS_ATTR_LEAF_MAGIC);
1637
1638                 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1639                         XFS_BMAP_INIT(args->flist, args->firstblock);
1640                         error = xfs_attr_leaf_to_shortform(bp, args, forkoff);
1641                         /* bp is gone due to xfs_da_shrink_inode */
1642                         if (!error) {
1643                                 error = xfs_bmap_finish(&args->trans,
1644                                                         args->flist,
1645                                                         &committed);
1646                         }
1647                         if (error) {
1648                                 ASSERT(committed);
1649                                 args->trans = NULL;
1650                                 xfs_bmap_cancel(args->flist);
1651                                 goto out;
1652                         }
1653
1654                         /*
1655                          * bmap_finish() may have committed the last trans
1656                          * and started a new one.  We need the inode to be
1657                          * in all transactions.
1658                          */
1659                         if (committed) {
1660                                 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1661                                 xfs_trans_ihold(args->trans, dp);
1662                         }
1663                 } else
1664                         xfs_da_brelse(args->trans, bp);
1665         }
1666         error = 0;
1667
1668 out:
1669         xfs_da_state_free(state);
1670         return(error);
1671 }
1672
1673 /*
1674  * Fill in the disk block numbers in the state structure for the buffers
1675  * that are attached to the state structure.
1676  * This is done so that we can quickly reattach ourselves to those buffers
1677  * after some set of transaction commits have released these buffers.
1678  */
1679 STATIC int
1680 xfs_attr_fillstate(xfs_da_state_t *state)
1681 {
1682         xfs_da_state_path_t *path;
1683         xfs_da_state_blk_t *blk;
1684         int level;
1685
1686         /*
1687          * Roll down the "path" in the state structure, storing the on-disk
1688          * block number for those buffers in the "path".
1689          */
1690         path = &state->path;
1691         ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1692         for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1693                 if (blk->bp) {
1694                         blk->disk_blkno = xfs_da_blkno(blk->bp);
1695                         xfs_da_buf_done(blk->bp);
1696                         blk->bp = NULL;
1697                 } else {
1698                         blk->disk_blkno = 0;
1699                 }
1700         }
1701
1702         /*
1703          * Roll down the "altpath" in the state structure, storing the on-disk
1704          * block number for those buffers in the "altpath".
1705          */
1706         path = &state->altpath;
1707         ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1708         for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1709                 if (blk->bp) {
1710                         blk->disk_blkno = xfs_da_blkno(blk->bp);
1711                         xfs_da_buf_done(blk->bp);
1712                         blk->bp = NULL;
1713                 } else {
1714                         blk->disk_blkno = 0;
1715                 }
1716         }
1717
1718         return(0);
1719 }
1720
1721 /*
1722  * Reattach the buffers to the state structure based on the disk block
1723  * numbers stored in the state structure.
1724  * This is done after some set of transaction commits have released those
1725  * buffers from our grip.
1726  */
1727 STATIC int
1728 xfs_attr_refillstate(xfs_da_state_t *state)
1729 {
1730         xfs_da_state_path_t *path;
1731         xfs_da_state_blk_t *blk;
1732         int level, error;
1733
1734         /*
1735          * Roll down the "path" in the state structure, storing the on-disk
1736          * block number for those buffers in the "path".
1737          */
1738         path = &state->path;
1739         ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1740         for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1741                 if (blk->disk_blkno) {
1742                         error = xfs_da_read_buf(state->args->trans,
1743                                                 state->args->dp,
1744                                                 blk->blkno, blk->disk_blkno,
1745                                                 &blk->bp, XFS_ATTR_FORK);
1746                         if (error)
1747                                 return(error);
1748                 } else {
1749                         blk->bp = NULL;
1750                 }
1751         }
1752
1753         /*
1754          * Roll down the "altpath" in the state structure, storing the on-disk
1755          * block number for those buffers in the "altpath".
1756          */
1757         path = &state->altpath;
1758         ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1759         for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1760                 if (blk->disk_blkno) {
1761                         error = xfs_da_read_buf(state->args->trans,
1762                                                 state->args->dp,
1763                                                 blk->blkno, blk->disk_blkno,
1764                                                 &blk->bp, XFS_ATTR_FORK);
1765                         if (error)
1766                                 return(error);
1767                 } else {
1768                         blk->bp = NULL;
1769                 }
1770         }
1771
1772         return(0);
1773 }
1774
1775 /*
1776  * Look up a filename in a node attribute list.
1777  *
1778  * This routine gets called for any attribute fork that has more than one
1779  * block, ie: both true Btree attr lists and for single-leaf-blocks with
1780  * "remote" values taking up more blocks.
1781  */
1782 STATIC int
1783 xfs_attr_node_get(xfs_da_args_t *args)
1784 {
1785         xfs_da_state_t *state;
1786         xfs_da_state_blk_t *blk;
1787         int error, retval;
1788         int i;
1789
1790         state = xfs_da_state_alloc();
1791         state->args = args;
1792         state->mp = args->dp->i_mount;
1793         state->blocksize = state->mp->m_sb.sb_blocksize;
1794         state->node_ents = state->mp->m_attr_node_ents;
1795
1796         /*
1797          * Search to see if name exists, and get back a pointer to it.
1798          */
1799         error = xfs_da_node_lookup_int(state, &retval);
1800         if (error) {
1801                 retval = error;
1802         } else if (retval == EEXIST) {
1803                 blk = &state->path.blk[ state->path.active-1 ];
1804                 ASSERT(blk->bp != NULL);
1805                 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1806
1807                 /*
1808                  * Get the value, local or "remote"
1809                  */
1810                 retval = xfs_attr_leaf_getvalue(blk->bp, args);
1811                 if (!retval && (args->rmtblkno > 0)
1812                     && !(args->flags & ATTR_KERNOVAL)) {
1813                         retval = xfs_attr_rmtval_get(args);
1814                 }
1815         }
1816
1817         /*
1818          * If not in a transaction, we have to release all the buffers.
1819          */
1820         for (i = 0; i < state->path.active; i++) {
1821                 xfs_da_brelse(args->trans, state->path.blk[i].bp);
1822                 state->path.blk[i].bp = NULL;
1823         }
1824
1825         xfs_da_state_free(state);
1826         return(retval);
1827 }
1828
1829 STATIC int                                                      /* error */
1830 xfs_attr_node_list(xfs_attr_list_context_t *context)
1831 {
1832         attrlist_cursor_kern_t *cursor;
1833         xfs_attr_leafblock_t *leaf;
1834         xfs_da_intnode_t *node;
1835         xfs_da_node_entry_t *btree;
1836         int error, i;
1837         xfs_dabuf_t *bp;
1838
1839         cursor = context->cursor;
1840         cursor->initted = 1;
1841
1842         /*
1843          * Do all sorts of validation on the passed-in cursor structure.
1844          * If anything is amiss, ignore the cursor and look up the hashval
1845          * starting from the btree root.
1846          */
1847         bp = NULL;
1848         if (cursor->blkno > 0) {
1849                 error = xfs_da_read_buf(NULL, context->dp, cursor->blkno, -1,
1850                                               &bp, XFS_ATTR_FORK);
1851                 if ((error != 0) && (error != EFSCORRUPTED))
1852                         return(error);
1853                 if (bp) {
1854                         node = bp->data;
1855                         switch (be16_to_cpu(node->hdr.info.magic)) {
1856                         case XFS_DA_NODE_MAGIC:
1857                                 xfs_attr_trace_l_cn("wrong blk", context, node);
1858                                 xfs_da_brelse(NULL, bp);
1859                                 bp = NULL;
1860                                 break;
1861                         case XFS_ATTR_LEAF_MAGIC:
1862                                 leaf = bp->data;
1863                                 if (cursor->hashval > be32_to_cpu(leaf->entries[
1864                                     be16_to_cpu(leaf->hdr.count)-1].hashval)) {
1865                                         xfs_attr_trace_l_cl("wrong blk",
1866                                                            context, leaf);
1867                                         xfs_da_brelse(NULL, bp);
1868                                         bp = NULL;
1869                                 } else if (cursor->hashval <=
1870                                              be32_to_cpu(leaf->entries[0].hashval)) {
1871                                         xfs_attr_trace_l_cl("maybe wrong blk",
1872                                                            context, leaf);
1873                                         xfs_da_brelse(NULL, bp);
1874                                         bp = NULL;
1875                                 }
1876                                 break;
1877                         default:
1878                                 xfs_attr_trace_l_c("wrong blk - ??", context);
1879                                 xfs_da_brelse(NULL, bp);
1880                                 bp = NULL;
1881                         }
1882                 }
1883         }
1884
1885         /*
1886          * We did not find what we expected given the cursor's contents,
1887          * so we start from the top and work down based on the hash value.
1888          * Note that start of node block is same as start of leaf block.
1889          */
1890         if (bp == NULL) {
1891                 cursor->blkno = 0;
1892                 for (;;) {
1893                         error = xfs_da_read_buf(NULL, context->dp,
1894                                                       cursor->blkno, -1, &bp,
1895                                                       XFS_ATTR_FORK);
1896                         if (error)
1897                                 return(error);
1898                         if (unlikely(bp == NULL)) {
1899                                 XFS_ERROR_REPORT("xfs_attr_node_list(2)",
1900                                                  XFS_ERRLEVEL_LOW,
1901                                                  context->dp->i_mount);
1902                                 return(XFS_ERROR(EFSCORRUPTED));
1903                         }
1904                         node = bp->data;
1905                         if (be16_to_cpu(node->hdr.info.magic)
1906                                                         == XFS_ATTR_LEAF_MAGIC)
1907                                 break;
1908                         if (unlikely(be16_to_cpu(node->hdr.info.magic)
1909                                                         != XFS_DA_NODE_MAGIC)) {
1910                                 XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)",
1911                                                      XFS_ERRLEVEL_LOW,
1912                                                      context->dp->i_mount,
1913                                                      node);
1914                                 xfs_da_brelse(NULL, bp);
1915                                 return(XFS_ERROR(EFSCORRUPTED));
1916                         }
1917                         btree = node->btree;
1918                         for (i = 0; i < be16_to_cpu(node->hdr.count);
1919                                                                 btree++, i++) {
1920                                 if (cursor->hashval
1921                                                 <= be32_to_cpu(btree->hashval)) {
1922                                         cursor->blkno = be32_to_cpu(btree->before);
1923                                         xfs_attr_trace_l_cb("descending",
1924                                                             context, btree);
1925                                         break;
1926                                 }
1927                         }
1928                         if (i == be16_to_cpu(node->hdr.count)) {
1929                                 xfs_da_brelse(NULL, bp);
1930                                 return(0);
1931                         }
1932                         xfs_da_brelse(NULL, bp);
1933                 }
1934         }
1935         ASSERT(bp != NULL);
1936
1937         /*
1938          * Roll upward through the blocks, processing each leaf block in
1939          * order.  As long as there is space in the result buffer, keep
1940          * adding the information.
1941          */
1942         for (;;) {
1943                 leaf = bp->data;
1944                 if (unlikely(be16_to_cpu(leaf->hdr.info.magic)
1945                                                 != XFS_ATTR_LEAF_MAGIC)) {
1946                         XFS_CORRUPTION_ERROR("xfs_attr_node_list(4)",
1947                                              XFS_ERRLEVEL_LOW,
1948                                              context->dp->i_mount, leaf);
1949                         xfs_da_brelse(NULL, bp);
1950                         return(XFS_ERROR(EFSCORRUPTED));
1951                 }
1952                 error = xfs_attr_leaf_list_int(bp, context);
1953                 if (error) {
1954                         xfs_da_brelse(NULL, bp);
1955                         return error;
1956                 }
1957                 if (context->seen_enough || leaf->hdr.info.forw == 0)
1958                         break;
1959                 cursor->blkno = be32_to_cpu(leaf->hdr.info.forw);
1960                 xfs_da_brelse(NULL, bp);
1961                 error = xfs_da_read_buf(NULL, context->dp, cursor->blkno, -1,
1962                                               &bp, XFS_ATTR_FORK);
1963                 if (error)
1964                         return(error);
1965                 if (unlikely((bp == NULL))) {
1966                         XFS_ERROR_REPORT("xfs_attr_node_list(5)",
1967                                          XFS_ERRLEVEL_LOW,
1968                                          context->dp->i_mount);
1969                         return(XFS_ERROR(EFSCORRUPTED));
1970                 }
1971         }
1972         xfs_da_brelse(NULL, bp);
1973         return(0);
1974 }
1975
1976
1977 /*========================================================================
1978  * External routines for manipulating out-of-line attribute values.
1979  *========================================================================*/
1980
1981 /*
1982  * Read the value associated with an attribute from the out-of-line buffer
1983  * that we stored it in.
1984  */
1985 int
1986 xfs_attr_rmtval_get(xfs_da_args_t *args)
1987 {
1988         xfs_bmbt_irec_t map[ATTR_RMTVALUE_MAPSIZE];
1989         xfs_mount_t *mp;
1990         xfs_daddr_t dblkno;
1991         xfs_caddr_t dst;
1992         xfs_buf_t *bp;
1993         int nmap, error, tmp, valuelen, blkcnt, i;
1994         xfs_dablk_t lblkno;
1995
1996         ASSERT(!(args->flags & ATTR_KERNOVAL));
1997
1998         mp = args->dp->i_mount;
1999         dst = args->value;
2000         valuelen = args->valuelen;
2001         lblkno = args->rmtblkno;
2002         while (valuelen > 0) {
2003                 nmap = ATTR_RMTVALUE_MAPSIZE;
2004                 error = xfs_bmapi(args->trans, args->dp, (xfs_fileoff_t)lblkno,
2005                                   args->rmtblkcnt,
2006                                   XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2007                                   NULL, 0, map, &nmap, NULL, NULL);
2008                 if (error)
2009                         return(error);
2010                 ASSERT(nmap >= 1);
2011
2012                 for (i = 0; (i < nmap) && (valuelen > 0); i++) {
2013                         ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
2014                                (map[i].br_startblock != HOLESTARTBLOCK));
2015                         dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
2016                         blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
2017                         error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno,
2018                                              blkcnt, XFS_BUF_LOCK, &bp);
2019                         if (error)
2020                                 return(error);
2021
2022                         tmp = (valuelen < XFS_BUF_SIZE(bp))
2023                                 ? valuelen : XFS_BUF_SIZE(bp);
2024                         xfs_biomove(bp, 0, tmp, dst, XFS_B_READ);
2025                         xfs_buf_relse(bp);
2026                         dst += tmp;
2027                         valuelen -= tmp;
2028
2029                         lblkno += map[i].br_blockcount;
2030                 }
2031         }
2032         ASSERT(valuelen == 0);
2033         return(0);
2034 }
2035
2036 /*
2037  * Write the value associated with an attribute into the out-of-line buffer
2038  * that we have defined for it.
2039  */
2040 STATIC int
2041 xfs_attr_rmtval_set(xfs_da_args_t *args)
2042 {
2043         xfs_mount_t *mp;
2044         xfs_fileoff_t lfileoff;
2045         xfs_inode_t *dp;
2046         xfs_bmbt_irec_t map;
2047         xfs_daddr_t dblkno;
2048         xfs_caddr_t src;
2049         xfs_buf_t *bp;
2050         xfs_dablk_t lblkno;
2051         int blkcnt, valuelen, nmap, error, tmp, committed;
2052
2053         dp = args->dp;
2054         mp = dp->i_mount;
2055         src = args->value;
2056
2057         /*
2058          * Find a "hole" in the attribute address space large enough for
2059          * us to drop the new attribute's value into.
2060          */
2061         blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
2062         lfileoff = 0;
2063         error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
2064                                                    XFS_ATTR_FORK);
2065         if (error) {
2066                 return(error);
2067         }
2068         args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
2069         args->rmtblkcnt = blkcnt;
2070
2071         /*
2072          * Roll through the "value", allocating blocks on disk as required.
2073          */
2074         while (blkcnt > 0) {
2075                 /*
2076                  * Allocate a single extent, up to the size of the value.
2077                  */
2078                 XFS_BMAP_INIT(args->flist, args->firstblock);
2079                 nmap = 1;
2080                 error = xfs_bmapi(args->trans, dp, (xfs_fileoff_t)lblkno,
2081                                   blkcnt,
2082                                   XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA |
2083                                                         XFS_BMAPI_WRITE,
2084                                   args->firstblock, args->total, &map, &nmap,
2085                                   args->flist, NULL);
2086                 if (!error) {
2087                         error = xfs_bmap_finish(&args->trans, args->flist,
2088                                                 &committed);
2089                 }
2090                 if (error) {
2091                         ASSERT(committed);
2092                         args->trans = NULL;
2093                         xfs_bmap_cancel(args->flist);
2094                         return(error);
2095                 }
2096
2097                 /*
2098                  * bmap_finish() may have committed the last trans and started
2099                  * a new one.  We need the inode to be in all transactions.
2100                  */
2101                 if (committed) {
2102                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
2103                         xfs_trans_ihold(args->trans, dp);
2104                 }
2105
2106                 ASSERT(nmap == 1);
2107                 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
2108                        (map.br_startblock != HOLESTARTBLOCK));
2109                 lblkno += map.br_blockcount;
2110                 blkcnt -= map.br_blockcount;
2111
2112                 /*
2113                  * Start the next trans in the chain.
2114                  */
2115                 if ((error = xfs_attr_rolltrans(&args->trans, dp)))
2116                         return (error);
2117         }
2118
2119         /*
2120          * Roll through the "value", copying the attribute value to the
2121          * already-allocated blocks.  Blocks are written synchronously
2122          * so that we can know they are all on disk before we turn off
2123          * the INCOMPLETE flag.
2124          */
2125         lblkno = args->rmtblkno;
2126         valuelen = args->valuelen;
2127         while (valuelen > 0) {
2128                 /*
2129                  * Try to remember where we decided to put the value.
2130                  */
2131                 XFS_BMAP_INIT(args->flist, args->firstblock);
2132                 nmap = 1;
2133                 error = xfs_bmapi(NULL, dp, (xfs_fileoff_t)lblkno,
2134                                   args->rmtblkcnt,
2135                                   XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2136                                   args->firstblock, 0, &map, &nmap,
2137                                   NULL, NULL);
2138                 if (error) {
2139                         return(error);
2140                 }
2141                 ASSERT(nmap == 1);
2142                 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
2143                        (map.br_startblock != HOLESTARTBLOCK));
2144
2145                 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
2146                 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
2147
2148                 bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno,
2149                                                         blkcnt, XFS_BUF_LOCK);
2150                 ASSERT(bp);
2151                 ASSERT(!XFS_BUF_GETERROR(bp));
2152
2153                 tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen :
2154                                                         XFS_BUF_SIZE(bp);
2155                 xfs_biomove(bp, 0, tmp, src, XFS_B_WRITE);
2156                 if (tmp < XFS_BUF_SIZE(bp))
2157                         xfs_biozero(bp, tmp, XFS_BUF_SIZE(bp) - tmp);
2158                 if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */
2159                         return (error);
2160                 }
2161                 src += tmp;
2162                 valuelen -= tmp;
2163
2164                 lblkno += map.br_blockcount;
2165         }
2166         ASSERT(valuelen == 0);
2167         return(0);
2168 }
2169
2170 /*
2171  * Remove the value associated with an attribute by deleting the
2172  * out-of-line buffer that it is stored on.
2173  */
2174 STATIC int
2175 xfs_attr_rmtval_remove(xfs_da_args_t *args)
2176 {
2177         xfs_mount_t *mp;
2178         xfs_bmbt_irec_t map;
2179         xfs_buf_t *bp;
2180         xfs_daddr_t dblkno;
2181         xfs_dablk_t lblkno;
2182         int valuelen, blkcnt, nmap, error, done, committed;
2183
2184         mp = args->dp->i_mount;
2185
2186         /*
2187          * Roll through the "value", invalidating the attribute value's
2188          * blocks.
2189          */
2190         lblkno = args->rmtblkno;
2191         valuelen = args->rmtblkcnt;
2192         while (valuelen > 0) {
2193                 /*
2194                  * Try to remember where we decided to put the value.
2195                  */
2196                 XFS_BMAP_INIT(args->flist, args->firstblock);
2197                 nmap = 1;
2198                 error = xfs_bmapi(NULL, args->dp, (xfs_fileoff_t)lblkno,
2199                                         args->rmtblkcnt,
2200                                         XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2201                                         args->firstblock, 0, &map, &nmap,
2202                                         args->flist, NULL);
2203                 if (error) {
2204                         return(error);
2205                 }
2206                 ASSERT(nmap == 1);
2207                 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
2208                        (map.br_startblock != HOLESTARTBLOCK));
2209
2210                 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
2211                 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
2212
2213                 /*
2214                  * If the "remote" value is in the cache, remove it.
2215                  */
2216                 bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt,
2217                                 XFS_INCORE_TRYLOCK);
2218                 if (bp) {
2219                         XFS_BUF_STALE(bp);
2220                         XFS_BUF_UNDELAYWRITE(bp);
2221                         xfs_buf_relse(bp);
2222                         bp = NULL;
2223                 }
2224
2225                 valuelen -= map.br_blockcount;
2226
2227                 lblkno += map.br_blockcount;
2228         }
2229
2230         /*
2231          * Keep de-allocating extents until the remote-value region is gone.
2232          */
2233         lblkno = args->rmtblkno;
2234         blkcnt = args->rmtblkcnt;
2235         done = 0;
2236         while (!done) {
2237                 XFS_BMAP_INIT(args->flist, args->firstblock);
2238                 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
2239                                     XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2240                                     1, args->firstblock, args->flist,
2241                                     NULL, &done);
2242                 if (!error) {
2243                         error = xfs_bmap_finish(&args->trans, args->flist,
2244                                                 &committed);
2245                 }
2246                 if (error) {
2247                         ASSERT(committed);
2248                         args->trans = NULL;
2249                         xfs_bmap_cancel(args->flist);
2250                         return(error);
2251                 }
2252
2253                 /*
2254                  * bmap_finish() may have committed the last trans and started
2255                  * a new one.  We need the inode to be in all transactions.
2256                  */
2257                 if (committed) {
2258                         xfs_trans_ijoin(args->trans, args->dp, XFS_ILOCK_EXCL);
2259                         xfs_trans_ihold(args->trans, args->dp);
2260                 }
2261
2262                 /*
2263                  * Close out trans and start the next one in the chain.
2264                  */
2265                 if ((error = xfs_attr_rolltrans(&args->trans, args->dp)))
2266                         return (error);
2267         }
2268         return(0);
2269 }
2270
2271 #if defined(XFS_ATTR_TRACE)
2272 /*
2273  * Add a trace buffer entry for an attr_list context structure.
2274  */
2275 void
2276 xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context)
2277 {
2278         xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where,
2279                 (__psunsigned_t)context->dp,
2280                 (__psunsigned_t)context->cursor->hashval,
2281                 (__psunsigned_t)context->cursor->blkno,
2282                 (__psunsigned_t)context->cursor->offset,
2283                 (__psunsigned_t)context->alist,
2284                 (__psunsigned_t)context->bufsize,
2285                 (__psunsigned_t)context->count,
2286                 (__psunsigned_t)context->firstu,
2287                 (__psunsigned_t)
2288                         ((context->count > 0) &&
2289                         !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2290                                 ? (ATTR_ENTRY(context->alist,
2291                                               context->count-1)->a_valuelen)
2292                                 : 0,
2293                 (__psunsigned_t)context->dupcnt,
2294                 (__psunsigned_t)context->flags,
2295                 (__psunsigned_t)NULL,
2296                 (__psunsigned_t)NULL,
2297                 (__psunsigned_t)NULL);
2298 }
2299
2300 /*
2301  * Add a trace buffer entry for a context structure and a Btree node.
2302  */
2303 void
2304 xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context,
2305                          struct xfs_da_intnode *node)
2306 {
2307         xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where,
2308                 (__psunsigned_t)context->dp,
2309                 (__psunsigned_t)context->cursor->hashval,
2310                 (__psunsigned_t)context->cursor->blkno,
2311                 (__psunsigned_t)context->cursor->offset,
2312                 (__psunsigned_t)context->alist,
2313                 (__psunsigned_t)context->bufsize,
2314                 (__psunsigned_t)context->count,
2315                 (__psunsigned_t)context->firstu,
2316                 (__psunsigned_t)
2317                         ((context->count > 0) &&
2318                         !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2319                                 ? (ATTR_ENTRY(context->alist,
2320                                               context->count-1)->a_valuelen)
2321                                 : 0,
2322                 (__psunsigned_t)context->dupcnt,
2323                 (__psunsigned_t)context->flags,
2324                 (__psunsigned_t)be16_to_cpu(node->hdr.count),
2325                 (__psunsigned_t)be32_to_cpu(node->btree[0].hashval),
2326                 (__psunsigned_t)be32_to_cpu(node->btree[
2327                                     be16_to_cpu(node->hdr.count)-1].hashval));
2328 }
2329
2330 /*
2331  * Add a trace buffer entry for a context structure and a Btree element.
2332  */
2333 void
2334 xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
2335                           struct xfs_da_node_entry *btree)
2336 {
2337         xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where,
2338                 (__psunsigned_t)context->dp,
2339                 (__psunsigned_t)context->cursor->hashval,
2340                 (__psunsigned_t)context->cursor->blkno,
2341                 (__psunsigned_t)context->cursor->offset,
2342                 (__psunsigned_t)context->alist,
2343                 (__psunsigned_t)context->bufsize,
2344                 (__psunsigned_t)context->count,
2345                 (__psunsigned_t)context->firstu,
2346                 (__psunsigned_t)
2347                         ((context->count > 0) &&
2348                         !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2349                                 ? (ATTR_ENTRY(context->alist,
2350                                               context->count-1)->a_valuelen)
2351                                 : 0,
2352                 (__psunsigned_t)context->dupcnt,
2353                 (__psunsigned_t)context->flags,
2354                 (__psunsigned_t)be32_to_cpu(btree->hashval),
2355                 (__psunsigned_t)be32_to_cpu(btree->before),
2356                 (__psunsigned_t)NULL);
2357 }
2358
2359 /*
2360  * Add a trace buffer entry for a context structure and a leaf block.
2361  */
2362 void
2363 xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
2364                               struct xfs_attr_leafblock *leaf)
2365 {
2366         xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where,
2367                 (__psunsigned_t)context->dp,
2368                 (__psunsigned_t)context->cursor->hashval,
2369                 (__psunsigned_t)context->cursor->blkno,
2370                 (__psunsigned_t)context->cursor->offset,
2371                 (__psunsigned_t)context->alist,
2372                 (__psunsigned_t)context->bufsize,
2373                 (__psunsigned_t)context->count,
2374                 (__psunsigned_t)context->firstu,
2375                 (__psunsigned_t)
2376                         ((context->count > 0) &&
2377                         !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2378                                 ? (ATTR_ENTRY(context->alist,
2379                                               context->count-1)->a_valuelen)
2380                                 : 0,
2381                 (__psunsigned_t)context->dupcnt,
2382                 (__psunsigned_t)context->flags,
2383                 (__psunsigned_t)be16_to_cpu(leaf->hdr.count),
2384                 (__psunsigned_t)be32_to_cpu(leaf->entries[0].hashval),
2385                 (__psunsigned_t)be32_to_cpu(leaf->entries[
2386                                 be16_to_cpu(leaf->hdr.count)-1].hashval));
2387 }
2388
2389 /*
2390  * Add a trace buffer entry for the arguments given to the routine,
2391  * generic form.
2392  */
2393 void
2394 xfs_attr_trace_enter(int type, char *where,
2395                          __psunsigned_t a2, __psunsigned_t a3,
2396                          __psunsigned_t a4, __psunsigned_t a5,
2397                          __psunsigned_t a6, __psunsigned_t a7,
2398                          __psunsigned_t a8, __psunsigned_t a9,
2399                          __psunsigned_t a10, __psunsigned_t a11,
2400                          __psunsigned_t a12, __psunsigned_t a13,
2401                          __psunsigned_t a14, __psunsigned_t a15)
2402 {
2403         ASSERT(xfs_attr_trace_buf);
2404         ktrace_enter(xfs_attr_trace_buf, (void *)((__psunsigned_t)type),
2405                                          (void *)where,
2406                                          (void *)a2,  (void *)a3,  (void *)a4,
2407                                          (void *)a5,  (void *)a6,  (void *)a7,
2408                                          (void *)a8,  (void *)a9,  (void *)a10,
2409                                          (void *)a11, (void *)a12, (void *)a13,
2410                                          (void *)a14, (void *)a15);
2411 }
2412 #endif  /* XFS_ATTR_TRACE */
2413
2414
2415 /*========================================================================
2416  * System (pseudo) namespace attribute interface routines.
2417  *========================================================================*/
2418
2419 STATIC int
2420 posix_acl_access_set(
2421         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2422 {
2423         return xfs_acl_vset(vp, data, size, _ACL_TYPE_ACCESS);
2424 }
2425
2426 STATIC int
2427 posix_acl_access_remove(
2428         bhv_vnode_t *vp, char *name, int xflags)
2429 {
2430         return xfs_acl_vremove(vp, _ACL_TYPE_ACCESS);
2431 }
2432
2433 STATIC int
2434 posix_acl_access_get(
2435         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2436 {
2437         return xfs_acl_vget(vp, data, size, _ACL_TYPE_ACCESS);
2438 }
2439
2440 STATIC int
2441 posix_acl_access_exists(
2442         bhv_vnode_t *vp)
2443 {
2444         return xfs_acl_vhasacl_access(vp);
2445 }
2446
2447 STATIC int
2448 posix_acl_default_set(
2449         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2450 {
2451         return xfs_acl_vset(vp, data, size, _ACL_TYPE_DEFAULT);
2452 }
2453
2454 STATIC int
2455 posix_acl_default_get(
2456         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2457 {
2458         return xfs_acl_vget(vp, data, size, _ACL_TYPE_DEFAULT);
2459 }
2460
2461 STATIC int
2462 posix_acl_default_remove(
2463         bhv_vnode_t *vp, char *name, int xflags)
2464 {
2465         return xfs_acl_vremove(vp, _ACL_TYPE_DEFAULT);
2466 }
2467
2468 STATIC int
2469 posix_acl_default_exists(
2470         bhv_vnode_t *vp)
2471 {
2472         return xfs_acl_vhasacl_default(vp);
2473 }
2474
2475 static struct attrnames posix_acl_access = {
2476         .attr_name      = "posix_acl_access",
2477         .attr_namelen   = sizeof("posix_acl_access") - 1,
2478         .attr_get       = posix_acl_access_get,
2479         .attr_set       = posix_acl_access_set,
2480         .attr_remove    = posix_acl_access_remove,
2481         .attr_exists    = posix_acl_access_exists,
2482 };
2483
2484 static struct attrnames posix_acl_default = {
2485         .attr_name      = "posix_acl_default",
2486         .attr_namelen   = sizeof("posix_acl_default") - 1,
2487         .attr_get       = posix_acl_default_get,
2488         .attr_set       = posix_acl_default_set,
2489         .attr_remove    = posix_acl_default_remove,
2490         .attr_exists    = posix_acl_default_exists,
2491 };
2492
2493 static struct attrnames *attr_system_names[] =
2494         { &posix_acl_access, &posix_acl_default };
2495
2496
2497 /*========================================================================
2498  * Namespace-prefix-style attribute name interface routines.
2499  *========================================================================*/
2500
2501 STATIC int
2502 attr_generic_set(
2503         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2504 {
2505         return -bhv_vop_attr_set(vp, name, data, size, xflags, NULL);
2506 }
2507
2508 STATIC int
2509 attr_generic_get(
2510         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2511 {
2512         int     error, asize = size;
2513
2514         error = bhv_vop_attr_get(vp, name, data, &asize, xflags, NULL);
2515         if (!error)
2516                 return asize;
2517         return -error;
2518 }
2519
2520 STATIC int
2521 attr_generic_remove(
2522         bhv_vnode_t *vp, char *name, int xflags)
2523 {
2524         return -bhv_vop_attr_remove(vp, name, xflags, NULL);
2525 }
2526
2527 STATIC int
2528 attr_generic_listadd(
2529         attrnames_t             *prefix,
2530         attrnames_t             *namesp,
2531         void                    *data,
2532         size_t                  size,
2533         ssize_t                 *result)
2534 {
2535         char                    *p = data + *result;
2536
2537         *result += prefix->attr_namelen;
2538         *result += namesp->attr_namelen + 1;
2539         if (!size)
2540                 return 0;
2541         if (*result > size)
2542                 return -ERANGE;
2543         strcpy(p, prefix->attr_name);
2544         p += prefix->attr_namelen;
2545         strcpy(p, namesp->attr_name);
2546         p += namesp->attr_namelen + 1;
2547         return 0;
2548 }
2549
2550 STATIC int
2551 attr_system_list(
2552         bhv_vnode_t             *vp,
2553         void                    *data,
2554         size_t                  size,
2555         ssize_t                 *result)
2556 {
2557         attrnames_t             *namesp;
2558         int                     i, error = 0;
2559
2560         for (i = 0; i < ATTR_SYSCOUNT; i++) {
2561                 namesp = attr_system_names[i];
2562                 if (!namesp->attr_exists || !namesp->attr_exists(vp))
2563                         continue;
2564                 error = attr_generic_listadd(&attr_system, namesp,
2565                                                 data, size, result);
2566                 if (error)
2567                         break;
2568         }
2569         return error;
2570 }
2571
2572 int
2573 attr_generic_list(
2574         bhv_vnode_t *vp, void *data, size_t size, int xflags, ssize_t *result)
2575 {
2576         attrlist_cursor_kern_t  cursor = { 0 };
2577         int                     error;
2578
2579         error = bhv_vop_attr_list(vp, data, size, xflags, &cursor, NULL);
2580         if (error > 0)
2581                 return -error;
2582         *result = -error;
2583         return attr_system_list(vp, data, size, result);
2584 }
2585
2586 attrnames_t *
2587 attr_lookup_namespace(
2588         char                    *name,
2589         struct attrnames        **names,
2590         int                     nnames)
2591 {
2592         int                     i;
2593
2594         for (i = 0; i < nnames; i++)
2595                 if (!strncmp(name, names[i]->attr_name, names[i]->attr_namelen))
2596                         return names[i];
2597         return NULL;
2598 }
2599
2600 /*
2601  * Some checks to prevent people abusing EAs to get over quota:
2602  * - Don't allow modifying user EAs on devices/symlinks;
2603  * - Don't allow modifying user EAs if sticky bit set;
2604  */
2605 STATIC int
2606 attr_user_capable(
2607         bhv_vnode_t     *vp,
2608         cred_t          *cred)
2609 {
2610         struct inode    *inode = vn_to_inode(vp);
2611
2612         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
2613                 return -EPERM;
2614         if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
2615             !capable(CAP_SYS_ADMIN))
2616                 return -EPERM;
2617         if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
2618             (current_fsuid(cred) != inode->i_uid) && !capable(CAP_FOWNER))
2619                 return -EPERM;
2620         return 0;
2621 }
2622
2623 STATIC int
2624 attr_trusted_capable(
2625         bhv_vnode_t     *vp,
2626         cred_t          *cred)
2627 {
2628         struct inode    *inode = vn_to_inode(vp);
2629
2630         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
2631                 return -EPERM;
2632         if (!capable(CAP_SYS_ADMIN))
2633                 return -EPERM;
2634         return 0;
2635 }
2636
2637 STATIC int
2638 attr_secure_capable(
2639         bhv_vnode_t     *vp,
2640         cred_t          *cred)
2641 {
2642         return -ENOSECURITY;
2643 }
2644
2645 STATIC int
2646 attr_system_set(
2647         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2648 {
2649         attrnames_t     *namesp;
2650         int             error;
2651
2652         if (xflags & ATTR_CREATE)
2653                 return -EINVAL;
2654
2655         namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
2656         if (!namesp)
2657                 return -EOPNOTSUPP;
2658         error = namesp->attr_set(vp, name, data, size, xflags);
2659         if (!error)
2660                 error = vn_revalidate(vp);
2661         return error;
2662 }
2663
2664 STATIC int
2665 attr_system_get(
2666         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2667 {
2668         attrnames_t     *namesp;
2669
2670         namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
2671         if (!namesp)
2672                 return -EOPNOTSUPP;
2673         return namesp->attr_get(vp, name, data, size, xflags);
2674 }
2675
2676 STATIC int
2677 attr_system_remove(
2678         bhv_vnode_t *vp, char *name, int xflags)
2679 {
2680         attrnames_t     *namesp;
2681
2682         namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
2683         if (!namesp)
2684                 return -EOPNOTSUPP;
2685         return namesp->attr_remove(vp, name, xflags);
2686 }
2687
2688 struct attrnames attr_system = {
2689         .attr_name      = "system.",
2690         .attr_namelen   = sizeof("system.") - 1,
2691         .attr_flag      = ATTR_SYSTEM,
2692         .attr_get       = attr_system_get,
2693         .attr_set       = attr_system_set,
2694         .attr_remove    = attr_system_remove,
2695         .attr_capable   = (attrcapable_t)fs_noerr,
2696 };
2697
2698 struct attrnames attr_trusted = {
2699         .attr_name      = "trusted.",
2700         .attr_namelen   = sizeof("trusted.") - 1,
2701         .attr_flag      = ATTR_ROOT,
2702         .attr_get       = attr_generic_get,
2703         .attr_set       = attr_generic_set,
2704         .attr_remove    = attr_generic_remove,
2705         .attr_capable   = attr_trusted_capable,
2706 };
2707
2708 struct attrnames attr_secure = {
2709         .attr_name      = "security.",
2710         .attr_namelen   = sizeof("security.") - 1,
2711         .attr_flag      = ATTR_SECURE,
2712         .attr_get       = attr_generic_get,
2713         .attr_set       = attr_generic_set,
2714         .attr_remove    = attr_generic_remove,
2715         .attr_capable   = attr_secure_capable,
2716 };
2717
2718 struct attrnames attr_user = {
2719         .attr_name      = "user.",
2720         .attr_namelen   = sizeof("user.") - 1,
2721         .attr_get       = attr_generic_get,
2722         .attr_set       = attr_generic_set,
2723         .attr_remove    = attr_generic_remove,
2724         .attr_capable   = attr_user_capable,
2725 };
2726
2727 struct attrnames *attr_namespaces[] =
2728         { &attr_system, &attr_trusted, &attr_secure, &attr_user };