Merge branch 'master' of /home/trondmy/kernel/linux-2.6/
[linux-2.6] / fs / xfs / xfs_attr.c
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18
19 #include <linux/capability.h>
20
21 #include "xfs.h"
22 #include "xfs_fs.h"
23 #include "xfs_types.h"
24 #include "xfs_bit.h"
25 #include "xfs_log.h"
26 #include "xfs_inum.h"
27 #include "xfs_trans.h"
28 #include "xfs_sb.h"
29 #include "xfs_ag.h"
30 #include "xfs_dir2.h"
31 #include "xfs_dmapi.h"
32 #include "xfs_mount.h"
33 #include "xfs_da_btree.h"
34 #include "xfs_bmap_btree.h"
35 #include "xfs_alloc_btree.h"
36 #include "xfs_ialloc_btree.h"
37 #include "xfs_dir2_sf.h"
38 #include "xfs_attr_sf.h"
39 #include "xfs_dinode.h"
40 #include "xfs_inode.h"
41 #include "xfs_alloc.h"
42 #include "xfs_btree.h"
43 #include "xfs_inode_item.h"
44 #include "xfs_bmap.h"
45 #include "xfs_attr.h"
46 #include "xfs_attr_leaf.h"
47 #include "xfs_error.h"
48 #include "xfs_quota.h"
49 #include "xfs_trans_space.h"
50 #include "xfs_acl.h"
51 #include "xfs_rw.h"
52
53 /*
54  * xfs_attr.c
55  *
56  * Provide the external interfaces to manage attribute lists.
57  */
58
59 #define ATTR_SYSCOUNT   2
60 static struct attrnames posix_acl_access;
61 static struct attrnames posix_acl_default;
62 static struct attrnames *attr_system_names[ATTR_SYSCOUNT];
63
64 /*========================================================================
65  * Function prototypes for the kernel.
66  *========================================================================*/
67
68 /*
69  * Internal routines when attribute list fits inside the inode.
70  */
71 STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
72
73 /*
74  * Internal routines when attribute list is one block.
75  */
76 STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
77 STATIC int xfs_attr_leaf_addname(xfs_da_args_t *args);
78 STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
79 STATIC int xfs_attr_leaf_list(xfs_attr_list_context_t *context);
80
81 /*
82  * Internal routines when attribute list is more than one block.
83  */
84 STATIC int xfs_attr_node_get(xfs_da_args_t *args);
85 STATIC int xfs_attr_node_addname(xfs_da_args_t *args);
86 STATIC int xfs_attr_node_removename(xfs_da_args_t *args);
87 STATIC int xfs_attr_node_list(xfs_attr_list_context_t *context);
88 STATIC int xfs_attr_fillstate(xfs_da_state_t *state);
89 STATIC int xfs_attr_refillstate(xfs_da_state_t *state);
90
91 /*
92  * Routines to manipulate out-of-line attribute values.
93  */
94 STATIC int xfs_attr_rmtval_set(xfs_da_args_t *args);
95 STATIC int xfs_attr_rmtval_remove(xfs_da_args_t *args);
96
97 #define ATTR_RMTVALUE_MAPSIZE   1       /* # of map entries at once */
98
99 #if defined(XFS_ATTR_TRACE)
100 ktrace_t *xfs_attr_trace_buf;
101 #endif
102
103
104 /*========================================================================
105  * Overall external interface routines.
106  *========================================================================*/
107
108 int
109 xfs_attr_fetch(xfs_inode_t *ip, const char *name, int namelen,
110                char *value, int *valuelenp, int flags, struct cred *cred)
111 {
112         xfs_da_args_t   args;
113         int             error;
114
115         if ((XFS_IFORK_Q(ip) == 0) ||
116             (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
117              ip->i_d.di_anextents == 0))
118                 return(ENOATTR);
119
120         /*
121          * Fill in the arg structure for this request.
122          */
123         memset((char *)&args, 0, sizeof(args));
124         args.name = name;
125         args.namelen = namelen;
126         args.value = value;
127         args.valuelen = *valuelenp;
128         args.flags = flags;
129         args.hashval = xfs_da_hashname(args.name, args.namelen);
130         args.dp = ip;
131         args.whichfork = XFS_ATTR_FORK;
132
133         /*
134          * Decide on what work routines to call based on the inode size.
135          */
136         if (XFS_IFORK_Q(ip) == 0 ||
137             (ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
138              ip->i_d.di_anextents == 0)) {
139                 error = XFS_ERROR(ENOATTR);
140         } else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
141                 error = xfs_attr_shortform_getvalue(&args);
142         } else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) {
143                 error = xfs_attr_leaf_get(&args);
144         } else {
145                 error = xfs_attr_node_get(&args);
146         }
147
148         /*
149          * Return the number of bytes in the value to the caller.
150          */
151         *valuelenp = args.valuelen;
152
153         if (error == EEXIST)
154                 error = 0;
155         return(error);
156 }
157
158 int
159 xfs_attr_get(bhv_desc_t *bdp, const char *name, char *value, int *valuelenp,
160              int flags, struct cred *cred)
161 {
162         xfs_inode_t     *ip = XFS_BHVTOI(bdp);
163         int             error, namelen;
164
165         XFS_STATS_INC(xs_attr_get);
166
167         if (!name)
168                 return(EINVAL);
169         namelen = strlen(name);
170         if (namelen >= MAXNAMELEN)
171                 return(EFAULT);         /* match IRIX behaviour */
172
173         if (XFS_FORCED_SHUTDOWN(ip->i_mount))
174                 return(EIO);
175
176         xfs_ilock(ip, XFS_ILOCK_SHARED);
177         error = xfs_attr_fetch(ip, name, namelen, value, valuelenp, flags, cred);
178         xfs_iunlock(ip, XFS_ILOCK_SHARED);
179         return(error);
180 }
181
182 int
183 xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
184                  char *value, int valuelen, int flags)
185 {
186         xfs_da_args_t   args;
187         xfs_fsblock_t   firstblock;
188         xfs_bmap_free_t flist;
189         int             error, err2, committed;
190         int             local, size;
191         uint            nblks;
192         xfs_mount_t     *mp = dp->i_mount;
193         int             rsvd = (flags & ATTR_ROOT) != 0;
194
195         /*
196          * Attach the dquots to the inode.
197          */
198         if ((error = XFS_QM_DQATTACH(mp, dp, 0)))
199                 return (error);
200
201         /*
202          * If the inode doesn't have an attribute fork, add one.
203          * (inode must not be locked when we call this routine)
204          */
205         if (XFS_IFORK_Q(dp) == 0) {
206                 int sf_size = sizeof(xfs_attr_sf_hdr_t) +
207                               XFS_ATTR_SF_ENTSIZE_BYNAME(namelen, valuelen);
208
209                 if ((error = xfs_bmap_add_attrfork(dp, sf_size, rsvd)))
210                         return(error);
211         }
212
213         /*
214          * Fill in the arg structure for this request.
215          */
216         memset((char *)&args, 0, sizeof(args));
217         args.name = name;
218         args.namelen = namelen;
219         args.value = value;
220         args.valuelen = valuelen;
221         args.flags = flags;
222         args.hashval = xfs_da_hashname(args.name, args.namelen);
223         args.dp = dp;
224         args.firstblock = &firstblock;
225         args.flist = &flist;
226         args.whichfork = XFS_ATTR_FORK;
227         args.addname = 1;
228         args.oknoent = 1;
229
230         /*
231          * Determine space new attribute will use, and if it would be
232          * "local" or "remote" (note: local != inline).
233          */
234         size = xfs_attr_leaf_newentsize(namelen, valuelen,
235                                         mp->m_sb.sb_blocksize, &local);
236
237         nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);
238         if (local) {
239                 if (size > (mp->m_sb.sb_blocksize >> 1)) {
240                         /* Double split possible */
241                         nblks <<= 1;
242                 }
243         } else {
244                 uint    dblocks = XFS_B_TO_FSB(mp, valuelen);
245                 /* Out of line attribute, cannot double split, but make
246                  * room for the attribute value itself.
247                  */
248                 nblks += dblocks;
249                 nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);
250         }
251
252         /* Size is now blocks for attribute data */
253         args.total = nblks;
254
255         /*
256          * Start our first transaction of the day.
257          *
258          * All future transactions during this code must be "chained" off
259          * this one via the trans_dup() call.  All transactions will contain
260          * the inode, and the inode will always be marked with trans_ihold().
261          * Since the inode will be locked in all transactions, we must log
262          * the inode in every transaction to let it float upward through
263          * the log.
264          */
265         args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_SET);
266
267         /*
268          * Root fork attributes can use reserved data blocks for this
269          * operation if necessary
270          */
271
272         if (rsvd)
273                 args.trans->t_flags |= XFS_TRANS_RESERVE;
274
275         if ((error = xfs_trans_reserve(args.trans, (uint) nblks,
276                                       XFS_ATTRSET_LOG_RES(mp, nblks),
277                                       0, XFS_TRANS_PERM_LOG_RES,
278                                       XFS_ATTRSET_LOG_COUNT))) {
279                 xfs_trans_cancel(args.trans, 0);
280                 return(error);
281         }
282         xfs_ilock(dp, XFS_ILOCK_EXCL);
283
284         error = XFS_TRANS_RESERVE_QUOTA_NBLKS(mp, args.trans, dp, nblks, 0,
285                          rsvd ? XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :
286                                 XFS_QMOPT_RES_REGBLKS);
287         if (error) {
288                 xfs_iunlock(dp, XFS_ILOCK_EXCL);
289                 xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES);
290                 return (error);
291         }
292
293         xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL);
294         xfs_trans_ihold(args.trans, dp);
295
296         /*
297          * If the attribute list is non-existent or a shortform list,
298          * upgrade it to a single-leaf-block attribute list.
299          */
300         if ((dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
301             ((dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) &&
302              (dp->i_d.di_anextents == 0))) {
303
304                 /*
305                  * Build initial attribute list (if required).
306                  */
307                 if (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS)
308                         xfs_attr_shortform_create(&args);
309
310                 /*
311                  * Try to add the attr to the attribute list in
312                  * the inode.
313                  */
314                 error = xfs_attr_shortform_addname(&args);
315                 if (error != ENOSPC) {
316                         /*
317                          * Commit the shortform mods, and we're done.
318                          * NOTE: this is also the error path (EEXIST, etc).
319                          */
320                         ASSERT(args.trans != NULL);
321
322                         /*
323                          * If this is a synchronous mount, make sure that
324                          * the transaction goes to disk before returning
325                          * to the user.
326                          */
327                         if (mp->m_flags & XFS_MOUNT_WSYNC) {
328                                 xfs_trans_set_sync(args.trans);
329                         }
330                         err2 = xfs_trans_commit(args.trans,
331                                                  XFS_TRANS_RELEASE_LOG_RES,
332                                                  NULL);
333                         xfs_iunlock(dp, XFS_ILOCK_EXCL);
334
335                         /*
336                          * Hit the inode change time.
337                          */
338                         if (!error && (flags & ATTR_KERNOTIME) == 0) {
339                                 xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
340                         }
341                         return(error == 0 ? err2 : error);
342                 }
343
344                 /*
345                  * It won't fit in the shortform, transform to a leaf block.
346                  * GROT: another possible req'mt for a double-split btree op.
347                  */
348                 XFS_BMAP_INIT(args.flist, args.firstblock);
349                 error = xfs_attr_shortform_to_leaf(&args);
350                 if (!error) {
351                         error = xfs_bmap_finish(&args.trans, args.flist,
352                                                 &committed);
353                 }
354                 if (error) {
355                         ASSERT(committed);
356                         args.trans = NULL;
357                         xfs_bmap_cancel(&flist);
358                         goto out;
359                 }
360
361                 /*
362                  * bmap_finish() may have committed the last trans and started
363                  * a new one.  We need the inode to be in all transactions.
364                  */
365                 if (committed) {
366                         xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL);
367                         xfs_trans_ihold(args.trans, dp);
368                 }
369
370                 /*
371                  * Commit the leaf transformation.  We'll need another (linked)
372                  * transaction to add the new attribute to the leaf.
373                  */
374                 if ((error = xfs_attr_rolltrans(&args.trans, dp)))
375                         goto out;
376
377         }
378
379         if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
380                 error = xfs_attr_leaf_addname(&args);
381         } else {
382                 error = xfs_attr_node_addname(&args);
383         }
384         if (error) {
385                 goto out;
386         }
387
388         /*
389          * If this is a synchronous mount, make sure that the
390          * transaction goes to disk before returning to the user.
391          */
392         if (mp->m_flags & XFS_MOUNT_WSYNC) {
393                 xfs_trans_set_sync(args.trans);
394         }
395
396         /*
397          * Commit the last in the sequence of transactions.
398          */
399         xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
400         error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES,
401                                  NULL);
402         xfs_iunlock(dp, XFS_ILOCK_EXCL);
403
404         /*
405          * Hit the inode change time.
406          */
407         if (!error && (flags & ATTR_KERNOTIME) == 0) {
408                 xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
409         }
410
411         return(error);
412
413 out:
414         if (args.trans)
415                 xfs_trans_cancel(args.trans,
416                         XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
417         xfs_iunlock(dp, XFS_ILOCK_EXCL);
418         return(error);
419 }
420
421 int
422 xfs_attr_set(bhv_desc_t *bdp, const char *name, char *value, int valuelen, int flags,
423              struct cred *cred)
424 {
425         xfs_inode_t     *dp;
426         int             namelen;
427
428         namelen = strlen(name);
429         if (namelen >= MAXNAMELEN)
430                 return EFAULT;          /* match IRIX behaviour */
431
432         XFS_STATS_INC(xs_attr_set);
433
434         dp = XFS_BHVTOI(bdp);
435         if (XFS_FORCED_SHUTDOWN(dp->i_mount))
436                 return (EIO);
437
438         return xfs_attr_set_int(dp, name, namelen, value, valuelen, flags);
439 }
440
441 /*
442  * Generic handler routine to remove a name from an attribute list.
443  * Transitions attribute list from Btree to shortform as necessary.
444  */
445 int
446 xfs_attr_remove_int(xfs_inode_t *dp, const char *name, int namelen, int flags)
447 {
448         xfs_da_args_t   args;
449         xfs_fsblock_t   firstblock;
450         xfs_bmap_free_t flist;
451         int             error;
452         xfs_mount_t     *mp = dp->i_mount;
453
454         /*
455          * Fill in the arg structure for this request.
456          */
457         memset((char *)&args, 0, sizeof(args));
458         args.name = name;
459         args.namelen = namelen;
460         args.flags = flags;
461         args.hashval = xfs_da_hashname(args.name, args.namelen);
462         args.dp = dp;
463         args.firstblock = &firstblock;
464         args.flist = &flist;
465         args.total = 0;
466         args.whichfork = XFS_ATTR_FORK;
467
468         /*
469          * Attach the dquots to the inode.
470          */
471         if ((error = XFS_QM_DQATTACH(mp, dp, 0)))
472                 return (error);
473
474         /*
475          * Start our first transaction of the day.
476          *
477          * All future transactions during this code must be "chained" off
478          * this one via the trans_dup() call.  All transactions will contain
479          * the inode, and the inode will always be marked with trans_ihold().
480          * Since the inode will be locked in all transactions, we must log
481          * the inode in every transaction to let it float upward through
482          * the log.
483          */
484         args.trans = xfs_trans_alloc(mp, XFS_TRANS_ATTR_RM);
485
486         /*
487          * Root fork attributes can use reserved data blocks for this
488          * operation if necessary
489          */
490
491         if (flags & ATTR_ROOT)
492                 args.trans->t_flags |= XFS_TRANS_RESERVE;
493
494         if ((error = xfs_trans_reserve(args.trans,
495                                       XFS_ATTRRM_SPACE_RES(mp),
496                                       XFS_ATTRRM_LOG_RES(mp),
497                                       0, XFS_TRANS_PERM_LOG_RES,
498                                       XFS_ATTRRM_LOG_COUNT))) {
499                 xfs_trans_cancel(args.trans, 0);
500                 return(error);
501         }
502
503         xfs_ilock(dp, XFS_ILOCK_EXCL);
504         /*
505          * No need to make quota reservations here. We expect to release some
506          * blocks not allocate in the common case.
507          */
508         xfs_trans_ijoin(args.trans, dp, XFS_ILOCK_EXCL);
509         xfs_trans_ihold(args.trans, dp);
510
511         /*
512          * Decide on what work routines to call based on the inode size.
513          */
514         if (XFS_IFORK_Q(dp) == 0 ||
515             (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
516              dp->i_d.di_anextents == 0)) {
517                 error = XFS_ERROR(ENOATTR);
518                 goto out;
519         }
520         if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
521                 ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);
522                 error = xfs_attr_shortform_remove(&args);
523                 if (error) {
524                         goto out;
525                 }
526         } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
527                 error = xfs_attr_leaf_removename(&args);
528         } else {
529                 error = xfs_attr_node_removename(&args);
530         }
531         if (error) {
532                 goto out;
533         }
534
535         /*
536          * If this is a synchronous mount, make sure that the
537          * transaction goes to disk before returning to the user.
538          */
539         if (mp->m_flags & XFS_MOUNT_WSYNC) {
540                 xfs_trans_set_sync(args.trans);
541         }
542
543         /*
544          * Commit the last in the sequence of transactions.
545          */
546         xfs_trans_log_inode(args.trans, dp, XFS_ILOG_CORE);
547         error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES,
548                                  NULL);
549         xfs_iunlock(dp, XFS_ILOCK_EXCL);
550
551         /*
552          * Hit the inode change time.
553          */
554         if (!error && (flags & ATTR_KERNOTIME) == 0) {
555                 xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
556         }
557
558         return(error);
559
560 out:
561         if (args.trans)
562                 xfs_trans_cancel(args.trans,
563                         XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
564         xfs_iunlock(dp, XFS_ILOCK_EXCL);
565         return(error);
566 }
567
568 int
569 xfs_attr_remove(bhv_desc_t *bdp, const char *name, int flags, struct cred *cred)
570 {
571         xfs_inode_t         *dp;
572         int                 namelen;
573
574         namelen = strlen(name);
575         if (namelen >= MAXNAMELEN)
576                 return EFAULT;          /* match IRIX behaviour */
577
578         XFS_STATS_INC(xs_attr_remove);
579
580         dp = XFS_BHVTOI(bdp);
581         if (XFS_FORCED_SHUTDOWN(dp->i_mount))
582                 return (EIO);
583
584         xfs_ilock(dp, XFS_ILOCK_SHARED);
585         if (XFS_IFORK_Q(dp) == 0 ||
586                    (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
587                     dp->i_d.di_anextents == 0)) {
588                 xfs_iunlock(dp, XFS_ILOCK_SHARED);
589                 return(XFS_ERROR(ENOATTR));
590         }
591         xfs_iunlock(dp, XFS_ILOCK_SHARED);
592
593         return xfs_attr_remove_int(dp, name, namelen, flags);
594 }
595
596 int                                                             /* error */
597 xfs_attr_list_int(xfs_attr_list_context_t *context)
598 {
599         int error;
600         xfs_inode_t *dp = context->dp;
601
602         /*
603          * Decide on what work routines to call based on the inode size.
604          */
605         if (XFS_IFORK_Q(dp) == 0 ||
606             (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
607              dp->i_d.di_anextents == 0)) {
608                 error = 0;
609         } else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {
610                 error = xfs_attr_shortform_list(context);
611         } else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
612                 error = xfs_attr_leaf_list(context);
613         } else {
614                 error = xfs_attr_node_list(context);
615         }
616         return error;
617 }
618
/*
 * Sizing helpers for entries placed in the attrlist output buffer.
 *
 * ATTR_ENTBASESIZE is the minimum bytes used by an attr: the byte
 * offset of a_name within struct attrlist_ent.
 *
 * ATTR_ENTSIZE(namelen) is the actual bytes used by an attr: the base
 * size plus the name plus one more byte, rounded up to a multiple of
 * sizeof(u_int32_t).
 */
#define ATTR_ENTBASESIZE \
	(((struct attrlist_ent *) 0)->a_name - (char *) 0)
#define ATTR_ENTSIZE(namelen) \
	((ATTR_ENTBASESIZE + (namelen) + 1 + sizeof(u_int32_t) - 1) & \
	 ~(sizeof(u_int32_t) - 1))
624
625 /*
626  * Format an attribute and copy it out to the user's buffer.
627  * Take care to check values and protect against them changing later,
628  * we may be reading them directly out of a user buffer.
629  */
630 /*ARGSUSED*/
631 STATIC int
632 xfs_attr_put_listent(xfs_attr_list_context_t *context, attrnames_t *namesp,
633                      char *name, int namelen,
634                      int valuelen, char *value)
635 {
636         attrlist_ent_t *aep;
637         int arraytop;
638
639         ASSERT(!(context->flags & ATTR_KERNOVAL));
640         ASSERT(context->count >= 0);
641         ASSERT(context->count < (ATTR_MAX_VALUELEN/8));
642         ASSERT(context->firstu >= sizeof(*context->alist));
643         ASSERT(context->firstu <= context->bufsize);
644
645         arraytop = sizeof(*context->alist) +
646                         context->count * sizeof(context->alist->al_offset[0]);
647         context->firstu -= ATTR_ENTSIZE(namelen);
648         if (context->firstu < arraytop) {
649                 xfs_attr_trace_l_c("buffer full", context);
650                 context->alist->al_more = 1;
651                 context->seen_enough = 1;
652                 return 1;
653         }
654
655         aep = (attrlist_ent_t *)&(((char *)context->alist)[ context->firstu ]);
656         aep->a_valuelen = valuelen;
657         memcpy(aep->a_name, name, namelen);
658         aep->a_name[ namelen ] = 0;
659         context->alist->al_offset[ context->count++ ] = context->firstu;
660         context->alist->al_count = context->count;
661         xfs_attr_trace_l_c("add", context);
662         return 0;
663 }
664
665 STATIC int
666 xfs_attr_kern_list(xfs_attr_list_context_t *context, attrnames_t *namesp,
667                      char *name, int namelen,
668                      int valuelen, char *value)
669 {
670         char *offset;
671         int arraytop;
672
673         ASSERT(context->count >= 0);
674
675         arraytop = context->count + namesp->attr_namelen + namelen + 1;
676         if (arraytop > context->firstu) {
677                 context->count = -1;    /* insufficient space */
678                 return 1;
679         }
680         offset = (char *)context->alist + context->count;
681         strncpy(offset, namesp->attr_name, namesp->attr_namelen);
682         offset += namesp->attr_namelen;
683         strncpy(offset, name, namelen);                 /* real name */
684         offset += namelen;
685         *offset = '\0';
686         context->count += namesp->attr_namelen + namelen + 1;
687         return 0;
688 }
689
690 /*ARGSUSED*/
691 STATIC int
692 xfs_attr_kern_list_sizes(xfs_attr_list_context_t *context, attrnames_t *namesp,
693                      char *name, int namelen,
694                      int valuelen, char *value)
695 {
696         context->count += namesp->attr_namelen + namelen + 1;
697         return 0;
698 }
699
700 /*
701  * Generate a list of extended attribute names and optionally
702  * also value lengths.  Positive return value follows the XFS
703  * convention of being an error, zero or negative return code
704  * is the length of the buffer returned (negated), indicating
705  * success.
706  */
707 int
708 xfs_attr_list(bhv_desc_t *bdp, char *buffer, int bufsize, int flags,
709                       attrlist_cursor_kern_t *cursor, struct cred *cred)
710 {
711         xfs_attr_list_context_t context;
712         xfs_inode_t *dp;
713         int error;
714
715         XFS_STATS_INC(xs_attr_list);
716
717         /*
718          * Validate the cursor.
719          */
720         if (cursor->pad1 || cursor->pad2)
721                 return(XFS_ERROR(EINVAL));
722         if ((cursor->initted == 0) &&
723             (cursor->hashval || cursor->blkno || cursor->offset))
724                 return XFS_ERROR(EINVAL);
725
726         /*
727          * Check for a properly aligned buffer.
728          */
729         if (((long)buffer) & (sizeof(int)-1))
730                 return XFS_ERROR(EFAULT);
731         if (flags & ATTR_KERNOVAL)
732                 bufsize = 0;
733
734         /*
735          * Initialize the output buffer.
736          */
737         context.dp = dp = XFS_BHVTOI(bdp);
738         context.cursor = cursor;
739         context.count = 0;
740         context.dupcnt = 0;
741         context.resynch = 1;
742         context.flags = flags;
743         context.seen_enough = 0;
744         context.alist = (attrlist_t *)buffer;
745         context.put_value = 0;
746
747         if (flags & ATTR_KERNAMELS) {
748                 context.bufsize = bufsize;
749                 context.firstu = context.bufsize;
750                 if (flags & ATTR_KERNOVAL)
751                         context.put_listent = xfs_attr_kern_list_sizes;
752                 else
753                         context.put_listent = xfs_attr_kern_list;
754         } else {
755                 context.bufsize = (bufsize & ~(sizeof(int)-1));  /* align */
756                 context.firstu = context.bufsize;
757                 context.alist->al_count = 0;
758                 context.alist->al_more = 0;
759                 context.alist->al_offset[0] = context.bufsize;
760                 context.put_listent = xfs_attr_put_listent;
761         }
762
763         if (XFS_FORCED_SHUTDOWN(dp->i_mount))
764                 return EIO;
765
766         xfs_ilock(dp, XFS_ILOCK_SHARED);
767         xfs_attr_trace_l_c("syscall start", &context);
768
769         error = xfs_attr_list_int(&context);
770
771         xfs_iunlock(dp, XFS_ILOCK_SHARED);
772         xfs_attr_trace_l_c("syscall end", &context);
773
774         if (context.flags & (ATTR_KERNOVAL|ATTR_KERNAMELS)) {
775                 /* must return negated buffer size or the error */
776                 if (context.count < 0)
777                         error = XFS_ERROR(ERANGE);
778                 else
779                         error = -context.count;
780         } else
781                 ASSERT(error >= 0);
782
783         return error;
784 }
785
786 int                                                             /* error */
787 xfs_attr_inactive(xfs_inode_t *dp)
788 {
789         xfs_trans_t *trans;
790         xfs_mount_t *mp;
791         int error;
792
793         mp = dp->i_mount;
794         ASSERT(! XFS_NOT_DQATTACHED(mp, dp));
795
796         xfs_ilock(dp, XFS_ILOCK_SHARED);
797         if ((XFS_IFORK_Q(dp) == 0) ||
798             (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
799             (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
800              dp->i_d.di_anextents == 0)) {
801                 xfs_iunlock(dp, XFS_ILOCK_SHARED);
802                 return(0);
803         }
804         xfs_iunlock(dp, XFS_ILOCK_SHARED);
805
806         /*
807          * Start our first transaction of the day.
808          *
809          * All future transactions during this code must be "chained" off
810          * this one via the trans_dup() call.  All transactions will contain
811          * the inode, and the inode will always be marked with trans_ihold().
812          * Since the inode will be locked in all transactions, we must log
813          * the inode in every transaction to let it float upward through
814          * the log.
815          */
816         trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL);
817         if ((error = xfs_trans_reserve(trans, 0, XFS_ATTRINVAL_LOG_RES(mp), 0,
818                                       XFS_TRANS_PERM_LOG_RES,
819                                       XFS_ATTRINVAL_LOG_COUNT))) {
820                 xfs_trans_cancel(trans, 0);
821                 return(error);
822         }
823         xfs_ilock(dp, XFS_ILOCK_EXCL);
824
825         /*
826          * No need to make quota reservations here. We expect to release some
827          * blocks, not allocate, in the common case.
828          */
829         xfs_trans_ijoin(trans, dp, XFS_ILOCK_EXCL);
830         xfs_trans_ihold(trans, dp);
831
832         /*
833          * Decide on what work routines to call based on the inode size.
834          */
835         if ((XFS_IFORK_Q(dp) == 0) ||
836             (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
837             (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS &&
838              dp->i_d.di_anextents == 0)) {
839                 error = 0;
840                 goto out;
841         }
842         error = xfs_attr_root_inactive(&trans, dp);
843         if (error)
844                 goto out;
845         /*
846          * signal synchronous inactive transactions unless this
847          * is a synchronous mount filesystem in which case we
848          * know that we're here because we've been called out of
849          * xfs_inactive which means that the last reference is gone
850          * and the unlink transaction has already hit the disk so
851          * async inactive transactions are safe.
852          */
853         if ((error = xfs_itruncate_finish(&trans, dp, 0LL, XFS_ATTR_FORK,
854                                 (!(mp->m_flags & XFS_MOUNT_WSYNC)
855                                  ? 1 : 0))))
856                 goto out;
857
858         /*
859          * Commit the last in the sequence of transactions.
860          */
861         xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE);
862         error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES,
863                                  NULL);
864         xfs_iunlock(dp, XFS_ILOCK_EXCL);
865
866         return(error);
867
868 out:
869         xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);
870         xfs_iunlock(dp, XFS_ILOCK_EXCL);
871         return(error);
872 }
873
874
875
876 /*========================================================================
877  * External routines when attribute list is inside the inode
878  *========================================================================*/
879
880 /*
881  * Add a name to the shortform attribute list structure
882  * This is the external routine.
883  */
884 STATIC int
885 xfs_attr_shortform_addname(xfs_da_args_t *args)
886 {
887         int newsize, forkoff, retval;
888
889         retval = xfs_attr_shortform_lookup(args);
890         if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
891                 return(retval);
892         } else if (retval == EEXIST) {
893                 if (args->flags & ATTR_CREATE)
894                         return(retval);
895                 retval = xfs_attr_shortform_remove(args);
896                 ASSERT(retval == 0);
897         }
898
899         if (args->namelen >= XFS_ATTR_SF_ENTSIZE_MAX ||
900             args->valuelen >= XFS_ATTR_SF_ENTSIZE_MAX)
901                 return(XFS_ERROR(ENOSPC));
902
903         newsize = XFS_ATTR_SF_TOTSIZE(args->dp);
904         newsize += XFS_ATTR_SF_ENTSIZE_BYNAME(args->namelen, args->valuelen);
905
906         forkoff = xfs_attr_shortform_bytesfit(args->dp, newsize);
907         if (!forkoff)
908                 return(XFS_ERROR(ENOSPC));
909
910         xfs_attr_shortform_add(args, forkoff);
911         return(0);
912 }
913
914
915 /*========================================================================
916  * External routines when attribute list is one block
917  *========================================================================*/
918
919 /*
920  * Add a name to the leaf attribute list structure
921  *
922  * This leaf block cannot have a "remote" value, we only call this routine
923  * if bmap_one_block() says there is only one block (ie: no remote blks).
924  */
925 int
926 xfs_attr_leaf_addname(xfs_da_args_t *args)
927 {
928         xfs_inode_t *dp;
929         xfs_dabuf_t *bp;
930         int retval, error, committed, forkoff;
931
932         /*
933          * Read the (only) block in the attribute list in.
934          */
935         dp = args->dp;
936         args->blkno = 0;
937         error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
938                                              XFS_ATTR_FORK);
939         if (error)
940                 return(error);
941         ASSERT(bp != NULL);
942
943         /*
944          * Look up the given attribute in the leaf block.  Figure out if
945          * the given flags produce an error or call for an atomic rename.
946          */
947         retval = xfs_attr_leaf_lookup_int(bp, args);
948         if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
949                 xfs_da_brelse(args->trans, bp);
950                 return(retval);
951         } else if (retval == EEXIST) {
952                 if (args->flags & ATTR_CREATE) {        /* pure create op */
953                         xfs_da_brelse(args->trans, bp);
954                         return(retval);
955                 }
956                 args->rename = 1;                       /* an atomic rename */
957                 args->blkno2 = args->blkno;             /* set 2nd entry info*/
958                 args->index2 = args->index;
959                 args->rmtblkno2 = args->rmtblkno;
960                 args->rmtblkcnt2 = args->rmtblkcnt;
961         }
962
963         /*
964          * Add the attribute to the leaf block, transitioning to a Btree
965          * if required.
966          */
967         retval = xfs_attr_leaf_add(bp, args);
968         xfs_da_buf_done(bp);
969         if (retval == ENOSPC) {
970                 /*
971                  * Promote the attribute list to the Btree format, then
972                  * Commit that transaction so that the node_addname() call
973                  * can manage its own transactions.
974                  */
975                 XFS_BMAP_INIT(args->flist, args->firstblock);
976                 error = xfs_attr_leaf_to_node(args);
977                 if (!error) {
978                         error = xfs_bmap_finish(&args->trans, args->flist,
979                                                 &committed);
980                 }
981                 if (error) {
982                         ASSERT(committed);
983                         args->trans = NULL;
984                         xfs_bmap_cancel(args->flist);
985                         return(error);
986                 }
987
988                 /*
989                  * bmap_finish() may have committed the last trans and started
990                  * a new one.  We need the inode to be in all transactions.
991                  */
992                 if (committed) {
993                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
994                         xfs_trans_ihold(args->trans, dp);
995                 }
996
997                 /*
998                  * Commit the current trans (including the inode) and start
999                  * a new one.
1000                  */
1001                 if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1002                         return (error);
1003
1004                 /*
1005                  * Fob the whole rest of the problem off on the Btree code.
1006                  */
1007                 error = xfs_attr_node_addname(args);
1008                 return(error);
1009         }
1010
1011         /*
1012          * Commit the transaction that added the attr name so that
1013          * later routines can manage their own transactions.
1014          */
1015         if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1016                 return (error);
1017
1018         /*
1019          * If there was an out-of-line value, allocate the blocks we
1020          * identified for its storage and copy the value.  This is done
1021          * after we create the attribute so that we don't overflow the
1022          * maximum size of a transaction and/or hit a deadlock.
1023          */
1024         if (args->rmtblkno > 0) {
1025                 error = xfs_attr_rmtval_set(args);
1026                 if (error)
1027                         return(error);
1028         }
1029
1030         /*
1031          * If this is an atomic rename operation, we must "flip" the
1032          * incomplete flags on the "new" and "old" attribute/value pairs
1033          * so that one disappears and one appears atomically.  Then we
1034          * must remove the "old" attribute/value pair.
1035          */
1036         if (args->rename) {
1037                 /*
1038                  * In a separate transaction, set the incomplete flag on the
1039                  * "old" attr and clear the incomplete flag on the "new" attr.
1040                  */
1041                 error = xfs_attr_leaf_flipflags(args);
1042                 if (error)
1043                         return(error);
1044
1045                 /*
1046                  * Dismantle the "old" attribute/value pair by removing
1047                  * a "remote" value (if it exists).
1048                  */
1049                 args->index = args->index2;
1050                 args->blkno = args->blkno2;
1051                 args->rmtblkno = args->rmtblkno2;
1052                 args->rmtblkcnt = args->rmtblkcnt2;
1053                 if (args->rmtblkno) {
1054                         error = xfs_attr_rmtval_remove(args);
1055                         if (error)
1056                                 return(error);
1057                 }
1058
1059                 /*
1060                  * Read in the block containing the "old" attr, then
1061                  * remove the "old" attr from that block (neat, huh!)
1062                  */
1063                 error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1,
1064                                                      &bp, XFS_ATTR_FORK);
1065                 if (error)
1066                         return(error);
1067                 ASSERT(bp != NULL);
1068                 (void)xfs_attr_leaf_remove(bp, args);
1069
1070                 /*
1071                  * If the result is small enough, shrink it all into the inode.
1072                  */
1073                 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1074                         XFS_BMAP_INIT(args->flist, args->firstblock);
1075                         error = xfs_attr_leaf_to_shortform(bp, args, forkoff);
1076                         /* bp is gone due to xfs_da_shrink_inode */
1077                         if (!error) {
1078                                 error = xfs_bmap_finish(&args->trans,
1079                                                         args->flist,
1080                                                         &committed);
1081                         }
1082                         if (error) {
1083                                 ASSERT(committed);
1084                                 args->trans = NULL;
1085                                 xfs_bmap_cancel(args->flist);
1086                                 return(error);
1087                         }
1088
1089                         /*
1090                          * bmap_finish() may have committed the last trans
1091                          * and started a new one.  We need the inode to be
1092                          * in all transactions.
1093                          */
1094                         if (committed) {
1095                                 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1096                                 xfs_trans_ihold(args->trans, dp);
1097                         }
1098                 } else
1099                         xfs_da_buf_done(bp);
1100
1101                 /*
1102                  * Commit the remove and start the next trans in series.
1103                  */
1104                 error = xfs_attr_rolltrans(&args->trans, dp);
1105
1106         } else if (args->rmtblkno > 0) {
1107                 /*
1108                  * Added a "remote" value, just clear the incomplete flag.
1109                  */
1110                 error = xfs_attr_leaf_clearflag(args);
1111         }
1112         return(error);
1113 }
1114
1115 /*
1116  * Remove a name from the leaf attribute list structure
1117  *
1118  * This leaf block cannot have a "remote" value, we only call this routine
1119  * if bmap_one_block() says there is only one block (ie: no remote blks).
1120  */
1121 STATIC int
1122 xfs_attr_leaf_removename(xfs_da_args_t *args)
1123 {
1124         xfs_inode_t *dp;
1125         xfs_dabuf_t *bp;
1126         int error, committed, forkoff;
1127
1128         /*
1129          * Remove the attribute.
1130          */
1131         dp = args->dp;
1132         args->blkno = 0;
1133         error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
1134                                              XFS_ATTR_FORK);
1135         if (error) {
1136                 return(error);
1137         }
1138
1139         ASSERT(bp != NULL);
1140         error = xfs_attr_leaf_lookup_int(bp, args);
1141         if (error == ENOATTR) {
1142                 xfs_da_brelse(args->trans, bp);
1143                 return(error);
1144         }
1145
1146         (void)xfs_attr_leaf_remove(bp, args);
1147
1148         /*
1149          * If the result is small enough, shrink it all into the inode.
1150          */
1151         if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1152                 XFS_BMAP_INIT(args->flist, args->firstblock);
1153                 error = xfs_attr_leaf_to_shortform(bp, args, forkoff);
1154                 /* bp is gone due to xfs_da_shrink_inode */
1155                 if (!error) {
1156                         error = xfs_bmap_finish(&args->trans, args->flist,
1157                                                 &committed);
1158                 }
1159                 if (error) {
1160                         ASSERT(committed);
1161                         args->trans = NULL;
1162                         xfs_bmap_cancel(args->flist);
1163                         return(error);
1164                 }
1165
1166                 /*
1167                  * bmap_finish() may have committed the last trans and started
1168                  * a new one.  We need the inode to be in all transactions.
1169                  */
1170                 if (committed) {
1171                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1172                         xfs_trans_ihold(args->trans, dp);
1173                 }
1174         } else
1175                 xfs_da_buf_done(bp);
1176         return(0);
1177 }
1178
1179 /*
1180  * Look up a name in a leaf attribute list structure.
1181  *
1182  * This leaf block cannot have a "remote" value, we only call this routine
1183  * if bmap_one_block() says there is only one block (ie: no remote blks).
1184  */
1185 STATIC int
1186 xfs_attr_leaf_get(xfs_da_args_t *args)
1187 {
1188         xfs_dabuf_t *bp;
1189         int error;
1190
1191         args->blkno = 0;
1192         error = xfs_da_read_buf(args->trans, args->dp, args->blkno, -1, &bp,
1193                                              XFS_ATTR_FORK);
1194         if (error)
1195                 return(error);
1196         ASSERT(bp != NULL);
1197
1198         error = xfs_attr_leaf_lookup_int(bp, args);
1199         if (error != EEXIST)  {
1200                 xfs_da_brelse(args->trans, bp);
1201                 return(error);
1202         }
1203         error = xfs_attr_leaf_getvalue(bp, args);
1204         xfs_da_brelse(args->trans, bp);
1205         if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) {
1206                 error = xfs_attr_rmtval_get(args);
1207         }
1208         return(error);
1209 }
1210
1211 /*
1212  * Copy out attribute entries for attr_list(), for leaf attribute lists.
1213  */
1214 STATIC int
1215 xfs_attr_leaf_list(xfs_attr_list_context_t *context)
1216 {
1217         xfs_attr_leafblock_t *leaf;
1218         int error;
1219         xfs_dabuf_t *bp;
1220
1221         context->cursor->blkno = 0;
1222         error = xfs_da_read_buf(NULL, context->dp, 0, -1, &bp, XFS_ATTR_FORK);
1223         if (error)
1224                 return XFS_ERROR(error);
1225         ASSERT(bp != NULL);
1226         leaf = bp->data;
1227         if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC)) {
1228                 XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW,
1229                                      context->dp->i_mount, leaf);
1230                 xfs_da_brelse(NULL, bp);
1231                 return XFS_ERROR(EFSCORRUPTED);
1232         }
1233
1234         error = xfs_attr_leaf_list_int(bp, context);
1235         xfs_da_brelse(NULL, bp);
1236         return XFS_ERROR(error);
1237 }
1238
1239
1240 /*========================================================================
1241  * External routines when attribute list size > XFS_LBSIZE(mp).
1242  *========================================================================*/
1243
1244 /*
1245  * Add a name to a Btree-format attribute list.
1246  *
1247  * This will involve walking down the Btree, and may involve splitting
1248  * leaf nodes and even splitting intermediate nodes up to and including
1249  * the root node (a special case of an intermediate node).
1250  *
1251  * "Remote" attribute values confuse the issue and atomic rename operations
1252  * add a whole extra layer of confusion on top of that.
1253  */
1254 STATIC int
1255 xfs_attr_node_addname(xfs_da_args_t *args)
1256 {
1257         xfs_da_state_t *state;
1258         xfs_da_state_blk_t *blk;
1259         xfs_inode_t *dp;
1260         xfs_mount_t *mp;
1261         int committed, retval, error;
1262
1263         /*
1264          * Fill in bucket of arguments/results/context to carry around.
1265          */
1266         dp = args->dp;
1267         mp = dp->i_mount;
1268 restart:
1269         state = xfs_da_state_alloc();
1270         state->args = args;
1271         state->mp = mp;
1272         state->blocksize = state->mp->m_sb.sb_blocksize;
1273         state->node_ents = state->mp->m_attr_node_ents;
1274
1275         /*
1276          * Search to see if name already exists, and get back a pointer
1277          * to where it should go.
1278          */
1279         error = xfs_da_node_lookup_int(state, &retval);
1280         if (error)
1281                 goto out;
1282         blk = &state->path.blk[ state->path.active-1 ];
1283         ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1284         if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
1285                 goto out;
1286         } else if (retval == EEXIST) {
1287                 if (args->flags & ATTR_CREATE)
1288                         goto out;
1289                 args->rename = 1;                       /* atomic rename op */
1290                 args->blkno2 = args->blkno;             /* set 2nd entry info*/
1291                 args->index2 = args->index;
1292                 args->rmtblkno2 = args->rmtblkno;
1293                 args->rmtblkcnt2 = args->rmtblkcnt;
1294                 args->rmtblkno = 0;
1295                 args->rmtblkcnt = 0;
1296         }
1297
1298         retval = xfs_attr_leaf_add(blk->bp, state->args);
1299         if (retval == ENOSPC) {
1300                 if (state->path.active == 1) {
1301                         /*
1302                          * Its really a single leaf node, but it had
1303                          * out-of-line values so it looked like it *might*
1304                          * have been a b-tree.
1305                          */
1306                         xfs_da_state_free(state);
1307                         XFS_BMAP_INIT(args->flist, args->firstblock);
1308                         error = xfs_attr_leaf_to_node(args);
1309                         if (!error) {
1310                                 error = xfs_bmap_finish(&args->trans,
1311                                                         args->flist,
1312                                                         &committed);
1313                         }
1314                         if (error) {
1315                                 ASSERT(committed);
1316                                 args->trans = NULL;
1317                                 xfs_bmap_cancel(args->flist);
1318                                 goto out;
1319                         }
1320
1321                         /*
1322                          * bmap_finish() may have committed the last trans
1323                          * and started a new one.  We need the inode to be
1324                          * in all transactions.
1325                          */
1326                         if (committed) {
1327                                 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1328                                 xfs_trans_ihold(args->trans, dp);
1329                         }
1330
1331                         /*
1332                          * Commit the node conversion and start the next
1333                          * trans in the chain.
1334                          */
1335                         if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1336                                 goto out;
1337
1338                         goto restart;
1339                 }
1340
1341                 /*
1342                  * Split as many Btree elements as required.
1343                  * This code tracks the new and old attr's location
1344                  * in the index/blkno/rmtblkno/rmtblkcnt fields and
1345                  * in the index2/blkno2/rmtblkno2/rmtblkcnt2 fields.
1346                  */
1347                 XFS_BMAP_INIT(args->flist, args->firstblock);
1348                 error = xfs_da_split(state);
1349                 if (!error) {
1350                         error = xfs_bmap_finish(&args->trans, args->flist,
1351                                                 &committed);
1352                 }
1353                 if (error) {
1354                         ASSERT(committed);
1355                         args->trans = NULL;
1356                         xfs_bmap_cancel(args->flist);
1357                         goto out;
1358                 }
1359
1360                 /*
1361                  * bmap_finish() may have committed the last trans and started
1362                  * a new one.  We need the inode to be in all transactions.
1363                  */
1364                 if (committed) {
1365                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1366                         xfs_trans_ihold(args->trans, dp);
1367                 }
1368         } else {
1369                 /*
1370                  * Addition succeeded, update Btree hashvals.
1371                  */
1372                 xfs_da_fixhashpath(state, &state->path);
1373         }
1374
1375         /*
1376          * Kill the state structure, we're done with it and need to
1377          * allow the buffers to come back later.
1378          */
1379         xfs_da_state_free(state);
1380         state = NULL;
1381
1382         /*
1383          * Commit the leaf addition or btree split and start the next
1384          * trans in the chain.
1385          */
1386         if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1387                 goto out;
1388
1389         /*
1390          * If there was an out-of-line value, allocate the blocks we
1391          * identified for its storage and copy the value.  This is done
1392          * after we create the attribute so that we don't overflow the
1393          * maximum size of a transaction and/or hit a deadlock.
1394          */
1395         if (args->rmtblkno > 0) {
1396                 error = xfs_attr_rmtval_set(args);
1397                 if (error)
1398                         return(error);
1399         }
1400
1401         /*
1402          * If this is an atomic rename operation, we must "flip" the
1403          * incomplete flags on the "new" and "old" attribute/value pairs
1404          * so that one disappears and one appears atomically.  Then we
1405          * must remove the "old" attribute/value pair.
1406          */
1407         if (args->rename) {
1408                 /*
1409                  * In a separate transaction, set the incomplete flag on the
1410                  * "old" attr and clear the incomplete flag on the "new" attr.
1411                  */
1412                 error = xfs_attr_leaf_flipflags(args);
1413                 if (error)
1414                         goto out;
1415
1416                 /*
1417                  * Dismantle the "old" attribute/value pair by removing
1418                  * a "remote" value (if it exists).
1419                  */
1420                 args->index = args->index2;
1421                 args->blkno = args->blkno2;
1422                 args->rmtblkno = args->rmtblkno2;
1423                 args->rmtblkcnt = args->rmtblkcnt2;
1424                 if (args->rmtblkno) {
1425                         error = xfs_attr_rmtval_remove(args);
1426                         if (error)
1427                                 return(error);
1428                 }
1429
1430                 /*
1431                  * Re-find the "old" attribute entry after any split ops.
1432                  * The INCOMPLETE flag means that we will find the "old"
1433                  * attr, not the "new" one.
1434                  */
1435                 args->flags |= XFS_ATTR_INCOMPLETE;
1436                 state = xfs_da_state_alloc();
1437                 state->args = args;
1438                 state->mp = mp;
1439                 state->blocksize = state->mp->m_sb.sb_blocksize;
1440                 state->node_ents = state->mp->m_attr_node_ents;
1441                 state->inleaf = 0;
1442                 error = xfs_da_node_lookup_int(state, &retval);
1443                 if (error)
1444                         goto out;
1445
1446                 /*
1447                  * Remove the name and update the hashvals in the tree.
1448                  */
1449                 blk = &state->path.blk[ state->path.active-1 ];
1450                 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1451                 error = xfs_attr_leaf_remove(blk->bp, args);
1452                 xfs_da_fixhashpath(state, &state->path);
1453
1454                 /*
1455                  * Check to see if the tree needs to be collapsed.
1456                  */
1457                 if (retval && (state->path.active > 1)) {
1458                         XFS_BMAP_INIT(args->flist, args->firstblock);
1459                         error = xfs_da_join(state);
1460                         if (!error) {
1461                                 error = xfs_bmap_finish(&args->trans,
1462                                                         args->flist,
1463                                                         &committed);
1464                         }
1465                         if (error) {
1466                                 ASSERT(committed);
1467                                 args->trans = NULL;
1468                                 xfs_bmap_cancel(args->flist);
1469                                 goto out;
1470                         }
1471
1472                         /*
1473                          * bmap_finish() may have committed the last trans
1474                          * and started a new one.  We need the inode to be
1475                          * in all transactions.
1476                          */
1477                         if (committed) {
1478                                 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1479                                 xfs_trans_ihold(args->trans, dp);
1480                         }
1481                 }
1482
1483                 /*
1484                  * Commit and start the next trans in the chain.
1485                  */
1486                 if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1487                         goto out;
1488
1489         } else if (args->rmtblkno > 0) {
1490                 /*
1491                  * Added a "remote" value, just clear the incomplete flag.
1492                  */
1493                 error = xfs_attr_leaf_clearflag(args);
1494                 if (error)
1495                         goto out;
1496         }
1497         retval = error = 0;
1498
1499 out:
1500         if (state)
1501                 xfs_da_state_free(state);
1502         if (error)
1503                 return(error);
1504         return(retval);
1505 }
1506
1507 /*
1508  * Remove a name from a B-tree attribute list.
1509  *
1510  * This will involve walking down the Btree, and may involve joining
1511  * leaf nodes and even joining intermediate nodes up to and including
1512  * the root node (a special case of an intermediate node).
1513  */
1514 STATIC int
1515 xfs_attr_node_removename(xfs_da_args_t *args)
1516 {
1517         xfs_da_state_t *state;
1518         xfs_da_state_blk_t *blk;
1519         xfs_inode_t *dp;
1520         xfs_dabuf_t *bp;
1521         int retval, error, committed, forkoff;
1522
1523         /*
1524          * Tie a string around our finger to remind us where we are.
1525          */
1526         dp = args->dp;
1527         state = xfs_da_state_alloc();
1528         state->args = args;
1529         state->mp = dp->i_mount;
1530         state->blocksize = state->mp->m_sb.sb_blocksize;
1531         state->node_ents = state->mp->m_attr_node_ents;
1532
1533         /*
1534          * Search to see if name exists, and get back a pointer to it.
1535          */
1536         error = xfs_da_node_lookup_int(state, &retval);
1537         if (error || (retval != EEXIST)) {
1538                 if (error == 0)
1539                         error = retval;
1540                 goto out;
1541         }
1542
1543         /*
1544          * If there is an out-of-line value, de-allocate the blocks.
1545          * This is done before we remove the attribute so that we don't
1546          * overflow the maximum size of a transaction and/or hit a deadlock.
1547          */
1548         blk = &state->path.blk[ state->path.active-1 ];
1549         ASSERT(blk->bp != NULL);
1550         ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1551         if (args->rmtblkno > 0) {
1552                 /*
1553                  * Fill in disk block numbers in the state structure
1554                  * so that we can get the buffers back after we commit
1555                  * several transactions in the following calls.
1556                  */
1557                 error = xfs_attr_fillstate(state);
1558                 if (error)
1559                         goto out;
1560
1561                 /*
1562                  * Mark the attribute as INCOMPLETE, then bunmapi() the
1563                  * remote value.
1564                  */
1565                 error = xfs_attr_leaf_setflag(args);
1566                 if (error)
1567                         goto out;
1568                 error = xfs_attr_rmtval_remove(args);
1569                 if (error)
1570                         goto out;
1571
1572                 /*
1573                  * Refill the state structure with buffers, the prior calls
1574                  * released our buffers.
1575                  */
1576                 error = xfs_attr_refillstate(state);
1577                 if (error)
1578                         goto out;
1579         }
1580
1581         /*
1582          * Remove the name and update the hashvals in the tree.
1583          */
1584         blk = &state->path.blk[ state->path.active-1 ];
1585         ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1586         retval = xfs_attr_leaf_remove(blk->bp, args);
1587         xfs_da_fixhashpath(state, &state->path);
1588
1589         /*
1590          * Check to see if the tree needs to be collapsed.
1591          */
1592         if (retval && (state->path.active > 1)) {
1593                 XFS_BMAP_INIT(args->flist, args->firstblock);
1594                 error = xfs_da_join(state);
1595                 if (!error) {
1596                         error = xfs_bmap_finish(&args->trans, args->flist,
1597                                                 &committed);
1598                 }
1599                 if (error) {
1600                         ASSERT(committed);
1601                         args->trans = NULL;
1602                         xfs_bmap_cancel(args->flist);
1603                         goto out;
1604                 }
1605
1606                 /*
1607                  * bmap_finish() may have committed the last trans and started
1608                  * a new one.  We need the inode to be in all transactions.
1609                  */
1610                 if (committed) {
1611                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1612                         xfs_trans_ihold(args->trans, dp);
1613                 }
1614
1615                 /*
1616                  * Commit the Btree join operation and start a new trans.
1617                  */
1618                 if ((error = xfs_attr_rolltrans(&args->trans, dp)))
1619                         goto out;
1620         }
1621
1622         /*
1623          * If the result is small enough, push it all into the inode.
1624          */
1625         if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {
1626                 /*
1627                  * Have to get rid of the copy of this dabuf in the state.
1628                  */
1629                 ASSERT(state->path.active == 1);
1630                 ASSERT(state->path.blk[0].bp);
1631                 xfs_da_buf_done(state->path.blk[0].bp);
1632                 state->path.blk[0].bp = NULL;
1633
1634                 error = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp,
1635                                                      XFS_ATTR_FORK);
1636                 if (error)
1637                         goto out;
1638                 ASSERT(be16_to_cpu(((xfs_attr_leafblock_t *)
1639                                       bp->data)->hdr.info.magic)
1640                                                        == XFS_ATTR_LEAF_MAGIC);
1641
1642                 if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
1643                         XFS_BMAP_INIT(args->flist, args->firstblock);
1644                         error = xfs_attr_leaf_to_shortform(bp, args, forkoff);
1645                         /* bp is gone due to xfs_da_shrink_inode */
1646                         if (!error) {
1647                                 error = xfs_bmap_finish(&args->trans,
1648                                                         args->flist,
1649                                                         &committed);
1650                         }
1651                         if (error) {
1652                                 ASSERT(committed);
1653                                 args->trans = NULL;
1654                                 xfs_bmap_cancel(args->flist);
1655                                 goto out;
1656                         }
1657
1658                         /*
1659                          * bmap_finish() may have committed the last trans
1660                          * and started a new one.  We need the inode to be
1661                          * in all transactions.
1662                          */
1663                         if (committed) {
1664                                 xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
1665                                 xfs_trans_ihold(args->trans, dp);
1666                         }
1667                 } else
1668                         xfs_da_brelse(args->trans, bp);
1669         }
1670         error = 0;
1671
1672 out:
1673         xfs_da_state_free(state);
1674         return(error);
1675 }
1676
1677 /*
1678  * Fill in the disk block numbers in the state structure for the buffers
1679  * that are attached to the state structure.
1680  * This is done so that we can quickly reattach ourselves to those buffers
1681  * after some set of transaction commits have released these buffers.
1682  */
1683 STATIC int
1684 xfs_attr_fillstate(xfs_da_state_t *state)
1685 {
1686         xfs_da_state_path_t *path;
1687         xfs_da_state_blk_t *blk;
1688         int level;
1689
1690         /*
1691          * Roll down the "path" in the state structure, storing the on-disk
1692          * block number for those buffers in the "path".
1693          */
1694         path = &state->path;
1695         ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1696         for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1697                 if (blk->bp) {
1698                         blk->disk_blkno = xfs_da_blkno(blk->bp);
1699                         xfs_da_buf_done(blk->bp);
1700                         blk->bp = NULL;
1701                 } else {
1702                         blk->disk_blkno = 0;
1703                 }
1704         }
1705
1706         /*
1707          * Roll down the "altpath" in the state structure, storing the on-disk
1708          * block number for those buffers in the "altpath".
1709          */
1710         path = &state->altpath;
1711         ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1712         for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1713                 if (blk->bp) {
1714                         blk->disk_blkno = xfs_da_blkno(blk->bp);
1715                         xfs_da_buf_done(blk->bp);
1716                         blk->bp = NULL;
1717                 } else {
1718                         blk->disk_blkno = 0;
1719                 }
1720         }
1721
1722         return(0);
1723 }
1724
1725 /*
1726  * Reattach the buffers to the state structure based on the disk block
1727  * numbers stored in the state structure.
1728  * This is done after some set of transaction commits have released those
1729  * buffers from our grip.
1730  */
1731 STATIC int
1732 xfs_attr_refillstate(xfs_da_state_t *state)
1733 {
1734         xfs_da_state_path_t *path;
1735         xfs_da_state_blk_t *blk;
1736         int level, error;
1737
1738         /*
1739          * Roll down the "path" in the state structure, storing the on-disk
1740          * block number for those buffers in the "path".
1741          */
1742         path = &state->path;
1743         ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1744         for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1745                 if (blk->disk_blkno) {
1746                         error = xfs_da_read_buf(state->args->trans,
1747                                                 state->args->dp,
1748                                                 blk->blkno, blk->disk_blkno,
1749                                                 &blk->bp, XFS_ATTR_FORK);
1750                         if (error)
1751                                 return(error);
1752                 } else {
1753                         blk->bp = NULL;
1754                 }
1755         }
1756
1757         /*
1758          * Roll down the "altpath" in the state structure, storing the on-disk
1759          * block number for those buffers in the "altpath".
1760          */
1761         path = &state->altpath;
1762         ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
1763         for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
1764                 if (blk->disk_blkno) {
1765                         error = xfs_da_read_buf(state->args->trans,
1766                                                 state->args->dp,
1767                                                 blk->blkno, blk->disk_blkno,
1768                                                 &blk->bp, XFS_ATTR_FORK);
1769                         if (error)
1770                                 return(error);
1771                 } else {
1772                         blk->bp = NULL;
1773                 }
1774         }
1775
1776         return(0);
1777 }
1778
1779 /*
1780  * Look up a filename in a node attribute list.
1781  *
1782  * This routine gets called for any attribute fork that has more than one
1783  * block, ie: both true Btree attr lists and for single-leaf-blocks with
1784  * "remote" values taking up more blocks.
1785  */
1786 STATIC int
1787 xfs_attr_node_get(xfs_da_args_t *args)
1788 {
1789         xfs_da_state_t *state;
1790         xfs_da_state_blk_t *blk;
1791         int error, retval;
1792         int i;
1793
1794         state = xfs_da_state_alloc();
1795         state->args = args;
1796         state->mp = args->dp->i_mount;
1797         state->blocksize = state->mp->m_sb.sb_blocksize;
1798         state->node_ents = state->mp->m_attr_node_ents;
1799
1800         /*
1801          * Search to see if name exists, and get back a pointer to it.
1802          */
1803         error = xfs_da_node_lookup_int(state, &retval);
1804         if (error) {
1805                 retval = error;
1806         } else if (retval == EEXIST) {
1807                 blk = &state->path.blk[ state->path.active-1 ];
1808                 ASSERT(blk->bp != NULL);
1809                 ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
1810
1811                 /*
1812                  * Get the value, local or "remote"
1813                  */
1814                 retval = xfs_attr_leaf_getvalue(blk->bp, args);
1815                 if (!retval && (args->rmtblkno > 0)
1816                     && !(args->flags & ATTR_KERNOVAL)) {
1817                         retval = xfs_attr_rmtval_get(args);
1818                 }
1819         }
1820
1821         /*
1822          * If not in a transaction, we have to release all the buffers.
1823          */
1824         for (i = 0; i < state->path.active; i++) {
1825                 xfs_da_brelse(args->trans, state->path.blk[i].bp);
1826                 state->path.blk[i].bp = NULL;
1827         }
1828
1829         xfs_da_state_free(state);
1830         return(retval);
1831 }
1832
1833 STATIC int                                                      /* error */
1834 xfs_attr_node_list(xfs_attr_list_context_t *context)
1835 {
1836         attrlist_cursor_kern_t *cursor;
1837         xfs_attr_leafblock_t *leaf;
1838         xfs_da_intnode_t *node;
1839         xfs_da_node_entry_t *btree;
1840         int error, i;
1841         xfs_dabuf_t *bp;
1842
1843         cursor = context->cursor;
1844         cursor->initted = 1;
1845
1846         /*
1847          * Do all sorts of validation on the passed-in cursor structure.
1848          * If anything is amiss, ignore the cursor and look up the hashval
1849          * starting from the btree root.
1850          */
1851         bp = NULL;
1852         if (cursor->blkno > 0) {
1853                 error = xfs_da_read_buf(NULL, context->dp, cursor->blkno, -1,
1854                                               &bp, XFS_ATTR_FORK);
1855                 if ((error != 0) && (error != EFSCORRUPTED))
1856                         return(error);
1857                 if (bp) {
1858                         node = bp->data;
1859                         switch (be16_to_cpu(node->hdr.info.magic)) {
1860                         case XFS_DA_NODE_MAGIC:
1861                                 xfs_attr_trace_l_cn("wrong blk", context, node);
1862                                 xfs_da_brelse(NULL, bp);
1863                                 bp = NULL;
1864                                 break;
1865                         case XFS_ATTR_LEAF_MAGIC:
1866                                 leaf = bp->data;
1867                                 if (cursor->hashval > be32_to_cpu(leaf->entries[
1868                                     be16_to_cpu(leaf->hdr.count)-1].hashval)) {
1869                                         xfs_attr_trace_l_cl("wrong blk",
1870                                                            context, leaf);
1871                                         xfs_da_brelse(NULL, bp);
1872                                         bp = NULL;
1873                                 } else if (cursor->hashval <=
1874                                              be32_to_cpu(leaf->entries[0].hashval)) {
1875                                         xfs_attr_trace_l_cl("maybe wrong blk",
1876                                                            context, leaf);
1877                                         xfs_da_brelse(NULL, bp);
1878                                         bp = NULL;
1879                                 }
1880                                 break;
1881                         default:
1882                                 xfs_attr_trace_l_c("wrong blk - ??", context);
1883                                 xfs_da_brelse(NULL, bp);
1884                                 bp = NULL;
1885                         }
1886                 }
1887         }
1888
1889         /*
1890          * We did not find what we expected given the cursor's contents,
1891          * so we start from the top and work down based on the hash value.
1892          * Note that start of node block is same as start of leaf block.
1893          */
1894         if (bp == NULL) {
1895                 cursor->blkno = 0;
1896                 for (;;) {
1897                         error = xfs_da_read_buf(NULL, context->dp,
1898                                                       cursor->blkno, -1, &bp,
1899                                                       XFS_ATTR_FORK);
1900                         if (error)
1901                                 return(error);
1902                         if (unlikely(bp == NULL)) {
1903                                 XFS_ERROR_REPORT("xfs_attr_node_list(2)",
1904                                                  XFS_ERRLEVEL_LOW,
1905                                                  context->dp->i_mount);
1906                                 return(XFS_ERROR(EFSCORRUPTED));
1907                         }
1908                         node = bp->data;
1909                         if (be16_to_cpu(node->hdr.info.magic)
1910                                                         == XFS_ATTR_LEAF_MAGIC)
1911                                 break;
1912                         if (unlikely(be16_to_cpu(node->hdr.info.magic)
1913                                                         != XFS_DA_NODE_MAGIC)) {
1914                                 XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)",
1915                                                      XFS_ERRLEVEL_LOW,
1916                                                      context->dp->i_mount,
1917                                                      node);
1918                                 xfs_da_brelse(NULL, bp);
1919                                 return(XFS_ERROR(EFSCORRUPTED));
1920                         }
1921                         btree = node->btree;
1922                         for (i = 0; i < be16_to_cpu(node->hdr.count);
1923                                                                 btree++, i++) {
1924                                 if (cursor->hashval
1925                                                 <= be32_to_cpu(btree->hashval)) {
1926                                         cursor->blkno = be32_to_cpu(btree->before);
1927                                         xfs_attr_trace_l_cb("descending",
1928                                                             context, btree);
1929                                         break;
1930                                 }
1931                         }
1932                         if (i == be16_to_cpu(node->hdr.count)) {
1933                                 xfs_da_brelse(NULL, bp);
1934                                 return(0);
1935                         }
1936                         xfs_da_brelse(NULL, bp);
1937                 }
1938         }
1939         ASSERT(bp != NULL);
1940
1941         /*
1942          * Roll upward through the blocks, processing each leaf block in
1943          * order.  As long as there is space in the result buffer, keep
1944          * adding the information.
1945          */
1946         for (;;) {
1947                 leaf = bp->data;
1948                 if (unlikely(be16_to_cpu(leaf->hdr.info.magic)
1949                                                 != XFS_ATTR_LEAF_MAGIC)) {
1950                         XFS_CORRUPTION_ERROR("xfs_attr_node_list(4)",
1951                                              XFS_ERRLEVEL_LOW,
1952                                              context->dp->i_mount, leaf);
1953                         xfs_da_brelse(NULL, bp);
1954                         return(XFS_ERROR(EFSCORRUPTED));
1955                 }
1956                 error = xfs_attr_leaf_list_int(bp, context);
1957                 if (error) {
1958                         xfs_da_brelse(NULL, bp);
1959                         return error;
1960                 }
1961                 if (context->seen_enough || leaf->hdr.info.forw == 0)
1962                         break;
1963                 cursor->blkno = be32_to_cpu(leaf->hdr.info.forw);
1964                 xfs_da_brelse(NULL, bp);
1965                 error = xfs_da_read_buf(NULL, context->dp, cursor->blkno, -1,
1966                                               &bp, XFS_ATTR_FORK);
1967                 if (error)
1968                         return(error);
1969                 if (unlikely((bp == NULL))) {
1970                         XFS_ERROR_REPORT("xfs_attr_node_list(5)",
1971                                          XFS_ERRLEVEL_LOW,
1972                                          context->dp->i_mount);
1973                         return(XFS_ERROR(EFSCORRUPTED));
1974                 }
1975         }
1976         xfs_da_brelse(NULL, bp);
1977         return(0);
1978 }
1979
1980
1981 /*========================================================================
1982  * External routines for manipulating out-of-line attribute values.
1983  *========================================================================*/
1984
1985 /*
1986  * Read the value associated with an attribute from the out-of-line buffer
1987  * that we stored it in.
1988  */
1989 int
1990 xfs_attr_rmtval_get(xfs_da_args_t *args)
1991 {
1992         xfs_bmbt_irec_t map[ATTR_RMTVALUE_MAPSIZE];
1993         xfs_mount_t *mp;
1994         xfs_daddr_t dblkno;
1995         xfs_caddr_t dst;
1996         xfs_buf_t *bp;
1997         int nmap, error, tmp, valuelen, blkcnt, i;
1998         xfs_dablk_t lblkno;
1999
2000         ASSERT(!(args->flags & ATTR_KERNOVAL));
2001
2002         mp = args->dp->i_mount;
2003         dst = args->value;
2004         valuelen = args->valuelen;
2005         lblkno = args->rmtblkno;
2006         while (valuelen > 0) {
2007                 nmap = ATTR_RMTVALUE_MAPSIZE;
2008                 error = xfs_bmapi(args->trans, args->dp, (xfs_fileoff_t)lblkno,
2009                                   args->rmtblkcnt,
2010                                   XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2011                                   NULL, 0, map, &nmap, NULL, NULL);
2012                 if (error)
2013                         return(error);
2014                 ASSERT(nmap >= 1);
2015
2016                 for (i = 0; (i < nmap) && (valuelen > 0); i++) {
2017                         ASSERT((map[i].br_startblock != DELAYSTARTBLOCK) &&
2018                                (map[i].br_startblock != HOLESTARTBLOCK));
2019                         dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
2020                         blkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
2021                         error = xfs_read_buf(mp, mp->m_ddev_targp, dblkno,
2022                                              blkcnt, XFS_BUF_LOCK, &bp);
2023                         if (error)
2024                                 return(error);
2025
2026                         tmp = (valuelen < XFS_BUF_SIZE(bp))
2027                                 ? valuelen : XFS_BUF_SIZE(bp);
2028                         xfs_biomove(bp, 0, tmp, dst, XFS_B_READ);
2029                         xfs_buf_relse(bp);
2030                         dst += tmp;
2031                         valuelen -= tmp;
2032
2033                         lblkno += map[i].br_blockcount;
2034                 }
2035         }
2036         ASSERT(valuelen == 0);
2037         return(0);
2038 }
2039
2040 /*
2041  * Write the value associated with an attribute into the out-of-line buffer
2042  * that we have defined for it.
2043  */
2044 STATIC int
2045 xfs_attr_rmtval_set(xfs_da_args_t *args)
2046 {
2047         xfs_mount_t *mp;
2048         xfs_fileoff_t lfileoff;
2049         xfs_inode_t *dp;
2050         xfs_bmbt_irec_t map;
2051         xfs_daddr_t dblkno;
2052         xfs_caddr_t src;
2053         xfs_buf_t *bp;
2054         xfs_dablk_t lblkno;
2055         int blkcnt, valuelen, nmap, error, tmp, committed;
2056
2057         dp = args->dp;
2058         mp = dp->i_mount;
2059         src = args->value;
2060
2061         /*
2062          * Find a "hole" in the attribute address space large enough for
2063          * us to drop the new attribute's value into.
2064          */
2065         blkcnt = XFS_B_TO_FSB(mp, args->valuelen);
2066         lfileoff = 0;
2067         error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,
2068                                                    XFS_ATTR_FORK);
2069         if (error) {
2070                 return(error);
2071         }
2072         args->rmtblkno = lblkno = (xfs_dablk_t)lfileoff;
2073         args->rmtblkcnt = blkcnt;
2074
2075         /*
2076          * Roll through the "value", allocating blocks on disk as required.
2077          */
2078         while (blkcnt > 0) {
2079                 /*
2080                  * Allocate a single extent, up to the size of the value.
2081                  */
2082                 XFS_BMAP_INIT(args->flist, args->firstblock);
2083                 nmap = 1;
2084                 error = xfs_bmapi(args->trans, dp, (xfs_fileoff_t)lblkno,
2085                                   blkcnt,
2086                                   XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA |
2087                                                         XFS_BMAPI_WRITE,
2088                                   args->firstblock, args->total, &map, &nmap,
2089                                   args->flist, NULL);
2090                 if (!error) {
2091                         error = xfs_bmap_finish(&args->trans, args->flist,
2092                                                 &committed);
2093                 }
2094                 if (error) {
2095                         ASSERT(committed);
2096                         args->trans = NULL;
2097                         xfs_bmap_cancel(args->flist);
2098                         return(error);
2099                 }
2100
2101                 /*
2102                  * bmap_finish() may have committed the last trans and started
2103                  * a new one.  We need the inode to be in all transactions.
2104                  */
2105                 if (committed) {
2106                         xfs_trans_ijoin(args->trans, dp, XFS_ILOCK_EXCL);
2107                         xfs_trans_ihold(args->trans, dp);
2108                 }
2109
2110                 ASSERT(nmap == 1);
2111                 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
2112                        (map.br_startblock != HOLESTARTBLOCK));
2113                 lblkno += map.br_blockcount;
2114                 blkcnt -= map.br_blockcount;
2115
2116                 /*
2117                  * Start the next trans in the chain.
2118                  */
2119                 if ((error = xfs_attr_rolltrans(&args->trans, dp)))
2120                         return (error);
2121         }
2122
2123         /*
2124          * Roll through the "value", copying the attribute value to the
2125          * already-allocated blocks.  Blocks are written synchronously
2126          * so that we can know they are all on disk before we turn off
2127          * the INCOMPLETE flag.
2128          */
2129         lblkno = args->rmtblkno;
2130         valuelen = args->valuelen;
2131         while (valuelen > 0) {
2132                 /*
2133                  * Try to remember where we decided to put the value.
2134                  */
2135                 XFS_BMAP_INIT(args->flist, args->firstblock);
2136                 nmap = 1;
2137                 error = xfs_bmapi(NULL, dp, (xfs_fileoff_t)lblkno,
2138                                   args->rmtblkcnt,
2139                                   XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2140                                   args->firstblock, 0, &map, &nmap,
2141                                   NULL, NULL);
2142                 if (error) {
2143                         return(error);
2144                 }
2145                 ASSERT(nmap == 1);
2146                 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
2147                        (map.br_startblock != HOLESTARTBLOCK));
2148
2149                 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
2150                 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
2151
2152                 bp = xfs_buf_get_flags(mp->m_ddev_targp, dblkno,
2153                                                         blkcnt, XFS_BUF_LOCK);
2154                 ASSERT(bp);
2155                 ASSERT(!XFS_BUF_GETERROR(bp));
2156
2157                 tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen :
2158                                                         XFS_BUF_SIZE(bp);
2159                 xfs_biomove(bp, 0, tmp, src, XFS_B_WRITE);
2160                 if (tmp < XFS_BUF_SIZE(bp))
2161                         xfs_biozero(bp, tmp, XFS_BUF_SIZE(bp) - tmp);
2162                 if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */
2163                         return (error);
2164                 }
2165                 src += tmp;
2166                 valuelen -= tmp;
2167
2168                 lblkno += map.br_blockcount;
2169         }
2170         ASSERT(valuelen == 0);
2171         return(0);
2172 }
2173
2174 /*
2175  * Remove the value associated with an attribute by deleting the
2176  * out-of-line buffer that it is stored on.
2177  */
2178 STATIC int
2179 xfs_attr_rmtval_remove(xfs_da_args_t *args)
2180 {
2181         xfs_mount_t *mp;
2182         xfs_bmbt_irec_t map;
2183         xfs_buf_t *bp;
2184         xfs_daddr_t dblkno;
2185         xfs_dablk_t lblkno;
2186         int valuelen, blkcnt, nmap, error, done, committed;
2187
2188         mp = args->dp->i_mount;
2189
2190         /*
2191          * Roll through the "value", invalidating the attribute value's
2192          * blocks.
2193          */
2194         lblkno = args->rmtblkno;
2195         valuelen = args->rmtblkcnt;
2196         while (valuelen > 0) {
2197                 /*
2198                  * Try to remember where we decided to put the value.
2199                  */
2200                 XFS_BMAP_INIT(args->flist, args->firstblock);
2201                 nmap = 1;
2202                 error = xfs_bmapi(NULL, args->dp, (xfs_fileoff_t)lblkno,
2203                                         args->rmtblkcnt,
2204                                         XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2205                                         args->firstblock, 0, &map, &nmap,
2206                                         args->flist, NULL);
2207                 if (error) {
2208                         return(error);
2209                 }
2210                 ASSERT(nmap == 1);
2211                 ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
2212                        (map.br_startblock != HOLESTARTBLOCK));
2213
2214                 dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
2215                 blkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
2216
2217                 /*
2218                  * If the "remote" value is in the cache, remove it.
2219                  */
2220                 bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt,
2221                                 XFS_INCORE_TRYLOCK);
2222                 if (bp) {
2223                         XFS_BUF_STALE(bp);
2224                         XFS_BUF_UNDELAYWRITE(bp);
2225                         xfs_buf_relse(bp);
2226                         bp = NULL;
2227                 }
2228
2229                 valuelen -= map.br_blockcount;
2230
2231                 lblkno += map.br_blockcount;
2232         }
2233
2234         /*
2235          * Keep de-allocating extents until the remote-value region is gone.
2236          */
2237         lblkno = args->rmtblkno;
2238         blkcnt = args->rmtblkcnt;
2239         done = 0;
2240         while (!done) {
2241                 XFS_BMAP_INIT(args->flist, args->firstblock);
2242                 error = xfs_bunmapi(args->trans, args->dp, lblkno, blkcnt,
2243                                     XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
2244                                     1, args->firstblock, args->flist,
2245                                     NULL, &done);
2246                 if (!error) {
2247                         error = xfs_bmap_finish(&args->trans, args->flist,
2248                                                 &committed);
2249                 }
2250                 if (error) {
2251                         ASSERT(committed);
2252                         args->trans = NULL;
2253                         xfs_bmap_cancel(args->flist);
2254                         return(error);
2255                 }
2256
2257                 /*
2258                  * bmap_finish() may have committed the last trans and started
2259                  * a new one.  We need the inode to be in all transactions.
2260                  */
2261                 if (committed) {
2262                         xfs_trans_ijoin(args->trans, args->dp, XFS_ILOCK_EXCL);
2263                         xfs_trans_ihold(args->trans, args->dp);
2264                 }
2265
2266                 /*
2267                  * Close out trans and start the next one in the chain.
2268                  */
2269                 if ((error = xfs_attr_rolltrans(&args->trans, args->dp)))
2270                         return (error);
2271         }
2272         return(0);
2273 }
2274
2275 #if defined(XFS_ATTR_TRACE)
2276 /*
2277  * Add a trace buffer entry for an attr_list context structure.
2278  */
2279 void
2280 xfs_attr_trace_l_c(char *where, struct xfs_attr_list_context *context)
2281 {
2282         xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_C, where,
2283                 (__psunsigned_t)context->dp,
2284                 (__psunsigned_t)context->cursor->hashval,
2285                 (__psunsigned_t)context->cursor->blkno,
2286                 (__psunsigned_t)context->cursor->offset,
2287                 (__psunsigned_t)context->alist,
2288                 (__psunsigned_t)context->bufsize,
2289                 (__psunsigned_t)context->count,
2290                 (__psunsigned_t)context->firstu,
2291                 (__psunsigned_t)
2292                         ((context->count > 0) &&
2293                         !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2294                                 ? (ATTR_ENTRY(context->alist,
2295                                               context->count-1)->a_valuelen)
2296                                 : 0,
2297                 (__psunsigned_t)context->dupcnt,
2298                 (__psunsigned_t)context->flags,
2299                 (__psunsigned_t)NULL,
2300                 (__psunsigned_t)NULL,
2301                 (__psunsigned_t)NULL);
2302 }
2303
2304 /*
2305  * Add a trace buffer entry for a context structure and a Btree node.
2306  */
2307 void
2308 xfs_attr_trace_l_cn(char *where, struct xfs_attr_list_context *context,
2309                          struct xfs_da_intnode *node)
2310 {
2311         xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CN, where,
2312                 (__psunsigned_t)context->dp,
2313                 (__psunsigned_t)context->cursor->hashval,
2314                 (__psunsigned_t)context->cursor->blkno,
2315                 (__psunsigned_t)context->cursor->offset,
2316                 (__psunsigned_t)context->alist,
2317                 (__psunsigned_t)context->bufsize,
2318                 (__psunsigned_t)context->count,
2319                 (__psunsigned_t)context->firstu,
2320                 (__psunsigned_t)
2321                         ((context->count > 0) &&
2322                         !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2323                                 ? (ATTR_ENTRY(context->alist,
2324                                               context->count-1)->a_valuelen)
2325                                 : 0,
2326                 (__psunsigned_t)context->dupcnt,
2327                 (__psunsigned_t)context->flags,
2328                 (__psunsigned_t)be16_to_cpu(node->hdr.count),
2329                 (__psunsigned_t)be32_to_cpu(node->btree[0].hashval),
2330                 (__psunsigned_t)be32_to_cpu(node->btree[
2331                                     be16_to_cpu(node->hdr.count)-1].hashval));
2332 }
2333
2334 /*
2335  * Add a trace buffer entry for a context structure and a Btree element.
2336  */
2337 void
2338 xfs_attr_trace_l_cb(char *where, struct xfs_attr_list_context *context,
2339                           struct xfs_da_node_entry *btree)
2340 {
2341         xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CB, where,
2342                 (__psunsigned_t)context->dp,
2343                 (__psunsigned_t)context->cursor->hashval,
2344                 (__psunsigned_t)context->cursor->blkno,
2345                 (__psunsigned_t)context->cursor->offset,
2346                 (__psunsigned_t)context->alist,
2347                 (__psunsigned_t)context->bufsize,
2348                 (__psunsigned_t)context->count,
2349                 (__psunsigned_t)context->firstu,
2350                 (__psunsigned_t)
2351                         ((context->count > 0) &&
2352                         !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2353                                 ? (ATTR_ENTRY(context->alist,
2354                                               context->count-1)->a_valuelen)
2355                                 : 0,
2356                 (__psunsigned_t)context->dupcnt,
2357                 (__psunsigned_t)context->flags,
2358                 (__psunsigned_t)be32_to_cpu(btree->hashval),
2359                 (__psunsigned_t)be32_to_cpu(btree->before),
2360                 (__psunsigned_t)NULL);
2361 }
2362
2363 /*
2364  * Add a trace buffer entry for a context structure and a leaf block.
2365  */
2366 void
2367 xfs_attr_trace_l_cl(char *where, struct xfs_attr_list_context *context,
2368                               struct xfs_attr_leafblock *leaf)
2369 {
2370         xfs_attr_trace_enter(XFS_ATTR_KTRACE_L_CL, where,
2371                 (__psunsigned_t)context->dp,
2372                 (__psunsigned_t)context->cursor->hashval,
2373                 (__psunsigned_t)context->cursor->blkno,
2374                 (__psunsigned_t)context->cursor->offset,
2375                 (__psunsigned_t)context->alist,
2376                 (__psunsigned_t)context->bufsize,
2377                 (__psunsigned_t)context->count,
2378                 (__psunsigned_t)context->firstu,
2379                 (__psunsigned_t)
2380                         ((context->count > 0) &&
2381                         !(context->flags & (ATTR_KERNAMELS|ATTR_KERNOVAL)))
2382                                 ? (ATTR_ENTRY(context->alist,
2383                                               context->count-1)->a_valuelen)
2384                                 : 0,
2385                 (__psunsigned_t)context->dupcnt,
2386                 (__psunsigned_t)context->flags,
2387                 (__psunsigned_t)be16_to_cpu(leaf->hdr.count),
2388                 (__psunsigned_t)be32_to_cpu(leaf->entries[0].hashval),
2389                 (__psunsigned_t)be32_to_cpu(leaf->entries[
2390                                 be16_to_cpu(leaf->hdr.count)-1].hashval));
2391 }
2392
2393 /*
2394  * Add a trace buffer entry for the arguments given to the routine,
2395  * generic form.
2396  */
2397 void
2398 xfs_attr_trace_enter(int type, char *where,
2399                          __psunsigned_t a2, __psunsigned_t a3,
2400                          __psunsigned_t a4, __psunsigned_t a5,
2401                          __psunsigned_t a6, __psunsigned_t a7,
2402                          __psunsigned_t a8, __psunsigned_t a9,
2403                          __psunsigned_t a10, __psunsigned_t a11,
2404                          __psunsigned_t a12, __psunsigned_t a13,
2405                          __psunsigned_t a14, __psunsigned_t a15)
2406 {
2407         ASSERT(xfs_attr_trace_buf);
2408         ktrace_enter(xfs_attr_trace_buf, (void *)((__psunsigned_t)type),
2409                                          (void *)where,
2410                                          (void *)a2,  (void *)a3,  (void *)a4,
2411                                          (void *)a5,  (void *)a6,  (void *)a7,
2412                                          (void *)a8,  (void *)a9,  (void *)a10,
2413                                          (void *)a11, (void *)a12, (void *)a13,
2414                                          (void *)a14, (void *)a15);
2415 }
2416 #endif  /* XFS_ATTR_TRACE */
2417
2418
2419 /*========================================================================
2420  * System (pseudo) namespace attribute interface routines.
2421  *========================================================================*/
2422
2423 STATIC int
2424 posix_acl_access_set(
2425         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2426 {
2427         return xfs_acl_vset(vp, data, size, _ACL_TYPE_ACCESS);
2428 }
2429
2430 STATIC int
2431 posix_acl_access_remove(
2432         bhv_vnode_t *vp, char *name, int xflags)
2433 {
2434         return xfs_acl_vremove(vp, _ACL_TYPE_ACCESS);
2435 }
2436
2437 STATIC int
2438 posix_acl_access_get(
2439         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2440 {
2441         return xfs_acl_vget(vp, data, size, _ACL_TYPE_ACCESS);
2442 }
2443
2444 STATIC int
2445 posix_acl_access_exists(
2446         bhv_vnode_t *vp)
2447 {
2448         return xfs_acl_vhasacl_access(vp);
2449 }
2450
2451 STATIC int
2452 posix_acl_default_set(
2453         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2454 {
2455         return xfs_acl_vset(vp, data, size, _ACL_TYPE_DEFAULT);
2456 }
2457
2458 STATIC int
2459 posix_acl_default_get(
2460         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2461 {
2462         return xfs_acl_vget(vp, data, size, _ACL_TYPE_DEFAULT);
2463 }
2464
2465 STATIC int
2466 posix_acl_default_remove(
2467         bhv_vnode_t *vp, char *name, int xflags)
2468 {
2469         return xfs_acl_vremove(vp, _ACL_TYPE_DEFAULT);
2470 }
2471
2472 STATIC int
2473 posix_acl_default_exists(
2474         bhv_vnode_t *vp)
2475 {
2476         return xfs_acl_vhasacl_default(vp);
2477 }
2478
2479 static struct attrnames posix_acl_access = {
2480         .attr_name      = "posix_acl_access",
2481         .attr_namelen   = sizeof("posix_acl_access") - 1,
2482         .attr_get       = posix_acl_access_get,
2483         .attr_set       = posix_acl_access_set,
2484         .attr_remove    = posix_acl_access_remove,
2485         .attr_exists    = posix_acl_access_exists,
2486 };
2487
2488 static struct attrnames posix_acl_default = {
2489         .attr_name      = "posix_acl_default",
2490         .attr_namelen   = sizeof("posix_acl_default") - 1,
2491         .attr_get       = posix_acl_default_get,
2492         .attr_set       = posix_acl_default_set,
2493         .attr_remove    = posix_acl_default_remove,
2494         .attr_exists    = posix_acl_default_exists,
2495 };
2496
2497 static struct attrnames *attr_system_names[] =
2498         { &posix_acl_access, &posix_acl_default };
2499
2500
2501 /*========================================================================
2502  * Namespace-prefix-style attribute name interface routines.
2503  *========================================================================*/
2504
2505 STATIC int
2506 attr_generic_set(
2507         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2508 {
2509         return -bhv_vop_attr_set(vp, name, data, size, xflags, NULL);
2510 }
2511
2512 STATIC int
2513 attr_generic_get(
2514         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2515 {
2516         int     error, asize = size;
2517
2518         error = bhv_vop_attr_get(vp, name, data, &asize, xflags, NULL);
2519         if (!error)
2520                 return asize;
2521         return -error;
2522 }
2523
2524 STATIC int
2525 attr_generic_remove(
2526         bhv_vnode_t *vp, char *name, int xflags)
2527 {
2528         return -bhv_vop_attr_remove(vp, name, xflags, NULL);
2529 }
2530
2531 STATIC int
2532 attr_generic_listadd(
2533         attrnames_t             *prefix,
2534         attrnames_t             *namesp,
2535         void                    *data,
2536         size_t                  size,
2537         ssize_t                 *result)
2538 {
2539         char                    *p = data + *result;
2540
2541         *result += prefix->attr_namelen;
2542         *result += namesp->attr_namelen + 1;
2543         if (!size)
2544                 return 0;
2545         if (*result > size)
2546                 return -ERANGE;
2547         strcpy(p, prefix->attr_name);
2548         p += prefix->attr_namelen;
2549         strcpy(p, namesp->attr_name);
2550         p += namesp->attr_namelen + 1;
2551         return 0;
2552 }
2553
2554 STATIC int
2555 attr_system_list(
2556         bhv_vnode_t             *vp,
2557         void                    *data,
2558         size_t                  size,
2559         ssize_t                 *result)
2560 {
2561         attrnames_t             *namesp;
2562         int                     i, error = 0;
2563
2564         for (i = 0; i < ATTR_SYSCOUNT; i++) {
2565                 namesp = attr_system_names[i];
2566                 if (!namesp->attr_exists || !namesp->attr_exists(vp))
2567                         continue;
2568                 error = attr_generic_listadd(&attr_system, namesp,
2569                                                 data, size, result);
2570                 if (error)
2571                         break;
2572         }
2573         return error;
2574 }
2575
2576 int
2577 attr_generic_list(
2578         bhv_vnode_t *vp, void *data, size_t size, int xflags, ssize_t *result)
2579 {
2580         attrlist_cursor_kern_t  cursor = { 0 };
2581         int                     error;
2582
2583         error = bhv_vop_attr_list(vp, data, size, xflags, &cursor, NULL);
2584         if (error > 0)
2585                 return -error;
2586         *result = -error;
2587         return attr_system_list(vp, data, size, result);
2588 }
2589
2590 attrnames_t *
2591 attr_lookup_namespace(
2592         char                    *name,
2593         struct attrnames        **names,
2594         int                     nnames)
2595 {
2596         int                     i;
2597
2598         for (i = 0; i < nnames; i++)
2599                 if (!strncmp(name, names[i]->attr_name, names[i]->attr_namelen))
2600                         return names[i];
2601         return NULL;
2602 }
2603
2604 /*
2605  * Some checks to prevent people abusing EAs to get over quota:
2606  * - Don't allow modifying user EAs on devices/symlinks;
2607  * - Don't allow modifying user EAs if sticky bit set;
2608  */
2609 STATIC int
2610 attr_user_capable(
2611         bhv_vnode_t     *vp,
2612         cred_t          *cred)
2613 {
2614         struct inode    *inode = vn_to_inode(vp);
2615
2616         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
2617                 return -EPERM;
2618         if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) &&
2619             !capable(CAP_SYS_ADMIN))
2620                 return -EPERM;
2621         if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
2622             (current_fsuid(cred) != inode->i_uid) && !capable(CAP_FOWNER))
2623                 return -EPERM;
2624         return 0;
2625 }
2626
2627 STATIC int
2628 attr_trusted_capable(
2629         bhv_vnode_t     *vp,
2630         cred_t          *cred)
2631 {
2632         struct inode    *inode = vn_to_inode(vp);
2633
2634         if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
2635                 return -EPERM;
2636         if (!capable(CAP_SYS_ADMIN))
2637                 return -EPERM;
2638         return 0;
2639 }
2640
2641 STATIC int
2642 attr_secure_capable(
2643         bhv_vnode_t     *vp,
2644         cred_t          *cred)
2645 {
2646         return -ENOSECURITY;
2647 }
2648
2649 STATIC int
2650 attr_system_set(
2651         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2652 {
2653         attrnames_t     *namesp;
2654         int             error;
2655
2656         if (xflags & ATTR_CREATE)
2657                 return -EINVAL;
2658
2659         namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
2660         if (!namesp)
2661                 return -EOPNOTSUPP;
2662         error = namesp->attr_set(vp, name, data, size, xflags);
2663         if (!error)
2664                 error = vn_revalidate(vp);
2665         return error;
2666 }
2667
2668 STATIC int
2669 attr_system_get(
2670         bhv_vnode_t *vp, char *name, void *data, size_t size, int xflags)
2671 {
2672         attrnames_t     *namesp;
2673
2674         namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
2675         if (!namesp)
2676                 return -EOPNOTSUPP;
2677         return namesp->attr_get(vp, name, data, size, xflags);
2678 }
2679
2680 STATIC int
2681 attr_system_remove(
2682         bhv_vnode_t *vp, char *name, int xflags)
2683 {
2684         attrnames_t     *namesp;
2685
2686         namesp = attr_lookup_namespace(name, attr_system_names, ATTR_SYSCOUNT);
2687         if (!namesp)
2688                 return -EOPNOTSUPP;
2689         return namesp->attr_remove(vp, name, xflags);
2690 }
2691
2692 struct attrnames attr_system = {
2693         .attr_name      = "system.",
2694         .attr_namelen   = sizeof("system.") - 1,
2695         .attr_flag      = ATTR_SYSTEM,
2696         .attr_get       = attr_system_get,
2697         .attr_set       = attr_system_set,
2698         .attr_remove    = attr_system_remove,
2699         .attr_capable   = (attrcapable_t)fs_noerr,
2700 };
2701
2702 struct attrnames attr_trusted = {
2703         .attr_name      = "trusted.",
2704         .attr_namelen   = sizeof("trusted.") - 1,
2705         .attr_flag      = ATTR_ROOT,
2706         .attr_get       = attr_generic_get,
2707         .attr_set       = attr_generic_set,
2708         .attr_remove    = attr_generic_remove,
2709         .attr_capable   = attr_trusted_capable,
2710 };
2711
2712 struct attrnames attr_secure = {
2713         .attr_name      = "security.",
2714         .attr_namelen   = sizeof("security.") - 1,
2715         .attr_flag      = ATTR_SECURE,
2716         .attr_get       = attr_generic_get,
2717         .attr_set       = attr_generic_set,
2718         .attr_remove    = attr_generic_remove,
2719         .attr_capable   = attr_secure_capable,
2720 };
2721
2722 struct attrnames attr_user = {
2723         .attr_name      = "user.",
2724         .attr_namelen   = sizeof("user.") - 1,
2725         .attr_get       = attr_generic_get,
2726         .attr_set       = attr_generic_set,
2727         .attr_remove    = attr_generic_remove,
2728         .attr_capable   = attr_user_capable,
2729 };
2730
2731 struct attrnames *attr_namespaces[] =
2732         { &attr_system, &attr_trusted, &attr_secure, &attr_user };