Btrfs: symlinks and hard links
[linux-2.6] / fs / btrfs / super.c
1 #include <linux/module.h>
2 #include <linux/buffer_head.h>
3 #include <linux/fs.h>
4 #include <linux/pagemap.h>
5 #include <linux/highmem.h>
6 #include <linux/time.h>
7 #include <linux/init.h>
8 #include <linux/string.h>
9 #include <linux/smp_lock.h>
10 #include <linux/backing-dev.h>
11 #include <linux/mpage.h>
12 #include <linux/swap.h>
13 #include <linux/writeback.h>
14 #include <linux/statfs.h>
15 #include "ctree.h"
16 #include "disk-io.h"
17 #include "transaction.h"
18 #include "btrfs_inode.h"
19 #include "ioctl.h"
20
21 struct btrfs_iget_args {
22         u64 ino;
23         struct btrfs_root *root;
24 };
25
26 #define BTRFS_SUPER_MAGIC 0x9123682E
27
28 static struct inode_operations btrfs_dir_inode_operations;
29 static struct inode_operations btrfs_symlink_inode_operations;
30 static struct inode_operations btrfs_dir_ro_inode_operations;
31 static struct super_operations btrfs_super_ops;
32 static struct file_operations btrfs_dir_file_operations;
33 static struct inode_operations btrfs_file_inode_operations;
34 static struct address_space_operations btrfs_aops;
35 static struct address_space_operations btrfs_symlink_aops;
36 static struct file_operations btrfs_file_operations;
37
38 static void btrfs_read_locked_inode(struct inode *inode)
39 {
40         struct btrfs_path *path;
41         struct btrfs_inode_item *inode_item;
42         struct btrfs_root *root = BTRFS_I(inode)->root;
43         struct btrfs_key location;
44         struct btrfs_block_group_cache *alloc_group;
45         u64 alloc_group_block;
46         int ret;
47
48         path = btrfs_alloc_path();
49         BUG_ON(!path);
50         btrfs_init_path(path);
51         mutex_lock(&root->fs_info->fs_mutex);
52
53         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
54         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
55         if (ret) {
56                 btrfs_free_path(path);
57                 goto make_bad;
58         }
59         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
60                                   path->slots[0],
61                                   struct btrfs_inode_item);
62
63         inode->i_mode = btrfs_inode_mode(inode_item);
64         inode->i_nlink = btrfs_inode_nlink(inode_item);
65         inode->i_uid = btrfs_inode_uid(inode_item);
66         inode->i_gid = btrfs_inode_gid(inode_item);
67         inode->i_size = btrfs_inode_size(inode_item);
68         inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime);
69         inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime);
70         inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime);
71         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime);
72         inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime);
73         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
74         inode->i_blocks = btrfs_inode_nblocks(inode_item);
75         inode->i_generation = btrfs_inode_generation(inode_item);
76         alloc_group_block = btrfs_inode_block_group(inode_item);
77         ret = radix_tree_gang_lookup(&root->fs_info->block_group_radix,
78                                      (void **)&alloc_group,
79                                      alloc_group_block, 1);
80         BUG_ON(!ret);
81         BTRFS_I(inode)->block_group = alloc_group;
82
83         btrfs_free_path(path);
84         inode_item = NULL;
85
86         mutex_unlock(&root->fs_info->fs_mutex);
87
88         switch (inode->i_mode & S_IFMT) {
89 #if 0
90         default:
91                 init_special_inode(inode, inode->i_mode,
92                                    btrfs_inode_rdev(inode_item));
93                 break;
94 #endif
95         case S_IFREG:
96                 inode->i_mapping->a_ops = &btrfs_aops;
97                 inode->i_fop = &btrfs_file_operations;
98                 inode->i_op = &btrfs_file_inode_operations;
99                 break;
100         case S_IFDIR:
101                 inode->i_fop = &btrfs_dir_file_operations;
102                 if (root == root->fs_info->tree_root)
103                         inode->i_op = &btrfs_dir_ro_inode_operations;
104                 else
105                         inode->i_op = &btrfs_dir_inode_operations;
106                 break;
107         case S_IFLNK:
108                 inode->i_op = &btrfs_symlink_inode_operations;
109                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
110                 break;
111         }
112         return;
113
114 make_bad:
115         btrfs_release_path(root, path);
116         btrfs_free_path(path);
117         mutex_unlock(&root->fs_info->fs_mutex);
118         make_bad_inode(inode);
119 }
120
121 static void fill_inode_item(struct btrfs_inode_item *item,
122                             struct inode *inode)
123 {
124         btrfs_set_inode_uid(item, inode->i_uid);
125         btrfs_set_inode_gid(item, inode->i_gid);
126         btrfs_set_inode_size(item, inode->i_size);
127         btrfs_set_inode_mode(item, inode->i_mode);
128         btrfs_set_inode_nlink(item, inode->i_nlink);
129         btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec);
130         btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec);
131         btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec);
132         btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec);
133         btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec);
134         btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
135         btrfs_set_inode_nblocks(item, inode->i_blocks);
136         btrfs_set_inode_generation(item, inode->i_generation);
137         btrfs_set_inode_block_group(item,
138                                     BTRFS_I(inode)->block_group->key.objectid);
139 }
140
141 static int btrfs_update_inode(struct btrfs_trans_handle *trans,
142                               struct btrfs_root *root,
143                               struct inode *inode)
144 {
145         struct btrfs_inode_item *inode_item;
146         struct btrfs_path *path;
147         int ret;
148
149         path = btrfs_alloc_path();
150         BUG_ON(!path);
151         btrfs_init_path(path);
152         ret = btrfs_lookup_inode(trans, root, path,
153                                  &BTRFS_I(inode)->location, 1);
154         if (ret) {
155                 if (ret > 0)
156                         ret = -ENOENT;
157                 goto failed;
158         }
159
160         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
161                                   path->slots[0],
162                                   struct btrfs_inode_item);
163
164         fill_inode_item(inode_item, inode);
165         btrfs_mark_buffer_dirty(path->nodes[0]);
166         ret = 0;
167 failed:
168         btrfs_release_path(root, path);
169         btrfs_free_path(path);
170         return ret;
171 }
172
173
174 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
175                               struct btrfs_root *root,
176                               struct inode *dir,
177                               struct dentry *dentry)
178 {
179         struct btrfs_path *path;
180         const char *name = dentry->d_name.name;
181         int name_len = dentry->d_name.len;
182         int ret = 0;
183         u64 objectid;
184         struct btrfs_dir_item *di;
185
186         path = btrfs_alloc_path();
187         BUG_ON(!path);
188         btrfs_init_path(path);
189         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
190                                     name, name_len, -1);
191         if (IS_ERR(di)) {
192                 ret = PTR_ERR(di);
193                 goto err;
194         }
195         if (!di) {
196                 ret = -ENOENT;
197                 goto err;
198         }
199         objectid = btrfs_disk_key_objectid(&di->location);
200         ret = btrfs_delete_one_dir_name(trans, root, path, di);
201         BUG_ON(ret);
202         btrfs_release_path(root, path);
203
204         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
205                                          objectid, name, name_len, -1);
206         if (IS_ERR(di)) {
207                 ret = PTR_ERR(di);
208                 goto err;
209         }
210         if (!di) {
211                 ret = -ENOENT;
212                 goto err;
213         }
214         ret = btrfs_delete_one_dir_name(trans, root, path, di);
215         BUG_ON(ret);
216
217         dentry->d_inode->i_ctime = dir->i_ctime;
218 err:
219         btrfs_free_path(path);
220         if (!ret) {
221                 dir->i_size -= name_len * 2;
222                 btrfs_update_inode(trans, root, dir);
223                 drop_nlink(dentry->d_inode);
224                 btrfs_update_inode(trans, root, dentry->d_inode);
225                 dir->i_sb->s_dirt = 1;
226         }
227         return ret;
228 }
229
230 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
231 {
232         struct btrfs_root *root;
233         struct btrfs_trans_handle *trans;
234         int ret;
235
236         root = BTRFS_I(dir)->root;
237         mutex_lock(&root->fs_info->fs_mutex);
238         trans = btrfs_start_transaction(root, 1);
239         btrfs_set_trans_block_group(trans, dir);
240         ret = btrfs_unlink_trans(trans, root, dir, dentry);
241         btrfs_end_transaction(trans, root);
242         mutex_unlock(&root->fs_info->fs_mutex);
243         btrfs_btree_balance_dirty(root);
244         return ret;
245 }
246
247 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
248 {
249         struct inode *inode = dentry->d_inode;
250         int err;
251         int ret;
252         struct btrfs_root *root = BTRFS_I(dir)->root;
253         struct btrfs_path *path;
254         struct btrfs_key key;
255         struct btrfs_trans_handle *trans;
256         struct btrfs_key found_key;
257         int found_type;
258         struct btrfs_leaf *leaf;
259         char *goodnames = "..";
260
261         path = btrfs_alloc_path();
262         BUG_ON(!path);
263         btrfs_init_path(path);
264         mutex_lock(&root->fs_info->fs_mutex);
265         trans = btrfs_start_transaction(root, 1);
266         btrfs_set_trans_block_group(trans, dir);
267         key.objectid = inode->i_ino;
268         key.offset = (u64)-1;
269         key.flags = (u32)-1;
270         while(1) {
271                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
272                 if (ret < 0) {
273                         err = ret;
274                         goto out;
275                 }
276                 BUG_ON(ret == 0);
277                 if (path->slots[0] == 0) {
278                         err = -ENOENT;
279                         goto out;
280                 }
281                 path->slots[0]--;
282                 leaf = btrfs_buffer_leaf(path->nodes[0]);
283                 btrfs_disk_key_to_cpu(&found_key,
284                                       &leaf->items[path->slots[0]].key);
285                 found_type = btrfs_key_type(&found_key);
286                 if (found_key.objectid != inode->i_ino) {
287                         err = -ENOENT;
288                         goto out;
289                 }
290                 if ((found_type != BTRFS_DIR_ITEM_KEY &&
291                      found_type != BTRFS_DIR_INDEX_KEY) ||
292                     (!btrfs_match_dir_item_name(root, path, goodnames, 2) &&
293                     !btrfs_match_dir_item_name(root, path, goodnames, 1))) {
294                         err = -ENOTEMPTY;
295                         goto out;
296                 }
297                 ret = btrfs_del_item(trans, root, path);
298                 BUG_ON(ret);
299
300                 if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1)
301                         break;
302                 btrfs_release_path(root, path);
303         }
304         ret = 0;
305         btrfs_release_path(root, path);
306
307         /* now the directory is empty */
308         err = btrfs_unlink_trans(trans, root, dir, dentry);
309         if (!err) {
310                 inode->i_size = 0;
311         }
312 out:
313         btrfs_release_path(root, path);
314         btrfs_free_path(path);
315         mutex_unlock(&root->fs_info->fs_mutex);
316         ret = btrfs_end_transaction(trans, root);
317         btrfs_btree_balance_dirty(root);
318         if (ret && !err)
319                 err = ret;
320         return err;
321 }
322
323 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
324                             struct btrfs_root *root,
325                             struct inode *inode)
326 {
327         struct btrfs_path *path;
328         int ret;
329
330         clear_inode(inode);
331
332         path = btrfs_alloc_path();
333         BUG_ON(!path);
334         btrfs_init_path(path);
335         ret = btrfs_lookup_inode(trans, root, path,
336                                  &BTRFS_I(inode)->location, -1);
337         BUG_ON(ret);
338         ret = btrfs_del_item(trans, root, path);
339         BUG_ON(ret);
340         btrfs_free_path(path);
341         return ret;
342 }
343
344 static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path,
345                            u64 objectid)
346 {
347         struct btrfs_node *node;
348         int i;
349         int nritems;
350         u64 item_objectid;
351         u64 blocknr;
352         int slot;
353         int ret;
354
355         if (!path->nodes[1])
356                 return;
357         node = btrfs_buffer_node(path->nodes[1]);
358         slot = path->slots[1];
359         if (slot == 0)
360                 return;
361         nritems = btrfs_header_nritems(&node->header);
362         for (i = slot - 1; i >= 0; i--) {
363                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
364                 if (item_objectid != objectid)
365                         break;
366                 blocknr = btrfs_node_blockptr(node, i);
367                 ret = readahead_tree_block(root, blocknr);
368                 if (ret)
369                         break;
370         }
371 }
372
373 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
374                                    struct btrfs_root *root,
375                                    struct inode *inode)
376 {
377         int ret;
378         struct btrfs_path *path;
379         struct btrfs_key key;
380         struct btrfs_disk_key *found_key;
381         u32 found_type;
382         struct btrfs_leaf *leaf;
383         struct btrfs_file_extent_item *fi = NULL;
384         u64 extent_start = 0;
385         u64 extent_num_blocks = 0;
386         int found_extent;
387
388         path = btrfs_alloc_path();
389         BUG_ON(!path);
390         /* FIXME, add redo link to tree so we don't leak on crash */
391         key.objectid = inode->i_ino;
392         key.offset = (u64)-1;
393         key.flags = (u32)-1;
394         while(1) {
395                 btrfs_init_path(path);
396                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
397                 if (ret < 0) {
398                         goto error;
399                 }
400                 if (ret > 0) {
401                         BUG_ON(path->slots[0] == 0);
402                         path->slots[0]--;
403                 }
404                 reada_truncate(root, path, inode->i_ino);
405                 leaf = btrfs_buffer_leaf(path->nodes[0]);
406                 found_key = &leaf->items[path->slots[0]].key;
407                 found_type = btrfs_disk_key_type(found_key);
408                 if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
409                         break;
410                 if (found_type != BTRFS_CSUM_ITEM_KEY &&
411                     found_type != BTRFS_DIR_ITEM_KEY &&
412                     found_type != BTRFS_DIR_INDEX_KEY &&
413                     found_type != BTRFS_EXTENT_DATA_KEY)
414                         break;
415                 if (btrfs_disk_key_offset(found_key) < inode->i_size)
416                         break;
417                 found_extent = 0;
418                 if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) {
419                         fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
420                                             path->slots[0],
421                                             struct btrfs_file_extent_item);
422                         if (btrfs_file_extent_type(fi) !=
423                             BTRFS_FILE_EXTENT_INLINE) {
424                                 extent_start =
425                                         btrfs_file_extent_disk_blocknr(fi);
426                                 extent_num_blocks =
427                                         btrfs_file_extent_disk_num_blocks(fi);
428                                 /* FIXME blocksize != 4096 */
429                                 inode->i_blocks -=
430                                         btrfs_file_extent_num_blocks(fi) << 3;
431                                 found_extent = 1;
432                         }
433                 }
434                 ret = btrfs_del_item(trans, root, path);
435                 BUG_ON(ret);
436                 btrfs_release_path(root, path);
437                 if (found_extent) {
438                         ret = btrfs_free_extent(trans, root, extent_start,
439                                                 extent_num_blocks, 0);
440                         BUG_ON(ret);
441                 }
442         }
443         ret = 0;
444 error:
445         btrfs_release_path(root, path);
446         btrfs_free_path(path);
447         inode->i_sb->s_dirt = 1;
448         return ret;
449 }
450
451 static void btrfs_delete_inode(struct inode *inode)
452 {
453         struct btrfs_trans_handle *trans;
454         struct btrfs_root *root = BTRFS_I(inode)->root;
455         int ret;
456
457         truncate_inode_pages(&inode->i_data, 0);
458         if (is_bad_inode(inode)) {
459                 goto no_delete;
460         }
461         inode->i_size = 0;
462         mutex_lock(&root->fs_info->fs_mutex);
463         trans = btrfs_start_transaction(root, 1);
464         btrfs_set_trans_block_group(trans, inode);
465         ret = btrfs_truncate_in_trans(trans, root, inode);
466         BUG_ON(ret);
467         btrfs_free_inode(trans, root, inode);
468         btrfs_end_transaction(trans, root);
469         mutex_unlock(&root->fs_info->fs_mutex);
470         btrfs_btree_balance_dirty(root);
471         return;
472 no_delete:
473         clear_inode(inode);
474 }
475
476 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
477                                struct btrfs_key *location)
478 {
479         const char *name = dentry->d_name.name;
480         int namelen = dentry->d_name.len;
481         struct btrfs_dir_item *di;
482         struct btrfs_path *path;
483         struct btrfs_root *root = BTRFS_I(dir)->root;
484         int ret;
485
486         path = btrfs_alloc_path();
487         BUG_ON(!path);
488         btrfs_init_path(path);
489         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
490                                     namelen, 0);
491         if (!di || IS_ERR(di)) {
492                 location->objectid = 0;
493                 ret = 0;
494                 goto out;
495         }
496         btrfs_disk_key_to_cpu(location, &di->location);
497 out:
498         btrfs_release_path(root, path);
499         btrfs_free_path(path);
500         return ret;
501 }
502
503 static int fixup_tree_root_location(struct btrfs_root *root,
504                              struct btrfs_key *location,
505                              struct btrfs_root **sub_root)
506 {
507         struct btrfs_path *path;
508         struct btrfs_root_item *ri;
509
510         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
511                 return 0;
512         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
513                 return 0;
514
515         path = btrfs_alloc_path();
516         BUG_ON(!path);
517         mutex_lock(&root->fs_info->fs_mutex);
518
519         *sub_root = btrfs_read_fs_root(root->fs_info, location);
520         if (IS_ERR(*sub_root))
521                 return PTR_ERR(*sub_root);
522
523         ri = &(*sub_root)->root_item;
524         location->objectid = btrfs_root_dirid(ri);
525         location->flags = 0;
526         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
527         location->offset = 0;
528
529         btrfs_free_path(path);
530         mutex_unlock(&root->fs_info->fs_mutex);
531         return 0;
532 }
533
534 static int btrfs_init_locked_inode(struct inode *inode, void *p)
535 {
536         struct btrfs_iget_args *args = p;
537         inode->i_ino = args->ino;
538         BTRFS_I(inode)->root = args->root;
539         return 0;
540 }
541
542 static int btrfs_find_actor(struct inode *inode, void *opaque)
543 {
544         struct btrfs_iget_args *args = opaque;
545         return (args->ino == inode->i_ino &&
546                 args->root == BTRFS_I(inode)->root);
547 }
548
549 static struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
550                                        struct btrfs_root *root)
551 {
552         struct inode *inode;
553         struct btrfs_iget_args args;
554         args.ino = objectid;
555         args.root = root;
556
557         inode = iget5_locked(s, objectid, btrfs_find_actor,
558                              btrfs_init_locked_inode,
559                              (void *)&args);
560         return inode;
561 }
562
563 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
564                                    struct nameidata *nd)
565 {
566         struct inode * inode;
567         struct btrfs_inode *bi = BTRFS_I(dir);
568         struct btrfs_root *root = bi->root;
569         struct btrfs_root *sub_root = root;
570         struct btrfs_key location;
571         int ret;
572
573         if (dentry->d_name.len > BTRFS_NAME_LEN)
574                 return ERR_PTR(-ENAMETOOLONG);
575         mutex_lock(&root->fs_info->fs_mutex);
576         ret = btrfs_inode_by_name(dir, dentry, &location);
577         mutex_unlock(&root->fs_info->fs_mutex);
578         if (ret < 0)
579                 return ERR_PTR(ret);
580         inode = NULL;
581         if (location.objectid) {
582                 ret = fixup_tree_root_location(root, &location, &sub_root);
583                 if (ret < 0)
584                         return ERR_PTR(ret);
585                 if (ret > 0)
586                         return ERR_PTR(-ENOENT);
587                 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
588                                           sub_root);
589                 if (!inode)
590                         return ERR_PTR(-EACCES);
591                 if (inode->i_state & I_NEW) {
592                         if (sub_root != root) {
593 printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_root, BTRFS_I(inode)->root);
594                                 igrab(inode);
595                                 sub_root->inode = inode;
596                         }
597                         BTRFS_I(inode)->root = sub_root;
598                         memcpy(&BTRFS_I(inode)->location, &location,
599                                sizeof(location));
600                         btrfs_read_locked_inode(inode);
601                         unlock_new_inode(inode);
602                 }
603         }
604         return d_splice_alias(inode, dentry);
605 }
606
607 static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path,
608                          u64 objectid)
609 {
610         struct btrfs_node *node;
611         int i;
612         u32 nritems;
613         u64 item_objectid;
614         u64 blocknr;
615         int slot;
616         int ret;
617
618         if (!path->nodes[1])
619                 return;
620         node = btrfs_buffer_node(path->nodes[1]);
621         slot = path->slots[1];
622         nritems = btrfs_header_nritems(&node->header);
623         for (i = slot + 1; i < nritems; i++) {
624                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
625                 if (item_objectid != objectid)
626                         break;
627                 blocknr = btrfs_node_blockptr(node, i);
628                 ret = readahead_tree_block(root, blocknr);
629                 if (ret)
630                         break;
631         }
632 }
633
634 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
635 {
636         struct inode *inode = filp->f_path.dentry->d_inode;
637         struct btrfs_root *root = BTRFS_I(inode)->root;
638         struct btrfs_item *item;
639         struct btrfs_dir_item *di;
640         struct btrfs_key key;
641         struct btrfs_path *path;
642         int ret;
643         u32 nritems;
644         struct btrfs_leaf *leaf;
645         int slot;
646         int advance;
647         unsigned char d_type = DT_UNKNOWN;
648         int over = 0;
649         u32 di_cur;
650         u32 di_total;
651         u32 di_len;
652         int key_type = BTRFS_DIR_INDEX_KEY;
653
654         /* FIXME, use a real flag for deciding about the key type */
655         if (root->fs_info->tree_root == root)
656                 key_type = BTRFS_DIR_ITEM_KEY;
657         mutex_lock(&root->fs_info->fs_mutex);
658         key.objectid = inode->i_ino;
659         key.flags = 0;
660         btrfs_set_key_type(&key, key_type);
661         key.offset = filp->f_pos;
662         path = btrfs_alloc_path();
663         btrfs_init_path(path);
664         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
665         if (ret < 0)
666                 goto err;
667         advance = 0;
668         reada_leaves(root, path, inode->i_ino);
669         while(1) {
670                 leaf = btrfs_buffer_leaf(path->nodes[0]);
671                 nritems = btrfs_header_nritems(&leaf->header);
672                 slot = path->slots[0];
673                 if (advance || slot >= nritems) {
674                         if (slot >= nritems -1) {
675                                 reada_leaves(root, path, inode->i_ino);
676                                 ret = btrfs_next_leaf(root, path);
677                                 if (ret)
678                                         break;
679                                 leaf = btrfs_buffer_leaf(path->nodes[0]);
680                                 nritems = btrfs_header_nritems(&leaf->header);
681                                 slot = path->slots[0];
682                         } else {
683                                 slot++;
684                                 path->slots[0]++;
685                         }
686                 }
687                 advance = 1;
688                 item = leaf->items + slot;
689                 if (btrfs_disk_key_objectid(&item->key) != key.objectid)
690                         break;
691                 if (btrfs_disk_key_type(&item->key) != key_type)
692                         break;
693                 if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
694                         continue;
695                 filp->f_pos = btrfs_disk_key_offset(&item->key);
696                 advance = 1;
697                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
698                 di_cur = 0;
699                 di_total = btrfs_item_size(leaf->items + slot);
700                 while(di_cur < di_total) {
701                         over = filldir(dirent, (const char *)(di + 1),
702                                        btrfs_dir_name_len(di),
703                                        btrfs_disk_key_offset(&item->key),
704                                        btrfs_disk_key_objectid(&di->location),
705                                        d_type);
706                         if (over)
707                                 goto nopos;
708                         di_len = btrfs_dir_name_len(di) + sizeof(*di);
709                         di_cur += di_len;
710                         di = (struct btrfs_dir_item *)((char *)di + di_len);
711                 }
712         }
713         filp->f_pos++;
714 nopos:
715         ret = 0;
716 err:
717         btrfs_release_path(root, path);
718         btrfs_free_path(path);
719         mutex_unlock(&root->fs_info->fs_mutex);
720         return ret;
721 }
722
723 static void btrfs_put_super (struct super_block * sb)
724 {
725         struct btrfs_root *root = btrfs_sb(sb);
726         int ret;
727
728         ret = close_ctree(root);
729         if (ret) {
730                 printk("close ctree returns %d\n", ret);
731         }
732         sb->s_fs_info = NULL;
733 }
734
735 static int btrfs_fill_super(struct super_block * sb, void * data, int silent)
736 {
737         struct inode * inode;
738         struct dentry * root_dentry;
739         struct btrfs_super_block *disk_super;
740         struct btrfs_root *tree_root;
741         struct btrfs_inode *bi;
742
743         sb->s_maxbytes = MAX_LFS_FILESIZE;
744         sb->s_magic = BTRFS_SUPER_MAGIC;
745         sb->s_op = &btrfs_super_ops;
746         sb->s_time_gran = 1;
747
748         tree_root = open_ctree(sb);
749
750         if (!tree_root) {
751                 printk("btrfs: open_ctree failed\n");
752                 return -EIO;
753         }
754         sb->s_fs_info = tree_root;
755         disk_super = tree_root->fs_info->disk_super;
756         printk("read in super total blocks %Lu root %Lu\n",
757                btrfs_super_total_blocks(disk_super),
758                btrfs_super_root_dir(disk_super));
759
760         inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super),
761                                   tree_root);
762         bi = BTRFS_I(inode);
763         bi->location.objectid = inode->i_ino;
764         bi->location.offset = 0;
765         bi->location.flags = 0;
766         bi->root = tree_root;
767         btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY);
768
769         if (!inode)
770                 return -ENOMEM;
771         if (inode->i_state & I_NEW) {
772                 btrfs_read_locked_inode(inode);
773                 unlock_new_inode(inode);
774         }
775
776         root_dentry = d_alloc_root(inode);
777         if (!root_dentry) {
778                 iput(inode);
779                 return -ENOMEM;
780         }
781         sb->s_root = root_dentry;
782
783         return 0;
784 }
785
786 static int btrfs_write_inode(struct inode *inode, int wait)
787 {
788         struct btrfs_root *root = BTRFS_I(inode)->root;
789         struct btrfs_trans_handle *trans;
790         int ret = 0;
791
792         if (wait) {
793                 mutex_lock(&root->fs_info->fs_mutex);
794                 trans = btrfs_start_transaction(root, 1);
795                 btrfs_set_trans_block_group(trans, inode);
796                 ret = btrfs_commit_transaction(trans, root);
797                 mutex_unlock(&root->fs_info->fs_mutex);
798         }
799         return ret;
800 }
801
802 static void btrfs_dirty_inode(struct inode *inode)
803 {
804         struct btrfs_root *root = BTRFS_I(inode)->root;
805         struct btrfs_trans_handle *trans;
806
807         mutex_lock(&root->fs_info->fs_mutex);
808         trans = btrfs_start_transaction(root, 1);
809         btrfs_set_trans_block_group(trans, inode);
810         btrfs_update_inode(trans, root, inode);
811         btrfs_end_transaction(trans, root);
812         mutex_unlock(&root->fs_info->fs_mutex);
813         btrfs_btree_balance_dirty(root);
814 }
815
816 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
817                                      struct btrfs_root *root,
818                                      u64 objectid,
819                                      struct btrfs_block_group_cache *group,
820                                      int mode)
821 {
822         struct inode *inode;
823         struct btrfs_inode_item inode_item;
824         struct btrfs_key *location;
825         int ret;
826         int owner;
827
828         inode = new_inode(root->fs_info->sb);
829         if (!inode)
830                 return ERR_PTR(-ENOMEM);
831
832         BTRFS_I(inode)->root = root;
833         if (mode & S_IFDIR)
834                 owner = 0;
835         else
836                 owner = 1;
837         group = btrfs_find_block_group(root, group, 0, 0, owner);
838         BTRFS_I(inode)->block_group = group;
839
840         inode->i_uid = current->fsuid;
841         inode->i_gid = current->fsgid;
842         inode->i_mode = mode;
843         inode->i_ino = objectid;
844         inode->i_blocks = 0;
845         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
846         fill_inode_item(&inode_item, inode);
847         location = &BTRFS_I(inode)->location;
848         location->objectid = objectid;
849         location->flags = 0;
850         location->offset = 0;
851         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
852
853         ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
854         BUG_ON(ret);
855
856         insert_inode_hash(inode);
857         return inode;
858 }
859
860 static int btrfs_add_link(struct btrfs_trans_handle *trans,
861                             struct dentry *dentry, struct inode *inode)
862 {
863         int ret;
864         struct btrfs_key key;
865         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
866         key.objectid = inode->i_ino;
867         key.flags = 0;
868         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
869         key.offset = 0;
870
871         ret = btrfs_insert_dir_item(trans, root,
872                                     dentry->d_name.name, dentry->d_name.len,
873                                     dentry->d_parent->d_inode->i_ino,
874                                     &key, 0);
875         if (ret == 0) {
876                 dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2;
877                 ret = btrfs_update_inode(trans, root,
878                                          dentry->d_parent->d_inode);
879         }
880         return ret;
881 }
882
883 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
884                             struct dentry *dentry, struct inode *inode)
885 {
886         int err = btrfs_add_link(trans, dentry, inode);
887         if (!err) {
888                 d_instantiate(dentry, inode);
889                 return 0;
890         }
891         if (err > 0)
892                 err = -EEXIST;
893         return err;
894 }
895
896 static int btrfs_create(struct inode *dir, struct dentry *dentry,
897                         int mode, struct nameidata *nd)
898 {
899         struct btrfs_trans_handle *trans;
900         struct btrfs_root *root = BTRFS_I(dir)->root;
901         struct inode *inode;
902         int err;
903         int drop_inode = 0;
904         u64 objectid;
905
906         mutex_lock(&root->fs_info->fs_mutex);
907         trans = btrfs_start_transaction(root, 1);
908         btrfs_set_trans_block_group(trans, dir);
909
910         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
911         if (err) {
912                 err = -ENOSPC;
913                 goto out_unlock;
914         }
915
916         inode = btrfs_new_inode(trans, root, objectid,
917                                 BTRFS_I(dir)->block_group, mode);
918         err = PTR_ERR(inode);
919         if (IS_ERR(inode))
920                 goto out_unlock;
921
922         btrfs_set_trans_block_group(trans, inode);
923         err = btrfs_add_nondir(trans, dentry, inode);
924         if (err)
925                 drop_inode = 1;
926         else {
927                 inode->i_mapping->a_ops = &btrfs_aops;
928                 inode->i_fop = &btrfs_file_operations;
929                 inode->i_op = &btrfs_file_inode_operations;
930         }
931         dir->i_sb->s_dirt = 1;
932         btrfs_update_inode_block_group(trans, inode);
933         btrfs_update_inode_block_group(trans, dir);
934 out_unlock:
935         btrfs_end_transaction(trans, root);
936         mutex_unlock(&root->fs_info->fs_mutex);
937
938         if (drop_inode) {
939                 inode_dec_link_count(inode);
940                 iput(inode);
941         }
942         btrfs_btree_balance_dirty(root);
943         return err;
944 }
945
946 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
947                       struct dentry *dentry)
948 {
949         struct btrfs_trans_handle *trans;
950         struct btrfs_root *root = BTRFS_I(dir)->root;
951         struct inode *inode = old_dentry->d_inode;
952         int err;
953         int drop_inode = 0;
954
955         if (inode->i_nlink == 0)
956                 return -ENOENT;
957
958         inc_nlink(inode);
959         mutex_lock(&root->fs_info->fs_mutex);
960         trans = btrfs_start_transaction(root, 1);
961         btrfs_set_trans_block_group(trans, dir);
962         atomic_inc(&inode->i_count);
963         err = btrfs_add_nondir(trans, dentry, inode);
964         if (err)
965                 drop_inode = 1;
966         dir->i_sb->s_dirt = 1;
967         btrfs_update_inode_block_group(trans, dir);
968         btrfs_update_inode(trans, root, inode);
969
970         btrfs_end_transaction(trans, root);
971         mutex_unlock(&root->fs_info->fs_mutex);
972
973         if (drop_inode) {
974                 inode_dec_link_count(inode);
975                 iput(inode);
976         }
977         btrfs_btree_balance_dirty(root);
978         return err;
979 }
980
981 static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans,
982                                 struct btrfs_root *root,
983                                 u64 objectid, u64 dirid)
984 {
985         int ret;
986         char buf[2];
987         struct btrfs_key key;
988
989         buf[0] = '.';
990         buf[1] = '.';
991
992         key.objectid = objectid;
993         key.offset = 0;
994         key.flags = 0;
995         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
996
997         ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid,
998                                     &key, 1);
999         if (ret)
1000                 goto error;
1001         key.objectid = dirid;
1002         ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid,
1003                                     &key, 1);
1004         if (ret)
1005                 goto error;
1006 error:
1007         return ret;
1008 }
1009
1010 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1011 {
1012         struct inode *inode;
1013         struct btrfs_trans_handle *trans;
1014         struct btrfs_root *root = BTRFS_I(dir)->root;
1015         int err = 0;
1016         int drop_on_err = 0;
1017         u64 objectid;
1018
1019         mutex_lock(&root->fs_info->fs_mutex);
1020         trans = btrfs_start_transaction(root, 1);
1021         btrfs_set_trans_block_group(trans, dir);
1022         if (IS_ERR(trans)) {
1023                 err = PTR_ERR(trans);
1024                 goto out_unlock;
1025         }
1026
1027         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1028         if (err) {
1029                 err = -ENOSPC;
1030                 goto out_unlock;
1031         }
1032
1033         inode = btrfs_new_inode(trans, root, objectid,
1034                                 BTRFS_I(dir)->block_group, S_IFDIR | mode);
1035         if (IS_ERR(inode)) {
1036                 err = PTR_ERR(inode);
1037                 goto out_fail;
1038         }
1039         drop_on_err = 1;
1040         inode->i_op = &btrfs_dir_inode_operations;
1041         inode->i_fop = &btrfs_dir_file_operations;
1042         btrfs_set_trans_block_group(trans, inode);
1043
1044         err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino);
1045         if (err)
1046                 goto out_fail;
1047
1048         inode->i_size = 6;
1049         err = btrfs_update_inode(trans, root, inode);
1050         if (err)
1051                 goto out_fail;
1052         err = btrfs_add_link(trans, dentry, inode);
1053         if (err)
1054                 goto out_fail;
1055         d_instantiate(dentry, inode);
1056         drop_on_err = 0;
1057         dir->i_sb->s_dirt = 1;
1058         btrfs_update_inode_block_group(trans, inode);
1059         btrfs_update_inode_block_group(trans, dir);
1060
1061 out_fail:
1062         btrfs_end_transaction(trans, root);
1063 out_unlock:
1064         mutex_unlock(&root->fs_info->fs_mutex);
1065         if (drop_on_err)
1066                 iput(inode);
1067         btrfs_btree_balance_dirty(root);
1068         return err;
1069 }
1070
1071 static int btrfs_sync_file(struct file *file,
1072                            struct dentry *dentry, int datasync)
1073 {
1074         struct inode *inode = dentry->d_inode;
1075         struct btrfs_root *root = BTRFS_I(inode)->root;
1076         int ret;
1077         struct btrfs_trans_handle *trans;
1078
1079         mutex_lock(&root->fs_info->fs_mutex);
1080         trans = btrfs_start_transaction(root, 1);
1081         if (!trans) {
1082                 ret = -ENOMEM;
1083                 goto out;
1084         }
1085         ret = btrfs_commit_transaction(trans, root);
1086         mutex_unlock(&root->fs_info->fs_mutex);
1087 out:
1088         return ret > 0 ? EIO : ret;
1089 }
1090
1091 static int btrfs_sync_fs(struct super_block *sb, int wait)
1092 {
1093         struct btrfs_trans_handle *trans;
1094         struct btrfs_root *root;
1095         int ret;
1096         root = btrfs_sb(sb);
1097
1098         sb->s_dirt = 0;
1099         if (!wait) {
1100                 filemap_flush(root->fs_info->btree_inode->i_mapping);
1101                 return 0;
1102         }
1103         mutex_lock(&root->fs_info->fs_mutex);
1104         trans = btrfs_start_transaction(root, 1);
1105         ret = btrfs_commit_transaction(trans, root);
1106         sb->s_dirt = 0;
1107         BUG_ON(ret);
1108 printk("btrfs sync_fs\n");
1109         mutex_unlock(&root->fs_info->fs_mutex);
1110         return 0;
1111 }
1112
/*
 * Map file block iblock of inode onto buffer head result by looking up
 * the file extent item that covers it.
 *
 * For a regular extent the buffer is mapped to the matching disk block.
 * For an inline extent the data is copied into result's page, the rest
 * of the page is zeroed, and the buffer is mapped to logical block 0.
 * If no extent covers iblock, result is left untouched.
 *
 * create is not supported here: a non-zero value only triggers
 * WARN_ON(1).  The only caller in this file, btrfs_get_block(), holds
 * fs_mutex around the call.
 *
 * Returns 0, or the negative value from btrfs_lookup_file_extent().
 */
1113 static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
1114                            struct buffer_head *result, int create)
1115 {
1116         int ret;
1117         int err = 0;
1118         u64 blocknr;
1119         u64 extent_start = 0;
1120         u64 extent_end = 0;
1121         u64 objectid = inode->i_ino;
1122         u32 found_type;
1123         struct btrfs_path *path;
1124         struct btrfs_root *root = BTRFS_I(inode)->root;
1125         struct btrfs_file_extent_item *item;
1126         struct btrfs_leaf *leaf;
1127         struct btrfs_disk_key *found_key;
1128
1129         path = btrfs_alloc_path();
1130         BUG_ON(!path);
1131         btrfs_init_path(path);
1132         if (create) {
1133                 WARN_ON(1);
1134         }
1135
             /*
              * NOTE(review): the shift below is done in sector_t width;
              * confirm it cannot overflow where sector_t is 32 bits.
              */
1136         ret = btrfs_lookup_file_extent(NULL, root, path,
1137                                        inode->i_ino,
1138                                        iblock << inode->i_blkbits, 0);
1139         if (ret < 0) {
1140                 err = ret;
1141                 goto out;
1142         }
1143
             /*
              * No exact key match: step back one slot, presumably to
              * the extent that starts before this offset.  With no
              * previous slot there is nothing to map.
              */
1144         if (ret != 0) {
1145                 if (path->slots[0] == 0) {
1146                         btrfs_release_path(root, path);
1147                         goto out;
1148                 }
1149                 path->slots[0]--;
1150         }
1151
             /*
              * NOTE(review): the item fields are read here, before the
              * key check below confirms this is an extent data item of
              * this inode.
              */
1152         item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
1153                               struct btrfs_file_extent_item);
1154         leaf = btrfs_buffer_leaf(path->nodes[0]);
1155         blocknr = btrfs_file_extent_disk_blocknr(item);
1156         blocknr += btrfs_file_extent_offset(item);
1157
1158         /* are we inside the extent that was found? */
1159         found_key = &leaf->items[path->slots[0]].key;
1160         found_type = btrfs_disk_key_type(found_key);
1161         if (btrfs_disk_key_objectid(found_key) != objectid ||
1162             found_type != BTRFS_EXTENT_DATA_KEY) {
1163                 extent_end = 0;
1164                 extent_start = 0;
1165                 goto out;
1166         }
             /* found_type is reused: from here on it is the extent type */
1167         found_type = btrfs_file_extent_type(item);
1168         extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
1169         if (found_type == BTRFS_FILE_EXTENT_REG) {
                     /* convert the key offset from bytes to blocks */
1170                 extent_start = extent_start >> inode->i_blkbits;
1171                 extent_end = extent_start + btrfs_file_extent_num_blocks(item);
1172                 if (iblock >= extent_start && iblock < extent_end) {
1173                         err = 0;
1174                         btrfs_map_bh_to_logical(root, result, blocknr +
1175                                                 iblock - extent_start);
1176                         goto out;
1177                 }
1178         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1179                 char *ptr;
1180                 char *map;
1181                 u32 size;
1182                 size = btrfs_file_extent_inline_len(leaf->items +
1183                                                     path->slots[0]);
                     /* unlike the regular case, extent_end is inclusive */
1184                 extent_end = (extent_start + size) >> inode->i_blkbits;
1185                 extent_start >>= inode->i_blkbits;
1186                 if (iblock < extent_start || iblock > extent_end) {
1187                         goto out;
1188                 }
                     /* copy the inline data, then zero the page tail */
1189                 ptr = btrfs_file_extent_inline_start(item);
1190                 map = kmap(result->b_page);
1191                 memcpy(map, ptr, size);
1192                 memset(map + size, 0, PAGE_CACHE_SIZE - size);
1193                 flush_dcache_page(result->b_page);
1194                 kunmap(result->b_page);
1195                 set_buffer_uptodate(result);
1196                 SetPageChecked(result->b_page);
1197                 btrfs_map_bh_to_logical(root, result, 0);
1198         }
1199 out:
1200         btrfs_free_path(path);
1201         return err;
1202 }
1203
1204 static int btrfs_get_block(struct inode *inode, sector_t iblock,
1205                            struct buffer_head *result, int create)
1206 {
1207         int err;
1208         struct btrfs_root *root = BTRFS_I(inode)->root;
1209         mutex_lock(&root->fs_info->fs_mutex);
1210         err = btrfs_get_block_lock(inode, iblock, result, create);
1211         mutex_unlock(&root->fs_info->fs_mutex);
1212         return err;
1213 }
1214
/*
 * prepare_write helper: passes the page range [from, to) to
 * nobh_prepare_write() with btrfs_get_block as the block-mapping
 * callback and returns its result.  file is not used.
 */
1215 static int btrfs_prepare_write(struct file *file, struct page *page,
1216                                unsigned from, unsigned to)
1217 {
1218         return nobh_prepare_write(page, from, to, btrfs_get_block);
1219 }
1220
/*
 * write_super helper: a synchronous btrfs_sync_fs() on sb (wait == 1);
 * the return value is discarded.
 */
1221 static void btrfs_write_super(struct super_block *sb)
1222 {
1223         btrfs_sync_fs(sb, 1);
1224 }
1225
/*
 * readpage helper: reads page through mpage_readpage() with
 * btrfs_get_block as the block-mapping callback.  file is not used.
 */
1226 static int btrfs_readpage(struct file *file, struct page *page)
1227 {
1228         return mpage_readpage(page, btrfs_get_block);
1229 }
1230
/*
 * Write out the dirty buffers of a locked page.
 *
 * Dirty buffers inside i_size that are not yet mapped are mapped with
 * btrfs_get_block().  On the normal path, mapped dirty buffers whose
 * b_blocknr is non-zero are submitted with submit_bh(WRITE).  The page
 * is unlocked before returning.
 *
 * Returns 0, or the error from btrfs_get_block(); in that case the
 * recover path has submitted the buffers that were already mapped and
 * dirty, and has set the page error flag.
 */
1231 /*
1232  * While block_write_full_page is writing back the dirty buffers under
1233  * the page lock, whoever dirtied the buffers may decide to clean them
1234  * again at any time.  We handle that by only looking at the buffer
1235  * state inside lock_buffer().
1236  *
1237  * If block_write_full_page() is called for regular writeback
1238  * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
1239  * locked buffer.   This only can happen if someone has written the buffer
1240  * directly, with submit_bh().  At the address_space level PageWriteback
1241  * prevents this contention from occurring.
1242  */
1243 static int __btrfs_write_full_page(struct inode *inode, struct page *page,
1244                                    struct writeback_control *wbc)
1245 {
1246         int err;
1247         sector_t block;
1248         sector_t last_block;
1249         struct buffer_head *bh, *head;
1250         const unsigned blocksize = 1 << inode->i_blkbits;
1251         int nr_underway = 0;
1252
1253         BUG_ON(!PageLocked(page));
1254
             /* index of the block that holds the last byte of the file */
1255         last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1256
1257         if (!page_has_buffers(page)) {
1258                 create_empty_buffers(page, blocksize,
1259                                         (1 << BH_Dirty)|(1 << BH_Uptodate));
1260         }
1261
1262         /*
1263          * Be very careful.  We have no exclusion from __set_page_dirty_buffers
1264          * here, and the (potentially unmapped) buffers may become dirty at
1265          * any time.  If a buffer becomes dirty here after we've inspected it
1266          * then we just miss that fact, and the page stays dirty.
1267          *
1268          * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
1269          * handle that here by just cleaning them.
1270          */
1271
             /* file block number of the first buffer on this page */
1272         block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1273         head = page_buffers(page);
1274         bh = head;
1275
1276         /*
1277          * Get all the dirty buffers mapped to disk addresses and
1278          * handle any aliases from the underlying blockdev's mapping.
1279          */
1280         do {
1281                 if (block > last_block) {
1282                         /*
1283                          * mapped buffers outside i_size will occur, because
1284                          * this page can be outside i_size when there is a
1285                          * truncate in progress.
1286                          */
1287                         /*
1288                          * The buffer was zeroed by block_write_full_page()
1289                          */
1290                         clear_buffer_dirty(bh);
1291                         set_buffer_uptodate(bh);
1292                 } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
1293                         WARN_ON(bh->b_size != blocksize);
                             /* create == 0: look up only, never allocate */
1294                         err = btrfs_get_block(inode, block, bh, 0);
1295                         if (err) {
1296 printk("writepage going to recovery err %d\n", err);
1297                                 goto recover;
1298                         }
1299                         if (buffer_new(bh)) {
1300                                 /* blockdev mappings never come here */
1301                                 clear_buffer_new(bh);
1302                         }
1303                 }
1304                 bh = bh->b_this_page;
1305                 block++;
1306         } while (bh != head);
1307
             /*
              * Second pass: lock each mapped buffer and tag the dirty
              * ones for async write.  The "continue" statements jump to
              * the loop condition, which also advances bh.
              */
1308         do {
1309                 if (!buffer_mapped(bh))
1310                         continue;
1311                 /*
1312                  * If it's a fully non-blocking write attempt and we cannot
1313                  * lock the buffer then redirty the page.  Note that this can
1314                  * potentially cause a busy-wait loop from pdflush and kswapd
1315                  * activity, but those code paths have their own higher-level
1316                  * throttling.
1317                  */
1318                 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1319                         lock_buffer(bh);
1320                 } else if (test_set_buffer_locked(bh)) {
1321                         redirty_page_for_writepage(wbc, page);
1322                         continue;
1323                 }
                     /*
                      * Buffers with b_blocknr == 0 are not submitted;
                      * presumably these are the inline-extent buffers
                      * that btrfs_get_block_lock() maps to logical 0.
                      */
1324                 if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
1325                         mark_buffer_async_write(bh);
1326                 } else {
1327                         unlock_buffer(bh);
1328                 }
1329         } while ((bh = bh->b_this_page) != head);
1330
1331         /*
1332          * The page and its buffers are protected by PageWriteback(), so we can
1333          * drop the bh refcounts early.
1334          */
1335         BUG_ON(PageWriteback(page));
1336         set_page_writeback(page);
1337
             /* third pass: submit everything tagged for async write */
1338         do {
1339                 struct buffer_head *next = bh->b_this_page;
1340                 if (buffer_async_write(bh)) {
1341                         submit_bh(WRITE, bh);
1342                         nr_underway++;
1343                 }
1344                 bh = next;
1345         } while (bh != head);
1346         unlock_page(page);
1347
1348         err = 0;
             /* also reached from recover, with err holding the failure */
1349 done:
1350         if (nr_underway == 0) {
1351                 /*
1352                  * The page was marked dirty, but the buffers were
1353                  * clean.  Someone wrote them back by hand with
1354                  * ll_rw_block/submit_bh.  A rare case.
1355                  */
1356                 int uptodate = 1;
1357                 do {
1358                         if (!buffer_uptodate(bh)) {
1359                                 uptodate = 0;
1360                                 break;
1361                         }
1362                         bh = bh->b_this_page;
1363                 } while (bh != head);
1364                 if (uptodate)
1365                         SetPageUptodate(page);
1366                 end_page_writeback(page);
1367         }
1368         return err;
1369
1370 recover:
1371         /*
1372          * ENOSPC, or some other error.  We may already have added some
1373          * blocks to the file, so we need to write these out to avoid
1374          * exposing stale data.
1375          * The page is currently locked and not marked for writeback
1376          */
1377         bh = head;
1378         /* Recovery: lock and submit the mapped buffers */
1379         do {
1380                 if (buffer_mapped(bh) && buffer_dirty(bh)) {
1381                         lock_buffer(bh);
1382                         mark_buffer_async_write(bh);
1383                 } else {
1384                         /*
1385                          * The buffer may have been set dirty during
1386                          * attachment to a dirty page.
1387                          */
1388                         clear_buffer_dirty(bh);
1389                 }
1390         } while ((bh = bh->b_this_page) != head);
1391         SetPageError(page);
1392         BUG_ON(PageWriteback(page));
1393         set_page_writeback(page);
1394         do {
1395                 struct buffer_head *next = bh->b_this_page;
1396                 if (buffer_async_write(bh)) {
1397                         clear_buffer_dirty(bh);
1398                         submit_bh(WRITE, bh);
1399                         nr_underway++;
1400                 }
1401                 bh = next;
1402         } while (bh != head);
1403         unlock_page(page);
1404         goto done;
1405 }
1406
1407 /*
1408  * The generic ->writepage function for buffer-backed address_spaces
1409  */
1410 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1411 {
1412         struct inode * const inode = page->mapping->host;
1413         loff_t i_size = i_size_read(inode);
1414         const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
1415         unsigned offset;
1416         void *kaddr;
1417
1418         /* Is the page fully inside i_size? */
1419         if (page->index < end_index)
1420                 return __btrfs_write_full_page(inode, page, wbc);
1421
1422         /* Is the page fully outside i_size? (truncate in progress) */
1423         offset = i_size & (PAGE_CACHE_SIZE-1);
1424         if (page->index >= end_index+1 || !offset) {
1425                 /*
1426                  * The page may have dirty, unmapped buffers.  For example,
1427                  * they may have been added in ext3_writepage().  Make them
1428                  * freeable here, so the page does not leak.
1429                  */
1430                 block_invalidatepage(page, 0);
1431                 unlock_page(page);
1432                 return 0; /* don't care */
1433         }
1434
1435         /*
1436          * The page straddles i_size.  It must be zeroed out on each and every
1437          * writepage invokation because it may be mmapped.  "A file is mapped
1438          * in multiples of the page size.  For a file that is not a multiple of
1439          * the  page size, the remaining memory is zeroed when mapped, and
1440          * writes to that region are not written out to the file."
1441          */
1442         kaddr = kmap_atomic(page, KM_USER0);
1443         memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1444         flush_dcache_page(page);
1445         kunmap_atomic(kaddr, KM_USER0);
1446         return __btrfs_write_full_page(inode, page, wbc);
1447 }
1448
1449 static void btrfs_truncate(struct inode *inode)
1450 {
1451         struct btrfs_root *root = BTRFS_I(inode)->root;
1452         int ret;
1453         struct btrfs_trans_handle *trans;
1454
1455         if (!S_ISREG(inode->i_mode))
1456                 return;
1457         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1458                 return;
1459
1460         nobh_truncate_page(inode->i_mapping, inode->i_size);
1461
1462         /* FIXME, add redo link to tree so we don't leak on crash */
1463         mutex_lock(&root->fs_info->fs_mutex);
1464         trans = btrfs_start_transaction(root, 1);
1465         btrfs_set_trans_block_group(trans, inode);
1466         ret = btrfs_truncate_in_trans(trans, root, inode);
1467         BUG_ON(ret);
1468         btrfs_update_inode(trans, root, inode);
1469         ret = btrfs_end_transaction(trans, root);
1470         BUG_ON(ret);
1471         mutex_unlock(&root->fs_info->fs_mutex);
1472         btrfs_btree_balance_dirty(root);
1473 }
1474
1475 /*
1476  * Make sure any changes to nobh_commit_write() are reflected in
1477  * nobh_truncate_page(), since it doesn't call commit_write().
1478  */
1479 static int btrfs_commit_write(struct file *file, struct page *page,
1480                               unsigned from, unsigned to)
1481 {
1482         struct inode *inode = page->mapping->host;
1483         struct buffer_head *bh;
1484         loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1485
1486         SetPageUptodate(page);
1487         bh = page_buffers(page);
1488         if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1489                 set_page_dirty(page);
1490         }
1491         if (pos > inode->i_size) {
1492                 i_size_write(inode, pos);
1493                 mark_inode_dirty(inode);
1494         }
1495         return 0;
1496 }
1497
1498 static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
1499                                 struct page **prepared_pages,
1500                                 const char __user * buf)
1501 {
1502         long page_fault = 0;
1503         int i;
1504         int offset = pos & (PAGE_CACHE_SIZE - 1);
1505
1506         for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
1507                 size_t count = min_t(size_t,
1508                                      PAGE_CACHE_SIZE - offset, write_bytes);
1509                 struct page *page = prepared_pages[i];
1510                 fault_in_pages_readable(buf, count);
1511
1512                 /* Copy data from userspace to the current page */
1513                 kmap(page);
1514                 page_fault = __copy_from_user(page_address(page) + offset,
1515                                               buf, count);
1516                 /* Flush processor's dcache for this page */
1517                 flush_dcache_page(page);
1518                 kunmap(page);
1519                 buf += count;
1520                 write_bytes -= count;
1521
1522                 if (page_fault)
1523                         break;
1524         }
1525         return page_fault ? -EFAULT : 0;
1526 }
1527
1528 static void btrfs_drop_pages(struct page **pages, size_t num_pages)
1529 {
1530         size_t i;
1531         for (i = 0; i < num_pages; i++) {
1532                 if (!pages[i])
1533                         break;
1534                 unlock_page(pages[i]);
1535                 mark_page_accessed(pages[i]);
1536                 page_cache_release(pages[i]);
1537         }
1538 }
1539 static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
1540                                    struct btrfs_root *root,
1541                                    struct file *file,
1542                                    struct page **pages,
1543                                    size_t num_pages,
1544                                    loff_t pos,
1545                                    size_t write_bytes)
1546 {
1547         int i;
1548         int offset;
1549         int err = 0;
1550         int ret;
1551         int this_write;
1552         struct inode *inode = file->f_path.dentry->d_inode;
1553         struct buffer_head *bh;
1554         struct btrfs_file_extent_item *ei;
1555
1556         for (i = 0; i < num_pages; i++) {
1557                 offset = pos & (PAGE_CACHE_SIZE -1);
1558                 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1559                 /* FIXME, one block at a time */
1560
1561                 mutex_lock(&root->fs_info->fs_mutex);
1562                 trans = btrfs_start_transaction(root, 1);
1563                 btrfs_set_trans_block_group(trans, inode);
1564
1565                 bh = page_buffers(pages[i]);
1566                 if (buffer_mapped(bh) && bh->b_blocknr == 0) {
1567                         struct btrfs_key key;
1568                         struct btrfs_path *path;
1569                         char *ptr;
1570                         u32 datasize;
1571
1572                         path = btrfs_alloc_path();
1573                         BUG_ON(!path);
1574                         key.objectid = inode->i_ino;
1575                         key.offset = pages[i]->index << PAGE_CACHE_SHIFT;
1576                         key.flags = 0;
1577                         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
1578                         BUG_ON(write_bytes >= PAGE_CACHE_SIZE);
1579                         datasize = offset +
1580                                 btrfs_file_extent_calc_inline_size(write_bytes);
1581                         ret = btrfs_insert_empty_item(trans, root, path, &key,
1582                                                       datasize);
1583                         BUG_ON(ret);
1584                         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
1585                                path->slots[0], struct btrfs_file_extent_item);
1586                         btrfs_set_file_extent_generation(ei, trans->transid);
1587                         btrfs_set_file_extent_type(ei,
1588                                                    BTRFS_FILE_EXTENT_INLINE);
1589                         ptr = btrfs_file_extent_inline_start(ei);
1590                         btrfs_memcpy(root, path->nodes[0]->b_data,
1591                                      ptr, bh->b_data, offset + write_bytes);
1592                         mark_buffer_dirty(path->nodes[0]);
1593                         btrfs_free_path(path);
1594                 } else {
1595                         btrfs_csum_file_block(trans, root, inode->i_ino,
1596                                       pages[i]->index << PAGE_CACHE_SHIFT,
1597                                       kmap(pages[i]), PAGE_CACHE_SIZE);
1598                         kunmap(pages[i]);
1599                 }
1600                 SetPageChecked(pages[i]);
1601                 // btrfs_update_inode_block_group(trans, inode);
1602                 ret = btrfs_end_transaction(trans, root);
1603                 BUG_ON(ret);
1604                 mutex_unlock(&root->fs_info->fs_mutex);
1605
1606                 ret = btrfs_commit_write(file, pages[i], offset,
1607                                          offset + this_write);
1608                 pos += this_write;
1609                 if (ret) {
1610                         err = ret;
1611                         goto failed;
1612                 }
1613                 WARN_ON(this_write > write_bytes);
1614                 write_bytes -= this_write;
1615         }
1616 failed:
1617         return err;
1618 }
1619
1620 static int drop_extents(struct btrfs_trans_handle *trans,
1621                           struct btrfs_root *root,
1622                           struct inode *inode,
1623                           u64 start, u64 end, u64 *hint_block)
1624 {
1625         int ret;
1626         struct btrfs_key key;
1627         struct btrfs_leaf *leaf;
1628         int slot;
1629         struct btrfs_file_extent_item *extent;
1630         u64 extent_end = 0;
1631         int keep;
1632         struct btrfs_file_extent_item old;
1633         struct btrfs_path *path;
1634         u64 search_start = start;
1635         int bookend;
1636         int found_type;
1637         int found_extent;
1638         int found_inline;
1639
1640         path = btrfs_alloc_path();
1641         if (!path)
1642                 return -ENOMEM;
1643         while(1) {
1644                 btrfs_release_path(root, path);
1645                 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
1646                                                search_start, -1);
1647                 if (ret < 0)
1648                         goto out;
1649                 if (ret > 0) {
1650                         if (path->slots[0] == 0) {
1651                                 ret = 0;
1652                                 goto out;
1653                         }
1654                         path->slots[0]--;
1655                 }
1656                 keep = 0;
1657                 bookend = 0;
1658                 found_extent = 0;
1659                 found_inline = 0;
1660                 extent = NULL;
1661                 leaf = btrfs_buffer_leaf(path->nodes[0]);
1662                 slot = path->slots[0];
1663                 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
1664                 if (key.offset >= end || key.objectid != inode->i_ino) {
1665                         ret = 0;
1666                         goto out;
1667                 }
1668                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) {
1669                         ret = 0;
1670                         goto out;
1671                 }
1672                 extent = btrfs_item_ptr(leaf, slot,
1673                                         struct btrfs_file_extent_item);
1674                 found_type = btrfs_file_extent_type(extent);
1675                 if (found_type == BTRFS_FILE_EXTENT_REG) {
1676                         extent_end = key.offset +
1677                                 (btrfs_file_extent_num_blocks(extent) <<
1678                                  inode->i_blkbits);
1679                         found_extent = 1;
1680                 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1681                         found_inline = 1;
1682                         extent_end = key.offset +
1683                              btrfs_file_extent_inline_len(leaf->items + slot);
1684                 }
1685
1686                 if (!found_extent && !found_inline) {
1687                         ret = 0;
1688                         goto out;
1689                 }
1690
1691                 if (search_start >= extent_end) {
1692                         ret = 0;
1693                         goto out;
1694                 }
1695
1696                 search_start = extent_end;
1697
1698                 if (end < extent_end && end >= key.offset) {
1699                         if (found_extent) {
1700                                 memcpy(&old, extent, sizeof(old));
1701                                 ret = btrfs_inc_extent_ref(trans, root,
1702                                       btrfs_file_extent_disk_blocknr(&old),
1703                                       btrfs_file_extent_disk_num_blocks(&old));
1704                                 BUG_ON(ret);
1705                         }
1706                         WARN_ON(found_inline);
1707                         bookend = 1;
1708                 }
1709
1710                 if (start > key.offset) {
1711                         u64 new_num;
1712                         u64 old_num;
1713                         /* truncate existing extent */
1714                         keep = 1;
1715                         WARN_ON(start & (root->blocksize - 1));
1716                         if (found_extent) {
1717                                 new_num = (start - key.offset) >>
1718                                         inode->i_blkbits;
1719                                 old_num = btrfs_file_extent_num_blocks(extent);
1720                                 *hint_block =
1721                                         btrfs_file_extent_disk_blocknr(extent);
1722                                 inode->i_blocks -= (old_num - new_num) << 3;
1723                                 btrfs_set_file_extent_num_blocks(extent,
1724                                                                  new_num);
1725                                 mark_buffer_dirty(path->nodes[0]);
1726                         } else {
1727                                 WARN_ON(1);
1728                         }
1729                 }
1730                 if (!keep) {
1731                         u64 disk_blocknr = 0;
1732                         u64 disk_num_blocks = 0;
1733                         u64 extent_num_blocks = 0;
1734                         if (found_extent) {
1735                                 disk_blocknr =
1736                                       btrfs_file_extent_disk_blocknr(extent);
1737                                 disk_num_blocks =
1738                                       btrfs_file_extent_disk_num_blocks(extent);
1739                                 extent_num_blocks =
1740                                       btrfs_file_extent_num_blocks(extent);
1741                                 *hint_block =
1742                                         btrfs_file_extent_disk_blocknr(extent);
1743                         }
1744                         ret = btrfs_del_item(trans, root, path);
1745                         BUG_ON(ret);
1746                         btrfs_release_path(root, path);
1747                         extent = NULL;
1748                         if (found_extent) {
1749                                 inode->i_blocks -= extent_num_blocks << 3;
1750                                 ret = btrfs_free_extent(trans, root,
1751                                                         disk_blocknr,
1752                                                         disk_num_blocks, 0);
1753                         }
1754
1755                         BUG_ON(ret);
1756                         if (!bookend && search_start >= end) {
1757                                 ret = 0;
1758                                 goto out;
1759                         }
1760                         if (!bookend)
1761                                 continue;
1762                 }
1763                 if (bookend && found_extent) {
1764                         /* create bookend */
1765                         struct btrfs_key ins;
1766                         ins.objectid = inode->i_ino;
1767                         ins.offset = end;
1768                         ins.flags = 0;
1769                         btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
1770
1771                         btrfs_release_path(root, path);
1772                         ret = btrfs_insert_empty_item(trans, root, path, &ins,
1773                                                       sizeof(*extent));
1774                         BUG_ON(ret);
1775                         extent = btrfs_item_ptr(
1776                                     btrfs_buffer_leaf(path->nodes[0]),
1777                                     path->slots[0],
1778                                     struct btrfs_file_extent_item);
1779                         btrfs_set_file_extent_disk_blocknr(extent,
1780                                     btrfs_file_extent_disk_blocknr(&old));
1781                         btrfs_set_file_extent_disk_num_blocks(extent,
1782                                     btrfs_file_extent_disk_num_blocks(&old));
1783
1784                         btrfs_set_file_extent_offset(extent,
1785                                     btrfs_file_extent_offset(&old) +
1786                                     ((end - key.offset) >> inode->i_blkbits));
1787                         WARN_ON(btrfs_file_extent_num_blocks(&old) <
1788                                 (end - key.offset) >> inode->i_blkbits);
1789                         btrfs_set_file_extent_num_blocks(extent,
1790                                     btrfs_file_extent_num_blocks(&old) -
1791                                     ((end - key.offset) >> inode->i_blkbits));
1792
1793                         btrfs_set_file_extent_type(extent,
1794                                                    BTRFS_FILE_EXTENT_REG);
1795                         btrfs_set_file_extent_generation(extent,
1796                                     btrfs_file_extent_generation(&old));
1797                         btrfs_mark_buffer_dirty(path->nodes[0]);
1798                         inode->i_blocks +=
1799                                 btrfs_file_extent_num_blocks(extent) << 3;
1800                         ret = 0;
1801                         goto out;
1802                 }
1803         }
1804 out:
1805         btrfs_free_path(path);
1806         return ret;
1807 }
1808
1809 static int prepare_pages(struct btrfs_root *root,
1810                          struct file *file,
1811                          struct page **pages,
1812                          size_t num_pages,
1813                          loff_t pos,
1814                          unsigned long first_index,
1815                          unsigned long last_index,
1816                          size_t write_bytes,
1817                          u64 alloc_extent_start)
1818 {
1819         int i;
1820         unsigned long index = pos >> PAGE_CACHE_SHIFT;
1821         struct inode *inode = file->f_path.dentry->d_inode;
1822         int offset;
1823         int err = 0;
1824         int this_write;
1825         struct buffer_head *bh;
1826         struct buffer_head *head;
1827         loff_t isize = i_size_read(inode);
1828
1829         memset(pages, 0, num_pages * sizeof(struct page *));
1830
1831         for (i = 0; i < num_pages; i++) {
1832                 pages[i] = grab_cache_page(inode->i_mapping, index + i);
1833                 if (!pages[i]) {
1834                         err = -ENOMEM;
1835                         goto failed_release;
1836                 }
1837                 cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
1838                 wait_on_page_writeback(pages[i]);
1839                 offset = pos & (PAGE_CACHE_SIZE -1);
1840                 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1841                 if (!page_has_buffers(pages[i])) {
1842                         create_empty_buffers(pages[i],
1843                                              root->fs_info->sb->s_blocksize,
1844                                              (1 << BH_Uptodate));
1845                 }
1846                 head = page_buffers(pages[i]);
1847                 bh = head;
1848                 do {
1849                         err = btrfs_map_bh_to_logical(root, bh,
1850                                                       alloc_extent_start);
1851                         BUG_ON(err);
1852                         if (err)
1853                                 goto failed_truncate;
1854                         bh = bh->b_this_page;
1855                         if (alloc_extent_start)
1856                                 alloc_extent_start++;
1857                 } while (bh != head);
1858                 pos += this_write;
1859                 WARN_ON(this_write > write_bytes);
1860                 write_bytes -= this_write;
1861         }
1862         return 0;
1863
1864 failed_release:
1865         btrfs_drop_pages(pages, num_pages);
1866         return err;
1867
1868 failed_truncate:
1869         btrfs_drop_pages(pages, num_pages);
1870         if (pos > isize)
1871                 vmtruncate(inode, isize);
1872         return err;
1873 }
1874
1875 static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1876                                 size_t count, loff_t *ppos)
1877 {
1878         loff_t pos;
1879         size_t num_written = 0;
1880         int err = 0;
1881         int ret = 0;
1882         struct inode *inode = file->f_path.dentry->d_inode;
1883         struct btrfs_root *root = BTRFS_I(inode)->root;
1884         struct page *pages[8];
1885         struct page *pinned[2];
1886         unsigned long first_index;
1887         unsigned long last_index;
1888         u64 start_pos;
1889         u64 num_blocks;
1890         u64 alloc_extent_start;
1891         u64 hint_block;
1892         struct btrfs_trans_handle *trans;
1893         struct btrfs_key ins;
1894         pinned[0] = NULL;
1895         pinned[1] = NULL;
1896         if (file->f_flags & O_DIRECT)
1897                 return -EINVAL;
1898         pos = *ppos;
1899         vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
1900         current->backing_dev_info = inode->i_mapping->backing_dev_info;
1901         err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1902         if (err)
1903                 goto out;
1904         if (count == 0)
1905                 goto out;
1906         err = remove_suid(file->f_path.dentry);
1907         if (err)
1908                 goto out;
1909         file_update_time(file);
1910
1911         start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
1912         num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
1913                         inode->i_blkbits;
1914
1915         mutex_lock(&inode->i_mutex);
1916         first_index = pos >> PAGE_CACHE_SHIFT;
1917         last_index = (pos + count) >> PAGE_CACHE_SHIFT;
1918
1919         if ((first_index << PAGE_CACHE_SHIFT) < inode->i_size &&
1920             (pos & (PAGE_CACHE_SIZE - 1))) {
1921                 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
1922                 if (!PageUptodate(pinned[0])) {
1923                         ret = mpage_readpage(pinned[0], btrfs_get_block);
1924                         BUG_ON(ret);
1925                         wait_on_page_locked(pinned[0]);
1926                 } else {
1927                         unlock_page(pinned[0]);
1928                 }
1929         }
1930         if (first_index != last_index &&
1931             (last_index << PAGE_CACHE_SHIFT) < inode->i_size &&
1932             pos + count < inode->i_size &&
1933             (count & (PAGE_CACHE_SIZE - 1))) {
1934                 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
1935                 if (!PageUptodate(pinned[1])) {
1936                         ret = mpage_readpage(pinned[1], btrfs_get_block);
1937                         BUG_ON(ret);
1938                         wait_on_page_locked(pinned[1]);
1939                 } else {
1940                         unlock_page(pinned[1]);
1941                 }
1942         }
1943
1944         mutex_lock(&root->fs_info->fs_mutex);
1945         trans = btrfs_start_transaction(root, 1);
1946         if (!trans) {
1947                 err = -ENOMEM;
1948                 mutex_unlock(&root->fs_info->fs_mutex);
1949                 goto out_unlock;
1950         }
1951         btrfs_set_trans_block_group(trans, inode);
1952         /* FIXME blocksize != 4096 */
1953         inode->i_blocks += num_blocks << 3;
1954         hint_block = 0;
1955         if (start_pos < inode->i_size) {
1956                 /* FIXME blocksize != pagesize */
1957                 ret = drop_extents(trans, root, inode,
1958                                    start_pos,
1959                                    (pos + count + root->blocksize -1) &
1960                                    ~((u64)root->blocksize - 1), &hint_block);
1961                 BUG_ON(ret);
1962         }
1963         if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size ||
1964             pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
1965                 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
1966                                          num_blocks, hint_block, (u64)-1,
1967                                          &ins, 1);
1968                 BUG_ON(ret);
1969                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
1970                                        start_pos, ins.objectid, ins.offset);
1971                 BUG_ON(ret);
1972         } else {
1973                 ins.offset = 0;
1974                 ins.objectid = 0;
1975         }
1976         BUG_ON(ret);
1977         alloc_extent_start = ins.objectid;
1978         // btrfs_update_inode_block_group(trans, inode);
1979         ret = btrfs_end_transaction(trans, root);
1980         mutex_unlock(&root->fs_info->fs_mutex);
1981
1982         while(count > 0) {
1983                 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
1984                 size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset);
1985                 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
1986                                         PAGE_CACHE_SHIFT;
1987
1988                 memset(pages, 0, sizeof(pages));
1989                 ret = prepare_pages(root, file, pages, num_pages,
1990                                     pos, first_index, last_index,
1991                                     write_bytes, alloc_extent_start);
1992                 BUG_ON(ret);
1993
1994                 /* FIXME blocks != pagesize */
1995                 if (alloc_extent_start)
1996                         alloc_extent_start += num_pages;
1997                 ret = btrfs_copy_from_user(pos, num_pages,
1998                                            write_bytes, pages, buf);
1999                 BUG_ON(ret);
2000
2001                 ret = dirty_and_release_pages(NULL, root, file, pages,
2002                                               num_pages, pos, write_bytes);
2003                 BUG_ON(ret);
2004                 btrfs_drop_pages(pages, num_pages);
2005
2006                 buf += write_bytes;
2007                 count -= write_bytes;
2008                 pos += write_bytes;
2009                 num_written += write_bytes;
2010
2011                 balance_dirty_pages_ratelimited(inode->i_mapping);
2012                 btrfs_btree_balance_dirty(root);
2013                 cond_resched();
2014         }
2015 out_unlock:
2016         mutex_unlock(&inode->i_mutex);
2017 out:
2018         if (pinned[0])
2019                 page_cache_release(pinned[0]);
2020         if (pinned[1])
2021                 page_cache_release(pinned[1]);
2022         *ppos = pos;
2023         current->backing_dev_info = NULL;
2024         mark_inode_dirty(inode);
2025         return num_written ? num_written : err;
2026 }
2027
2028 static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
2029                         unsigned long offset, unsigned long size)
2030 {
2031         char *kaddr;
2032         unsigned long left, count = desc->count;
2033         struct inode *inode = page->mapping->host;
2034
2035         if (size > count)
2036                 size = count;
2037
2038         if (!PageChecked(page)) {
2039                 /* FIXME, do it per block */
2040                 struct btrfs_root *root = BTRFS_I(inode)->root;
2041
2042                 int ret = btrfs_csum_verify_file_block(root,
2043                                   page->mapping->host->i_ino,
2044                                   page->index << PAGE_CACHE_SHIFT,
2045                                   kmap(page), PAGE_CACHE_SIZE);
2046                 if (ret) {
2047                         printk("failed to verify ino %lu page %lu\n",
2048                                page->mapping->host->i_ino,
2049                                page->index);
2050                         memset(page_address(page), 0, PAGE_CACHE_SIZE);
2051                 }
2052                 SetPageChecked(page);
2053                 kunmap(page);
2054         }
2055         /*
2056          * Faults on the destination of a read are common, so do it before
2057          * taking the kmap.
2058          */
2059         if (!fault_in_pages_writeable(desc->arg.buf, size)) {
2060                 kaddr = kmap_atomic(page, KM_USER0);
2061                 left = __copy_to_user_inatomic(desc->arg.buf,
2062                                                 kaddr + offset, size);
2063                 kunmap_atomic(kaddr, KM_USER0);
2064                 if (left == 0)
2065                         goto success;
2066         }
2067
2068         /* Do it the slow way */
2069         kaddr = kmap(page);
2070         left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
2071         kunmap(page);
2072
2073         if (left) {
2074                 size -= left;
2075                 desc->error = -EFAULT;
2076         }
2077 success:
2078         desc->count = count - size;
2079         desc->written += size;
2080         desc->arg.buf += size;
2081         return size;
2082 }
2083
2084 /**
2085  * btrfs_file_aio_read - filesystem read routine
2086  * @iocb:       kernel I/O control block
2087  * @iov:        io vector request
2088  * @nr_segs:    number of segments in the iovec
2089  * @pos:        current file position
2090  */
2091 static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
2092                                    unsigned long nr_segs, loff_t pos)
2093 {
2094         struct file *filp = iocb->ki_filp;
2095         ssize_t retval;
2096         unsigned long seg;
2097         size_t count;
2098         loff_t *ppos = &iocb->ki_pos;
2099
2100         count = 0;
2101         for (seg = 0; seg < nr_segs; seg++) {
2102                 const struct iovec *iv = &iov[seg];
2103
2104                 /*
2105                  * If any segment has a negative length, or the cumulative
2106                  * length ever wraps negative then return -EINVAL.
2107                  */
2108                 count += iv->iov_len;
2109                 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
2110                         return -EINVAL;
2111                 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
2112                         continue;
2113                 if (seg == 0)
2114                         return -EFAULT;
2115                 nr_segs = seg;
2116                 count -= iv->iov_len;   /* This segment is no good */
2117                 break;
2118         }
2119         retval = 0;
2120         if (count) {
2121                 for (seg = 0; seg < nr_segs; seg++) {
2122                         read_descriptor_t desc;
2123
2124                         desc.written = 0;
2125                         desc.arg.buf = iov[seg].iov_base;
2126                         desc.count = iov[seg].iov_len;
2127                         if (desc.count == 0)
2128                                 continue;
2129                         desc.error = 0;
2130                         do_generic_file_read(filp, ppos, &desc,
2131                                              btrfs_read_actor);
2132                         retval += desc.written;
2133                         if (desc.error) {
2134                                 retval = retval ?: desc.error;
2135                                 break;
2136                         }
2137                 }
2138         }
2139         return retval;
2140 }
2141
2142 static int create_subvol(struct btrfs_root *root, char *name, int namelen)
2143 {
2144         struct btrfs_trans_handle *trans;
2145         struct btrfs_key key;
2146         struct btrfs_root_item root_item;
2147         struct btrfs_inode_item *inode_item;
2148         struct buffer_head *subvol;
2149         struct btrfs_leaf *leaf;
2150         struct btrfs_root *new_root;
2151         struct inode *inode;
2152         struct inode *dir;
2153         int ret;
2154         u64 objectid;
2155         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2156
2157         mutex_lock(&root->fs_info->fs_mutex);
2158         trans = btrfs_start_transaction(root, 1);
2159         BUG_ON(!trans);
2160
2161         subvol = btrfs_alloc_free_block(trans, root, 0);
2162         if (subvol == NULL)
2163                 return -ENOSPC;
2164         leaf = btrfs_buffer_leaf(subvol);
2165         btrfs_set_header_nritems(&leaf->header, 0);
2166         btrfs_set_header_level(&leaf->header, 0);
2167         btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol));
2168         btrfs_set_header_generation(&leaf->header, trans->transid);
2169         btrfs_set_header_owner(&leaf->header, root->root_key.objectid);
2170         memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
2171                sizeof(leaf->header.fsid));
2172         mark_buffer_dirty(subvol);
2173
2174         inode_item = &root_item.inode;
2175         memset(inode_item, 0, sizeof(*inode_item));
2176         btrfs_set_inode_generation(inode_item, 1);
2177         btrfs_set_inode_size(inode_item, 3);
2178         btrfs_set_inode_nlink(inode_item, 1);
2179         btrfs_set_inode_nblocks(inode_item, 1);
2180         btrfs_set_inode_mode(inode_item, S_IFDIR | 0755);
2181
2182         btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
2183         btrfs_set_root_refs(&root_item, 1);
2184         brelse(subvol);
2185         subvol = NULL;
2186
2187         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2188                                        0, &objectid);
2189         BUG_ON(ret);
2190
2191         btrfs_set_root_dirid(&root_item, new_dirid);
2192
2193         key.objectid = objectid;
2194         key.offset = 1;
2195         key.flags = 0;
2196         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2197         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2198                                 &root_item);
2199         BUG_ON(ret);
2200
2201         /*
2202          * insert the directory item
2203          */
2204         key.offset = (u64)-1;
2205         dir = root->fs_info->sb->s_root->d_inode;
2206         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2207                                     name, namelen, dir->i_ino, &key, 0);
2208         BUG_ON(ret);
2209
2210         ret = btrfs_commit_transaction(trans, root);
2211         BUG_ON(ret);
2212
2213         new_root = btrfs_read_fs_root(root->fs_info, &key);
2214         BUG_ON(!new_root);
2215
2216         trans = btrfs_start_transaction(new_root, 1);
2217         BUG_ON(!trans);
2218
2219         inode = btrfs_new_inode(trans, new_root, new_dirid,
2220                                 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2221         inode->i_op = &btrfs_dir_inode_operations;
2222         inode->i_fop = &btrfs_dir_file_operations;
2223
2224         ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid);
2225         BUG_ON(ret);
2226
2227         inode->i_nlink = 1;
2228         inode->i_size = 6;
2229         ret = btrfs_update_inode(trans, new_root, inode);
2230         BUG_ON(ret);
2231
2232         ret = btrfs_commit_transaction(trans, new_root);
2233         BUG_ON(ret);
2234
2235         iput(inode);
2236
2237         mutex_unlock(&root->fs_info->fs_mutex);
2238         btrfs_btree_balance_dirty(root);
2239         return 0;
2240 }
2241
2242 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2243 {
2244         struct btrfs_trans_handle *trans;
2245         struct btrfs_key key;
2246         struct btrfs_root_item new_root_item;
2247         int ret;
2248         u64 objectid;
2249
2250         if (!root->ref_cows)
2251                 return -EINVAL;
2252
2253         mutex_lock(&root->fs_info->fs_mutex);
2254         trans = btrfs_start_transaction(root, 1);
2255         BUG_ON(!trans);
2256
2257         ret = btrfs_update_inode(trans, root, root->inode);
2258         BUG_ON(ret);
2259
2260         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2261                                        0, &objectid);
2262         BUG_ON(ret);
2263
2264         memcpy(&new_root_item, &root->root_item,
2265                sizeof(new_root_item));
2266
2267         key.objectid = objectid;
2268         key.offset = 1;
2269         key.flags = 0;
2270         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2271         btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node));
2272
2273         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2274                                 &new_root_item);
2275         BUG_ON(ret);
2276
2277         /*
2278          * insert the directory item
2279          */
2280         key.offset = (u64)-1;
2281         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2282                                     name, namelen,
2283                                     root->fs_info->sb->s_root->d_inode->i_ino,
2284                                     &key, 0);
2285
2286         BUG_ON(ret);
2287
2288         ret = btrfs_inc_root_ref(trans, root);
2289         BUG_ON(ret);
2290
2291         ret = btrfs_commit_transaction(trans, root);
2292         BUG_ON(ret);
2293         mutex_unlock(&root->fs_info->fs_mutex);
2294         btrfs_btree_balance_dirty(root);
2295         return 0;
2296 }
2297
2298 static int add_disk(struct btrfs_root *root, char *name, int namelen)
2299 {
2300         struct block_device *bdev;
2301         struct btrfs_path *path;
2302         struct super_block *sb = root->fs_info->sb;
2303         struct btrfs_root *dev_root = root->fs_info->dev_root;
2304         struct btrfs_trans_handle *trans;
2305         struct btrfs_device_item *dev_item;
2306         struct btrfs_key key;
2307         u16 item_size;
2308         u64 num_blocks;
2309         u64 new_blocks;
2310         u64 device_id;
2311         int ret;
2312
2313 printk("adding disk %s\n", name);
2314         path = btrfs_alloc_path();
2315         if (!path)
2316                 return -ENOMEM;
2317         num_blocks = btrfs_super_total_blocks(root->fs_info->disk_super);
2318         bdev = open_bdev_excl(name, O_RDWR, sb);
2319         if (IS_ERR(bdev)) {
2320                 ret = PTR_ERR(bdev);
2321 printk("open bdev excl failed ret %d\n", ret);
2322                 goto out_nolock;
2323         }
2324         set_blocksize(bdev, sb->s_blocksize);
2325         new_blocks = bdev->bd_inode->i_size >> sb->s_blocksize_bits;
2326         key.objectid = num_blocks;
2327         key.offset = new_blocks;
2328         key.flags = 0;
2329         btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY);
2330
2331         mutex_lock(&dev_root->fs_info->fs_mutex);
2332         trans = btrfs_start_transaction(dev_root, 1);
2333         item_size = sizeof(*dev_item) + namelen;
2334 printk("insert empty on %Lu %Lu %u size %d\n", num_blocks, new_blocks, key.flags, item_size);
2335         ret = btrfs_insert_empty_item(trans, dev_root, path, &key, item_size);
2336         if (ret) {
2337 printk("insert failed %d\n", ret);
2338                 close_bdev_excl(bdev);
2339                 if (ret > 0)
2340                         ret = -EEXIST;
2341                 goto out;
2342         }
2343         dev_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2344                                   path->slots[0], struct btrfs_device_item);
2345         btrfs_set_device_pathlen(dev_item, namelen);
2346         memcpy(dev_item + 1, name, namelen);
2347
2348         device_id = btrfs_super_last_device_id(root->fs_info->disk_super) + 1;
2349         btrfs_set_super_last_device_id(root->fs_info->disk_super, device_id);
2350         btrfs_set_device_id(dev_item, device_id);
2351         mark_buffer_dirty(path->nodes[0]);
2352
2353         ret = btrfs_insert_dev_radix(root, bdev, device_id, num_blocks,
2354                                      new_blocks);
2355
2356         if (!ret) {
2357                 btrfs_set_super_total_blocks(root->fs_info->disk_super,
2358                                              num_blocks + new_blocks);
2359                 i_size_write(root->fs_info->btree_inode,
2360                              (num_blocks + new_blocks) <<
2361                              root->fs_info->btree_inode->i_blkbits);
2362         }
2363
2364 out:
2365         ret = btrfs_commit_transaction(trans, dev_root);
2366         BUG_ON(ret);
2367         mutex_unlock(&root->fs_info->fs_mutex);
2368 out_nolock:
2369         btrfs_free_path(path);
2370         btrfs_btree_balance_dirty(root);
2371
2372         return ret;
2373 }
2374
2375 static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
2376                        cmd, unsigned long arg)
2377 {
2378         struct btrfs_root *root = BTRFS_I(inode)->root;
2379         struct btrfs_ioctl_vol_args vol_args;
2380         int ret = 0;
2381         struct btrfs_dir_item *di;
2382         int namelen;
2383         struct btrfs_path *path;
2384         u64 root_dirid;
2385
2386         switch (cmd) {
2387         case BTRFS_IOC_SNAP_CREATE:
2388                 if (copy_from_user(&vol_args,
2389                                    (struct btrfs_ioctl_vol_args __user *)arg,
2390                                    sizeof(vol_args)))
2391                         return -EFAULT;
2392                 namelen = strlen(vol_args.name);
2393                 if (namelen > BTRFS_VOL_NAME_MAX)
2394                         return -EINVAL;
2395                 path = btrfs_alloc_path();
2396                 if (!path)
2397                         return -ENOMEM;
2398                 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2399                 mutex_lock(&root->fs_info->fs_mutex);
2400                 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2401                                     path, root_dirid,
2402                                     vol_args.name, namelen, 0);
2403                 mutex_unlock(&root->fs_info->fs_mutex);
2404                 btrfs_free_path(path);
2405                 if (di && !IS_ERR(di))
2406                         return -EEXIST;
2407
2408                 if (root == root->fs_info->tree_root)
2409                         ret = create_subvol(root, vol_args.name, namelen);
2410                 else
2411                         ret = create_snapshot(root, vol_args.name, namelen);
2412                 WARN_ON(ret);
2413                 break;
2414         case BTRFS_IOC_ADD_DISK:
2415                 if (copy_from_user(&vol_args,
2416                                    (struct btrfs_ioctl_vol_args __user *)arg,
2417                                    sizeof(vol_args)))
2418                         return -EFAULT;
2419                 namelen = strlen(vol_args.name);
2420                 if (namelen > BTRFS_VOL_NAME_MAX)
2421                         return -EINVAL;
2422                 vol_args.name[namelen] = '\0';
2423                 ret = add_disk(root, vol_args.name, namelen);
2424                 break;
2425         default:
2426                 return -ENOTTY;
2427         }
2428         return ret;
2429 }
2430
/*
 * Slab caches created by init_inodecache() and released by
 * destroy_inodecache().  Only the inode cache is private to this file; the
 * remaining ones have external linkage.
 */
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_transaction_cachep;
struct kmem_cache *btrfs_bit_radix_cachep;
struct kmem_cache *btrfs_path_cachep;
static struct kmem_cache *btrfs_inode_cachep;
2436
2437 /*
2438  * Called inside transaction, so use GFP_NOFS
2439  */
2440 static struct inode *btrfs_alloc_inode(struct super_block *sb)
2441 {
2442         struct btrfs_inode *ei;
2443
2444         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2445         if (!ei)
2446                 return NULL;
2447         return &ei->vfs_inode;
2448 }
2449
2450 static void btrfs_destroy_inode(struct inode *inode)
2451 {
2452         WARN_ON(!list_empty(&inode->i_dentry));
2453         WARN_ON(inode->i_data.nrpages);
2454
2455         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2456 }
2457
2458 static void init_once(void * foo, struct kmem_cache * cachep,
2459                       unsigned long flags)
2460 {
2461         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2462
2463         if ((flags & (SLAB_CTOR_CONSTRUCTOR)) ==
2464             SLAB_CTOR_CONSTRUCTOR) {
2465                 inode_init_once(&ei->vfs_inode);
2466         }
2467 }
2468
2469 static int init_inodecache(void)
2470 {
2471         btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
2472                                              sizeof(struct btrfs_inode),
2473                                              0, (SLAB_RECLAIM_ACCOUNT|
2474                                                 SLAB_MEM_SPREAD),
2475                                              init_once, NULL);
2476         btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
2477                                              sizeof(struct btrfs_trans_handle),
2478                                              0, (SLAB_RECLAIM_ACCOUNT|
2479                                                 SLAB_MEM_SPREAD),
2480                                              NULL, NULL);
2481         btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
2482                                              sizeof(struct btrfs_transaction),
2483                                              0, (SLAB_RECLAIM_ACCOUNT|
2484                                                 SLAB_MEM_SPREAD),
2485                                              NULL, NULL);
2486         btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
2487                                              sizeof(struct btrfs_transaction),
2488                                              0, (SLAB_RECLAIM_ACCOUNT|
2489                                                 SLAB_MEM_SPREAD),
2490                                              NULL, NULL);
2491         btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix",
2492                                              256,
2493                                              0, (SLAB_RECLAIM_ACCOUNT|
2494                                                 SLAB_MEM_SPREAD |
2495                                                 SLAB_DESTROY_BY_RCU),
2496                                              NULL, NULL);
2497         if (btrfs_inode_cachep == NULL || btrfs_trans_handle_cachep == NULL ||
2498             btrfs_transaction_cachep == NULL || btrfs_bit_radix_cachep == NULL)
2499                 return -ENOMEM;
2500         return 0;
2501 }
2502
2503 static void destroy_inodecache(void)
2504 {
2505         kmem_cache_destroy(btrfs_inode_cachep);
2506         kmem_cache_destroy(btrfs_trans_handle_cachep);
2507         kmem_cache_destroy(btrfs_transaction_cachep);
2508         kmem_cache_destroy(btrfs_bit_radix_cachep);
2509         kmem_cache_destroy(btrfs_path_cachep);
2510 }
2511
2512 static int btrfs_get_sb(struct file_system_type *fs_type,
2513         int flags, const char *dev_name, void *data, struct vfsmount *mnt)
2514 {
2515         return get_sb_bdev(fs_type, flags, dev_name, data,
2516                            btrfs_fill_super, mnt);
2517 }
2518
2519 static int btrfs_getattr(struct vfsmount *mnt,
2520                          struct dentry *dentry, struct kstat *stat)
2521 {
2522         struct inode *inode = dentry->d_inode;
2523         generic_fillattr(inode, stat);
2524         stat->blksize = 256 * 1024;
2525         return 0;
2526 }
2527
2528 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
2529 {
2530         struct btrfs_root *root = btrfs_sb(dentry->d_sb);
2531         struct btrfs_super_block *disk_super = root->fs_info->disk_super;
2532
2533         buf->f_namelen = BTRFS_NAME_LEN;
2534         buf->f_blocks = btrfs_super_total_blocks(disk_super);
2535         buf->f_bfree = buf->f_blocks - btrfs_super_blocks_used(disk_super);
2536         buf->f_bavail = buf->f_bfree;
2537         buf->f_bsize = dentry->d_sb->s_blocksize;
2538         buf->f_type = BTRFS_SUPER_MAGIC;
2539         return 0;
2540 }
2541
2542 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
2543                            struct inode * new_dir,struct dentry *new_dentry)
2544 {
2545         struct btrfs_trans_handle *trans;
2546         struct btrfs_root *root = BTRFS_I(old_dir)->root;
2547         struct inode *new_inode = new_dentry->d_inode;
2548         struct inode *old_inode = old_dentry->d_inode;
2549         struct timespec ctime = CURRENT_TIME;
2550         struct btrfs_path *path;
2551         struct btrfs_dir_item *di;
2552         int ret;
2553
2554         if (S_ISDIR(old_inode->i_mode) && new_inode &&
2555             new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
2556                 return -ENOTEMPTY;
2557         }
2558         mutex_lock(&root->fs_info->fs_mutex);
2559         trans = btrfs_start_transaction(root, 1);
2560         btrfs_set_trans_block_group(trans, new_dir);
2561         path = btrfs_alloc_path();
2562         if (!path) {
2563                 ret = -ENOMEM;
2564                 goto out_fail;
2565         }
2566
2567         old_dentry->d_inode->i_nlink++;
2568         old_dir->i_ctime = old_dir->i_mtime = ctime;
2569         new_dir->i_ctime = new_dir->i_mtime = ctime;
2570         old_inode->i_ctime = ctime;
2571         if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) {
2572                 struct btrfs_key *location = &BTRFS_I(new_dir)->location;
2573                 u64 old_parent_oid;
2574                 di = btrfs_lookup_dir_item(trans, root, path, old_inode->i_ino,
2575                                            "..", 2, -1);
2576                 if (IS_ERR(di)) {
2577                         ret = PTR_ERR(di);
2578                         goto out_fail;
2579                 }
2580                 if (!di) {
2581                         ret = -ENOENT;
2582                         goto out_fail;
2583                 }
2584                 old_parent_oid = btrfs_disk_key_objectid(&di->location);
2585                 ret = btrfs_del_item(trans, root, path);
2586                 if (ret) {
2587                         ret = -EIO;
2588                         goto out_fail;
2589                 }
2590                 btrfs_release_path(root, path);
2591
2592                 di = btrfs_lookup_dir_index_item(trans, root, path,
2593                                                  old_inode->i_ino,
2594                                                  old_parent_oid,
2595                                                  "..", 2, -1);
2596                 if (IS_ERR(di)) {
2597                         ret = PTR_ERR(di);
2598                         goto out_fail;
2599                 }
2600                 if (!di) {
2601                         ret = -ENOENT;
2602                         goto out_fail;
2603                 }
2604                 ret = btrfs_del_item(trans, root, path);
2605                 if (ret) {
2606                         ret = -EIO;
2607                         goto out_fail;
2608                 }
2609                 btrfs_release_path(root, path);
2610
2611                 ret = btrfs_insert_dir_item(trans, root, "..", 2,
2612                                             old_inode->i_ino, location, 0);
2613                 if (ret)
2614                         goto out_fail;
2615         }
2616
2617
2618         ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
2619         if (ret)
2620                 goto out_fail;
2621
2622         if (new_inode) {
2623                 new_inode->i_ctime = CURRENT_TIME;
2624                 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
2625                 if (ret)
2626                         goto out_fail;
2627                 if (S_ISDIR(new_inode->i_mode))
2628                         clear_nlink(new_inode);
2629                 else
2630                         drop_nlink(new_inode);
2631                 btrfs_update_inode(trans, root, new_inode);
2632         }
2633         ret = btrfs_add_link(trans, new_dentry, old_inode);
2634         if (ret)
2635                 goto out_fail;
2636
2637 out_fail:
2638         btrfs_free_path(path);
2639         btrfs_end_transaction(trans, root);
2640         mutex_unlock(&root->fs_info->fs_mutex);
2641         return ret;
2642 }
2643
2644 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
2645                          const char *symname)
2646 {
2647         struct btrfs_trans_handle *trans;
2648         struct btrfs_root *root = BTRFS_I(dir)->root;
2649         struct btrfs_path *path;
2650         struct btrfs_key key;
2651         struct inode *inode;
2652         int err;
2653         int drop_inode = 0;
2654         u64 objectid;
2655         int name_len;
2656         int datasize;
2657         char *ptr;
2658         struct btrfs_file_extent_item *ei;
2659
2660         name_len = strlen(symname) + 1;
2661         if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
2662                 return -ENAMETOOLONG;
2663         mutex_lock(&root->fs_info->fs_mutex);
2664         trans = btrfs_start_transaction(root, 1);
2665         btrfs_set_trans_block_group(trans, dir);
2666
2667         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2668         if (err) {
2669                 err = -ENOSPC;
2670                 goto out_unlock;
2671         }
2672
2673         inode = btrfs_new_inode(trans, root, objectid,
2674                                 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
2675         err = PTR_ERR(inode);
2676         if (IS_ERR(inode))
2677                 goto out_unlock;
2678
2679         btrfs_set_trans_block_group(trans, inode);
2680         err = btrfs_add_nondir(trans, dentry, inode);
2681         if (err)
2682                 drop_inode = 1;
2683         else {
2684                 inode->i_mapping->a_ops = &btrfs_aops;
2685                 inode->i_fop = &btrfs_file_operations;
2686                 inode->i_op = &btrfs_file_inode_operations;
2687         }
2688         dir->i_sb->s_dirt = 1;
2689         btrfs_update_inode_block_group(trans, inode);
2690         btrfs_update_inode_block_group(trans, dir);
2691         if (drop_inode)
2692                 goto out_unlock;
2693
2694         path = btrfs_alloc_path();
2695         BUG_ON(!path);
2696         key.objectid = inode->i_ino;
2697         key.offset = 0;
2698         key.flags = 0;
2699         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2700         datasize = btrfs_file_extent_calc_inline_size(name_len);
2701         err = btrfs_insert_empty_item(trans, root, path, &key,
2702                                       datasize);
2703         BUG_ON(err);
2704         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2705                path->slots[0], struct btrfs_file_extent_item);
2706         btrfs_set_file_extent_generation(ei, trans->transid);
2707         btrfs_set_file_extent_type(ei,
2708                                    BTRFS_FILE_EXTENT_INLINE);
2709         ptr = btrfs_file_extent_inline_start(ei);
2710         btrfs_memcpy(root, path->nodes[0]->b_data,
2711                      ptr, symname, name_len);
2712         mark_buffer_dirty(path->nodes[0]);
2713         btrfs_free_path(path);
2714         inode->i_op = &btrfs_symlink_inode_operations;
2715         inode->i_mapping->a_ops = &btrfs_symlink_aops;
2716         inode->i_size = name_len - 1;
2717         btrfs_update_inode(trans, root, inode);
2718         err = 0;
2719
2720 out_unlock:
2721         btrfs_end_transaction(trans, root);
2722         mutex_unlock(&root->fs_info->fs_mutex);
2723
2724         if (drop_inode) {
2725                 inode_dec_link_count(inode);
2726                 iput(inode);
2727         }
2728         btrfs_btree_balance_dirty(root);
2729         return err;
2730 }
2731
2732 static struct file_system_type btrfs_fs_type = {
2733         .owner          = THIS_MODULE,
2734         .name           = "btrfs",
2735         .get_sb         = btrfs_get_sb,
2736         .kill_sb        = kill_block_super,
2737         .fs_flags       = FS_REQUIRES_DEV,
2738 };
2739
2740 static struct super_operations btrfs_super_ops = {
2741         .delete_inode   = btrfs_delete_inode,
2742         .put_super      = btrfs_put_super,
2743         .read_inode     = btrfs_read_locked_inode,
2744         .write_super    = btrfs_write_super,
2745         .sync_fs        = btrfs_sync_fs,
2746         .write_inode    = btrfs_write_inode,
2747         .dirty_inode    = btrfs_dirty_inode,
2748         .alloc_inode    = btrfs_alloc_inode,
2749         .destroy_inode  = btrfs_destroy_inode,
2750         .statfs         = btrfs_statfs,
2751 };
2752
2753 static struct inode_operations btrfs_dir_inode_operations = {
2754         .lookup         = btrfs_lookup,
2755         .create         = btrfs_create,
2756         .unlink         = btrfs_unlink,
2757         .link           = btrfs_link,
2758         .mkdir          = btrfs_mkdir,
2759         .rmdir          = btrfs_rmdir,
2760         .rename         = btrfs_rename,
2761         .symlink        = btrfs_symlink,
2762 };
2763
2764 static struct inode_operations btrfs_dir_ro_inode_operations = {
2765         .lookup         = btrfs_lookup,
2766 };
2767
2768 static struct file_operations btrfs_dir_file_operations = {
2769         .llseek         = generic_file_llseek,
2770         .read           = generic_read_dir,
2771         .readdir        = btrfs_readdir,
2772         .ioctl          = btrfs_ioctl,
2773 };
2774
2775 static struct address_space_operations btrfs_aops = {
2776         .readpage       = btrfs_readpage,
2777         .writepage      = btrfs_writepage,
2778         .sync_page      = block_sync_page,
2779         .prepare_write  = btrfs_prepare_write,
2780         .commit_write   = btrfs_commit_write,
2781 };
2782
2783 static struct address_space_operations btrfs_symlink_aops = {
2784         .readpage       = btrfs_readpage,
2785         .writepage      = btrfs_writepage,
2786 };
2787
2788 static struct inode_operations btrfs_file_inode_operations = {
2789         .truncate       = btrfs_truncate,
2790         .getattr        = btrfs_getattr,
2791 };
2792
2793 static struct file_operations btrfs_file_operations = {
2794         .llseek         = generic_file_llseek,
2795         .read           = do_sync_read,
2796         .aio_read       = btrfs_file_aio_read,
2797         .write          = btrfs_file_write,
2798         .mmap           = generic_file_mmap,
2799         .open           = generic_file_open,
2800         .ioctl          = btrfs_ioctl,
2801         .fsync          = btrfs_sync_file,
2802 };
2803
2804 static struct inode_operations btrfs_symlink_inode_operations = {
2805         .readlink       = generic_readlink,
2806         .follow_link    = page_follow_link_light,
2807         .put_link       = page_put_link,
2808 };
2809
2810 static int __init init_btrfs_fs(void)
2811 {
2812         int err;
2813         printk("btrfs loaded!\n");
2814         err = init_inodecache();
2815         if (err)
2816                 return err;
2817         return register_filesystem(&btrfs_fs_type);
2818         destroy_inodecache();
2819         return err;
2820 }
2821
2822 static void __exit exit_btrfs_fs(void)
2823 {
2824         destroy_inodecache();
2825         unregister_filesystem(&btrfs_fs_type);
2826         printk("btrfs unloaded\n");
2827 }
2828
2829 module_init(init_btrfs_fs)
2830 module_exit(exit_btrfs_fs)
2831
2832 MODULE_LICENSE("GPL");