Btrfs: sparse files!
[linux-2.6] / fs / btrfs / super.c
1 #include <linux/module.h>
2 #include <linux/buffer_head.h>
3 #include <linux/fs.h>
4 #include <linux/pagemap.h>
5 #include <linux/highmem.h>
6 #include <linux/time.h>
7 #include <linux/init.h>
8 #include <linux/string.h>
9 #include <linux/smp_lock.h>
10 #include <linux/backing-dev.h>
11 #include <linux/mpage.h>
12 #include <linux/swap.h>
13 #include <linux/writeback.h>
14 #include <linux/statfs.h>
15 #include "ctree.h"
16 #include "disk-io.h"
17 #include "transaction.h"
18 #include "btrfs_inode.h"
19 #include "ioctl.h"
20 #include "print-tree.h"
21
22 struct btrfs_iget_args {
23         u64 ino;
24         struct btrfs_root *root;
25 };
26
/* Magic number stored in sb->s_magic by btrfs_fill_super(). */
#define BTRFS_SUPER_MAGIC 0x9123682E
28
29 static struct inode_operations btrfs_dir_inode_operations;
30 static struct inode_operations btrfs_symlink_inode_operations;
31 static struct inode_operations btrfs_dir_ro_inode_operations;
32 static struct super_operations btrfs_super_ops;
33 static struct file_operations btrfs_dir_file_operations;
34 static struct inode_operations btrfs_file_inode_operations;
35 static struct address_space_operations btrfs_aops;
36 static struct address_space_operations btrfs_symlink_aops;
37 static struct file_operations btrfs_file_operations;
38
39 static void btrfs_read_locked_inode(struct inode *inode)
40 {
41         struct btrfs_path *path;
42         struct btrfs_inode_item *inode_item;
43         struct btrfs_root *root = BTRFS_I(inode)->root;
44         struct btrfs_key location;
45         struct btrfs_block_group_cache *alloc_group;
46         u64 alloc_group_block;
47         int ret;
48
49         path = btrfs_alloc_path();
50         BUG_ON(!path);
51         btrfs_init_path(path);
52         mutex_lock(&root->fs_info->fs_mutex);
53
54         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
55         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
56         if (ret) {
57                 btrfs_free_path(path);
58                 goto make_bad;
59         }
60         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
61                                   path->slots[0],
62                                   struct btrfs_inode_item);
63
64         inode->i_mode = btrfs_inode_mode(inode_item);
65         inode->i_nlink = btrfs_inode_nlink(inode_item);
66         inode->i_uid = btrfs_inode_uid(inode_item);
67         inode->i_gid = btrfs_inode_gid(inode_item);
68         inode->i_size = btrfs_inode_size(inode_item);
69         inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime);
70         inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime);
71         inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime);
72         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime);
73         inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime);
74         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
75         inode->i_blocks = btrfs_inode_nblocks(inode_item);
76         inode->i_generation = btrfs_inode_generation(inode_item);
77         alloc_group_block = btrfs_inode_block_group(inode_item);
78         ret = radix_tree_gang_lookup(&root->fs_info->block_group_radix,
79                                      (void **)&alloc_group,
80                                      alloc_group_block, 1);
81         BUG_ON(!ret);
82         BTRFS_I(inode)->block_group = alloc_group;
83
84         btrfs_free_path(path);
85         inode_item = NULL;
86
87         mutex_unlock(&root->fs_info->fs_mutex);
88
89         switch (inode->i_mode & S_IFMT) {
90 #if 0
91         default:
92                 init_special_inode(inode, inode->i_mode,
93                                    btrfs_inode_rdev(inode_item));
94                 break;
95 #endif
96         case S_IFREG:
97                 inode->i_mapping->a_ops = &btrfs_aops;
98                 inode->i_fop = &btrfs_file_operations;
99                 inode->i_op = &btrfs_file_inode_operations;
100                 break;
101         case S_IFDIR:
102                 inode->i_fop = &btrfs_dir_file_operations;
103                 if (root == root->fs_info->tree_root)
104                         inode->i_op = &btrfs_dir_ro_inode_operations;
105                 else
106                         inode->i_op = &btrfs_dir_inode_operations;
107                 break;
108         case S_IFLNK:
109                 inode->i_op = &btrfs_symlink_inode_operations;
110                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
111                 break;
112         }
113         return;
114
115 make_bad:
116         btrfs_release_path(root, path);
117         btrfs_free_path(path);
118         mutex_unlock(&root->fs_info->fs_mutex);
119         make_bad_inode(inode);
120 }
121
122 static void fill_inode_item(struct btrfs_inode_item *item,
123                             struct inode *inode)
124 {
125         btrfs_set_inode_uid(item, inode->i_uid);
126         btrfs_set_inode_gid(item, inode->i_gid);
127         btrfs_set_inode_size(item, inode->i_size);
128         btrfs_set_inode_mode(item, inode->i_mode);
129         btrfs_set_inode_nlink(item, inode->i_nlink);
130         btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec);
131         btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec);
132         btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec);
133         btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec);
134         btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec);
135         btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
136         btrfs_set_inode_nblocks(item, inode->i_blocks);
137         btrfs_set_inode_generation(item, inode->i_generation);
138         btrfs_set_inode_block_group(item,
139                                     BTRFS_I(inode)->block_group->key.objectid);
140 }
141
142 static int btrfs_update_inode(struct btrfs_trans_handle *trans,
143                               struct btrfs_root *root,
144                               struct inode *inode)
145 {
146         struct btrfs_inode_item *inode_item;
147         struct btrfs_path *path;
148         int ret;
149
150         path = btrfs_alloc_path();
151         BUG_ON(!path);
152         btrfs_init_path(path);
153         ret = btrfs_lookup_inode(trans, root, path,
154                                  &BTRFS_I(inode)->location, 1);
155         if (ret) {
156                 if (ret > 0)
157                         ret = -ENOENT;
158                 goto failed;
159         }
160
161         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
162                                   path->slots[0],
163                                   struct btrfs_inode_item);
164
165         fill_inode_item(inode_item, inode);
166         btrfs_mark_buffer_dirty(path->nodes[0]);
167         ret = 0;
168 failed:
169         btrfs_release_path(root, path);
170         btrfs_free_path(path);
171         return ret;
172 }
173
174
175 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
176                               struct btrfs_root *root,
177                               struct inode *dir,
178                               struct dentry *dentry)
179 {
180         struct btrfs_path *path;
181         const char *name = dentry->d_name.name;
182         int name_len = dentry->d_name.len;
183         int ret = 0;
184         u64 objectid;
185         struct btrfs_dir_item *di;
186
187         path = btrfs_alloc_path();
188         BUG_ON(!path);
189         btrfs_init_path(path);
190         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
191                                     name, name_len, -1);
192         if (IS_ERR(di)) {
193                 ret = PTR_ERR(di);
194                 goto err;
195         }
196         if (!di) {
197                 ret = -ENOENT;
198                 goto err;
199         }
200         objectid = btrfs_disk_key_objectid(&di->location);
201         ret = btrfs_delete_one_dir_name(trans, root, path, di);
202         BUG_ON(ret);
203         btrfs_release_path(root, path);
204
205         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
206                                          objectid, name, name_len, -1);
207         if (IS_ERR(di)) {
208                 ret = PTR_ERR(di);
209                 goto err;
210         }
211         if (!di) {
212                 ret = -ENOENT;
213                 goto err;
214         }
215         ret = btrfs_delete_one_dir_name(trans, root, path, di);
216         BUG_ON(ret);
217
218         dentry->d_inode->i_ctime = dir->i_ctime;
219 err:
220         btrfs_free_path(path);
221         if (!ret) {
222                 dir->i_size -= name_len * 2;
223                 btrfs_update_inode(trans, root, dir);
224                 drop_nlink(dentry->d_inode);
225                 btrfs_update_inode(trans, root, dentry->d_inode);
226                 dir->i_sb->s_dirt = 1;
227         }
228         return ret;
229 }
230
231 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
232 {
233         struct btrfs_root *root;
234         struct btrfs_trans_handle *trans;
235         int ret;
236
237         root = BTRFS_I(dir)->root;
238         mutex_lock(&root->fs_info->fs_mutex);
239         trans = btrfs_start_transaction(root, 1);
240         btrfs_set_trans_block_group(trans, dir);
241         ret = btrfs_unlink_trans(trans, root, dir, dentry);
242         btrfs_end_transaction(trans, root);
243         mutex_unlock(&root->fs_info->fs_mutex);
244         btrfs_btree_balance_dirty(root);
245         return ret;
246 }
247
248 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
249 {
250         struct inode *inode = dentry->d_inode;
251         int err;
252         int ret;
253         struct btrfs_root *root = BTRFS_I(dir)->root;
254         struct btrfs_path *path;
255         struct btrfs_key key;
256         struct btrfs_trans_handle *trans;
257         struct btrfs_key found_key;
258         int found_type;
259         struct btrfs_leaf *leaf;
260         char *goodnames = "..";
261
262         path = btrfs_alloc_path();
263         BUG_ON(!path);
264         btrfs_init_path(path);
265         mutex_lock(&root->fs_info->fs_mutex);
266         trans = btrfs_start_transaction(root, 1);
267         btrfs_set_trans_block_group(trans, dir);
268         key.objectid = inode->i_ino;
269         key.offset = (u64)-1;
270         key.flags = (u32)-1;
271         while(1) {
272                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
273                 if (ret < 0) {
274                         err = ret;
275                         goto out;
276                 }
277                 BUG_ON(ret == 0);
278                 if (path->slots[0] == 0) {
279                         err = -ENOENT;
280                         goto out;
281                 }
282                 path->slots[0]--;
283                 leaf = btrfs_buffer_leaf(path->nodes[0]);
284                 btrfs_disk_key_to_cpu(&found_key,
285                                       &leaf->items[path->slots[0]].key);
286                 found_type = btrfs_key_type(&found_key);
287                 if (found_key.objectid != inode->i_ino) {
288                         err = -ENOENT;
289                         goto out;
290                 }
291                 if ((found_type != BTRFS_DIR_ITEM_KEY &&
292                      found_type != BTRFS_DIR_INDEX_KEY) ||
293                     (!btrfs_match_dir_item_name(root, path, goodnames, 2) &&
294                     !btrfs_match_dir_item_name(root, path, goodnames, 1))) {
295                         err = -ENOTEMPTY;
296                         goto out;
297                 }
298                 ret = btrfs_del_item(trans, root, path);
299                 BUG_ON(ret);
300
301                 if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1)
302                         break;
303                 btrfs_release_path(root, path);
304         }
305         ret = 0;
306         btrfs_release_path(root, path);
307
308         /* now the directory is empty */
309         err = btrfs_unlink_trans(trans, root, dir, dentry);
310         if (!err) {
311                 inode->i_size = 0;
312         }
313 out:
314         btrfs_release_path(root, path);
315         btrfs_free_path(path);
316         mutex_unlock(&root->fs_info->fs_mutex);
317         ret = btrfs_end_transaction(trans, root);
318         btrfs_btree_balance_dirty(root);
319         if (ret && !err)
320                 err = ret;
321         return err;
322 }
323
324 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
325                             struct btrfs_root *root,
326                             struct inode *inode)
327 {
328         struct btrfs_path *path;
329         int ret;
330
331         clear_inode(inode);
332
333         path = btrfs_alloc_path();
334         BUG_ON(!path);
335         btrfs_init_path(path);
336         ret = btrfs_lookup_inode(trans, root, path,
337                                  &BTRFS_I(inode)->location, -1);
338         BUG_ON(ret);
339         ret = btrfs_del_item(trans, root, path);
340         BUG_ON(ret);
341         btrfs_free_path(path);
342         return ret;
343 }
344
345 static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path,
346                            u64 objectid)
347 {
348         struct btrfs_node *node;
349         int i;
350         int nritems;
351         u64 item_objectid;
352         u64 blocknr;
353         int slot;
354         int ret;
355
356         if (!path->nodes[1])
357                 return;
358         node = btrfs_buffer_node(path->nodes[1]);
359         slot = path->slots[1];
360         if (slot == 0)
361                 return;
362         nritems = btrfs_header_nritems(&node->header);
363         for (i = slot - 1; i >= 0; i--) {
364                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
365                 if (item_objectid != objectid)
366                         break;
367                 blocknr = btrfs_node_blockptr(node, i);
368                 ret = readahead_tree_block(root, blocknr);
369                 if (ret)
370                         break;
371         }
372 }
373
374 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
375                                    struct btrfs_root *root,
376                                    struct inode *inode)
377 {
378         int ret;
379         struct btrfs_path *path;
380         struct btrfs_key key;
381         struct btrfs_disk_key *found_key;
382         u32 found_type;
383         struct btrfs_leaf *leaf;
384         struct btrfs_file_extent_item *fi = NULL;
385         u64 extent_start = 0;
386         u64 extent_num_blocks = 0;
387         int found_extent;
388
389         path = btrfs_alloc_path();
390         BUG_ON(!path);
391         /* FIXME, add redo link to tree so we don't leak on crash */
392         key.objectid = inode->i_ino;
393         key.offset = (u64)-1;
394         key.flags = (u32)-1;
395         while(1) {
396                 btrfs_init_path(path);
397                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
398                 if (ret < 0) {
399                         goto error;
400                 }
401                 if (ret > 0) {
402                         BUG_ON(path->slots[0] == 0);
403                         path->slots[0]--;
404                 }
405                 reada_truncate(root, path, inode->i_ino);
406                 leaf = btrfs_buffer_leaf(path->nodes[0]);
407                 found_key = &leaf->items[path->slots[0]].key;
408                 found_type = btrfs_disk_key_type(found_key);
409                 if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
410                         break;
411                 if (found_type != BTRFS_CSUM_ITEM_KEY &&
412                     found_type != BTRFS_DIR_ITEM_KEY &&
413                     found_type != BTRFS_DIR_INDEX_KEY &&
414                     found_type != BTRFS_EXTENT_DATA_KEY)
415                         break;
416                 if (btrfs_disk_key_offset(found_key) < inode->i_size)
417                         break;
418                 found_extent = 0;
419                 if (btrfs_disk_key_type(found_key) == BTRFS_EXTENT_DATA_KEY) {
420                         fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
421                                             path->slots[0],
422                                             struct btrfs_file_extent_item);
423                         if (btrfs_file_extent_type(fi) !=
424                             BTRFS_FILE_EXTENT_INLINE) {
425                                 u64 num_dec;
426                                 extent_start =
427                                         btrfs_file_extent_disk_blocknr(fi);
428                                 extent_num_blocks =
429                                         btrfs_file_extent_disk_num_blocks(fi);
430                                 /* FIXME blocksize != 4096 */
431                                 num_dec = btrfs_file_extent_num_blocks(fi) << 3;
432                                 if (extent_start != 0) {
433                                         found_extent = 1;
434                                         inode->i_blocks -= num_dec;
435                                 }
436                         }
437                 }
438                 ret = btrfs_del_item(trans, root, path);
439                 BUG_ON(ret);
440                 btrfs_release_path(root, path);
441                 if (found_extent) {
442                         ret = btrfs_free_extent(trans, root, extent_start,
443                                                 extent_num_blocks, 0);
444                         BUG_ON(ret);
445                 }
446         }
447         ret = 0;
448 error:
449         btrfs_release_path(root, path);
450         btrfs_free_path(path);
451         inode->i_sb->s_dirt = 1;
452         return ret;
453 }
454
455 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
456 {
457         struct inode *inode = dentry->d_inode;
458         int err;
459
460         err = inode_change_ok(inode, attr);
461         if (err)
462                 return err;
463
464         if (S_ISREG(inode->i_mode) &&
465             attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
466                 struct btrfs_trans_handle *trans;
467                 struct btrfs_root *root = BTRFS_I(inode)->root;
468                 u64 mask = root->blocksize - 1;
469                 u64 pos = (inode->i_size + mask) & ~mask;
470                 u64 hole_size;
471
472                 if (attr->ia_size < pos)
473                         goto out;
474                 hole_size = (attr->ia_size - pos + mask) & ~mask;
475                 hole_size >>= inode->i_blkbits;
476
477                 mutex_lock(&root->fs_info->fs_mutex);
478                 trans = btrfs_start_transaction(root, 1);
479                 btrfs_set_trans_block_group(trans, inode);
480                 err = btrfs_insert_file_extent(trans, root, inode->i_ino,
481                                                pos, 0, 0, hole_size);
482                 BUG_ON(err);
483                 btrfs_end_transaction(trans, root);
484                 mutex_unlock(&root->fs_info->fs_mutex);
485         }
486
487         err = inode_setattr(inode, attr);
488
489 out:
490         return err;
491 }
492 static void btrfs_delete_inode(struct inode *inode)
493 {
494         struct btrfs_trans_handle *trans;
495         struct btrfs_root *root = BTRFS_I(inode)->root;
496         int ret;
497
498         truncate_inode_pages(&inode->i_data, 0);
499         if (is_bad_inode(inode)) {
500                 goto no_delete;
501         }
502         inode->i_size = 0;
503         mutex_lock(&root->fs_info->fs_mutex);
504         trans = btrfs_start_transaction(root, 1);
505         btrfs_set_trans_block_group(trans, inode);
506         ret = btrfs_truncate_in_trans(trans, root, inode);
507         BUG_ON(ret);
508         btrfs_free_inode(trans, root, inode);
509         btrfs_end_transaction(trans, root);
510         mutex_unlock(&root->fs_info->fs_mutex);
511         btrfs_btree_balance_dirty(root);
512         return;
513 no_delete:
514         clear_inode(inode);
515 }
516
517 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
518                                struct btrfs_key *location)
519 {
520         const char *name = dentry->d_name.name;
521         int namelen = dentry->d_name.len;
522         struct btrfs_dir_item *di;
523         struct btrfs_path *path;
524         struct btrfs_root *root = BTRFS_I(dir)->root;
525         int ret;
526
527         path = btrfs_alloc_path();
528         BUG_ON(!path);
529         btrfs_init_path(path);
530         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
531                                     namelen, 0);
532         if (!di || IS_ERR(di)) {
533                 location->objectid = 0;
534                 ret = 0;
535                 goto out;
536         }
537         btrfs_disk_key_to_cpu(location, &di->location);
538 out:
539         btrfs_release_path(root, path);
540         btrfs_free_path(path);
541         return ret;
542 }
543
544 static int fixup_tree_root_location(struct btrfs_root *root,
545                              struct btrfs_key *location,
546                              struct btrfs_root **sub_root)
547 {
548         struct btrfs_path *path;
549         struct btrfs_root_item *ri;
550
551         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
552                 return 0;
553         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
554                 return 0;
555
556         path = btrfs_alloc_path();
557         BUG_ON(!path);
558         mutex_lock(&root->fs_info->fs_mutex);
559
560         *sub_root = btrfs_read_fs_root(root->fs_info, location);
561         if (IS_ERR(*sub_root))
562                 return PTR_ERR(*sub_root);
563
564         ri = &(*sub_root)->root_item;
565         location->objectid = btrfs_root_dirid(ri);
566         location->flags = 0;
567         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
568         location->offset = 0;
569
570         btrfs_free_path(path);
571         mutex_unlock(&root->fs_info->fs_mutex);
572         return 0;
573 }
574
575 static int btrfs_init_locked_inode(struct inode *inode, void *p)
576 {
577         struct btrfs_iget_args *args = p;
578         inode->i_ino = args->ino;
579         BTRFS_I(inode)->root = args->root;
580         return 0;
581 }
582
583 static int btrfs_find_actor(struct inode *inode, void *opaque)
584 {
585         struct btrfs_iget_args *args = opaque;
586         return (args->ino == inode->i_ino &&
587                 args->root == BTRFS_I(inode)->root);
588 }
589
590 static struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
591                                        struct btrfs_root *root)
592 {
593         struct inode *inode;
594         struct btrfs_iget_args args;
595         args.ino = objectid;
596         args.root = root;
597
598         inode = iget5_locked(s, objectid, btrfs_find_actor,
599                              btrfs_init_locked_inode,
600                              (void *)&args);
601         return inode;
602 }
603
604 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
605                                    struct nameidata *nd)
606 {
607         struct inode * inode;
608         struct btrfs_inode *bi = BTRFS_I(dir);
609         struct btrfs_root *root = bi->root;
610         struct btrfs_root *sub_root = root;
611         struct btrfs_key location;
612         int ret;
613
614         if (dentry->d_name.len > BTRFS_NAME_LEN)
615                 return ERR_PTR(-ENAMETOOLONG);
616         mutex_lock(&root->fs_info->fs_mutex);
617         ret = btrfs_inode_by_name(dir, dentry, &location);
618         mutex_unlock(&root->fs_info->fs_mutex);
619         if (ret < 0)
620                 return ERR_PTR(ret);
621         inode = NULL;
622         if (location.objectid) {
623                 ret = fixup_tree_root_location(root, &location, &sub_root);
624                 if (ret < 0)
625                         return ERR_PTR(ret);
626                 if (ret > 0)
627                         return ERR_PTR(-ENOENT);
628                 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
629                                           sub_root);
630                 if (!inode)
631                         return ERR_PTR(-EACCES);
632                 if (inode->i_state & I_NEW) {
633                         if (sub_root != root) {
634 printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_root, BTRFS_I(inode)->root);
635                                 igrab(inode);
636                                 sub_root->inode = inode;
637                         }
638                         BTRFS_I(inode)->root = sub_root;
639                         memcpy(&BTRFS_I(inode)->location, &location,
640                                sizeof(location));
641                         btrfs_read_locked_inode(inode);
642                         unlock_new_inode(inode);
643                 }
644         }
645         return d_splice_alias(inode, dentry);
646 }
647
648 static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path,
649                          u64 objectid)
650 {
651         struct btrfs_node *node;
652         int i;
653         u32 nritems;
654         u64 item_objectid;
655         u64 blocknr;
656         int slot;
657         int ret;
658
659         if (!path->nodes[1])
660                 return;
661         node = btrfs_buffer_node(path->nodes[1]);
662         slot = path->slots[1];
663         nritems = btrfs_header_nritems(&node->header);
664         for (i = slot + 1; i < nritems; i++) {
665                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
666                 if (item_objectid != objectid)
667                         break;
668                 blocknr = btrfs_node_blockptr(node, i);
669                 ret = readahead_tree_block(root, blocknr);
670                 if (ret)
671                         break;
672         }
673 }
674
675 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
676 {
677         struct inode *inode = filp->f_path.dentry->d_inode;
678         struct btrfs_root *root = BTRFS_I(inode)->root;
679         struct btrfs_item *item;
680         struct btrfs_dir_item *di;
681         struct btrfs_key key;
682         struct btrfs_path *path;
683         int ret;
684         u32 nritems;
685         struct btrfs_leaf *leaf;
686         int slot;
687         int advance;
688         unsigned char d_type = DT_UNKNOWN;
689         int over = 0;
690         u32 di_cur;
691         u32 di_total;
692         u32 di_len;
693         int key_type = BTRFS_DIR_INDEX_KEY;
694
695         /* FIXME, use a real flag for deciding about the key type */
696         if (root->fs_info->tree_root == root)
697                 key_type = BTRFS_DIR_ITEM_KEY;
698         mutex_lock(&root->fs_info->fs_mutex);
699         key.objectid = inode->i_ino;
700         key.flags = 0;
701         btrfs_set_key_type(&key, key_type);
702         key.offset = filp->f_pos;
703         path = btrfs_alloc_path();
704         btrfs_init_path(path);
705         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
706         if (ret < 0)
707                 goto err;
708         advance = 0;
709         reada_leaves(root, path, inode->i_ino);
710         while(1) {
711                 leaf = btrfs_buffer_leaf(path->nodes[0]);
712                 nritems = btrfs_header_nritems(&leaf->header);
713                 slot = path->slots[0];
714                 if (advance || slot >= nritems) {
715                         if (slot >= nritems -1) {
716                                 reada_leaves(root, path, inode->i_ino);
717                                 ret = btrfs_next_leaf(root, path);
718                                 if (ret)
719                                         break;
720                                 leaf = btrfs_buffer_leaf(path->nodes[0]);
721                                 nritems = btrfs_header_nritems(&leaf->header);
722                                 slot = path->slots[0];
723                         } else {
724                                 slot++;
725                                 path->slots[0]++;
726                         }
727                 }
728                 advance = 1;
729                 item = leaf->items + slot;
730                 if (btrfs_disk_key_objectid(&item->key) != key.objectid)
731                         break;
732                 if (btrfs_disk_key_type(&item->key) != key_type)
733                         break;
734                 if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
735                         continue;
736                 filp->f_pos = btrfs_disk_key_offset(&item->key);
737                 advance = 1;
738                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
739                 di_cur = 0;
740                 di_total = btrfs_item_size(leaf->items + slot);
741                 while(di_cur < di_total) {
742                         over = filldir(dirent, (const char *)(di + 1),
743                                        btrfs_dir_name_len(di),
744                                        btrfs_disk_key_offset(&item->key),
745                                        btrfs_disk_key_objectid(&di->location),
746                                        d_type);
747                         if (over)
748                                 goto nopos;
749                         di_len = btrfs_dir_name_len(di) + sizeof(*di);
750                         di_cur += di_len;
751                         di = (struct btrfs_dir_item *)((char *)di + di_len);
752                 }
753         }
754         filp->f_pos++;
755 nopos:
756         ret = 0;
757 err:
758         btrfs_release_path(root, path);
759         btrfs_free_path(path);
760         mutex_unlock(&root->fs_info->fs_mutex);
761         return ret;
762 }
763
764 static void btrfs_put_super (struct super_block * sb)
765 {
766         struct btrfs_root *root = btrfs_sb(sb);
767         int ret;
768
769         ret = close_ctree(root);
770         if (ret) {
771                 printk("close ctree returns %d\n", ret);
772         }
773         sb->s_fs_info = NULL;
774 }
775
776 static int btrfs_fill_super(struct super_block * sb, void * data, int silent)
777 {
778         struct inode * inode;
779         struct dentry * root_dentry;
780         struct btrfs_super_block *disk_super;
781         struct btrfs_root *tree_root;
782         struct btrfs_inode *bi;
783
784         sb->s_maxbytes = MAX_LFS_FILESIZE;
785         sb->s_magic = BTRFS_SUPER_MAGIC;
786         sb->s_op = &btrfs_super_ops;
787         sb->s_time_gran = 1;
788
789         tree_root = open_ctree(sb);
790
791         if (!tree_root) {
792                 printk("btrfs: open_ctree failed\n");
793                 return -EIO;
794         }
795         sb->s_fs_info = tree_root;
796         disk_super = tree_root->fs_info->disk_super;
797         printk("read in super total blocks %Lu root %Lu\n",
798                btrfs_super_total_blocks(disk_super),
799                btrfs_super_root_dir(disk_super));
800
801         inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super),
802                                   tree_root);
803         bi = BTRFS_I(inode);
804         bi->location.objectid = inode->i_ino;
805         bi->location.offset = 0;
806         bi->location.flags = 0;
807         bi->root = tree_root;
808         btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY);
809
810         if (!inode)
811                 return -ENOMEM;
812         if (inode->i_state & I_NEW) {
813                 btrfs_read_locked_inode(inode);
814                 unlock_new_inode(inode);
815         }
816
817         root_dentry = d_alloc_root(inode);
818         if (!root_dentry) {
819                 iput(inode);
820                 return -ENOMEM;
821         }
822         sb->s_root = root_dentry;
823
824         return 0;
825 }
826
827 static int btrfs_write_inode(struct inode *inode, int wait)
828 {
829         struct btrfs_root *root = BTRFS_I(inode)->root;
830         struct btrfs_trans_handle *trans;
831         int ret = 0;
832
833         if (wait) {
834                 mutex_lock(&root->fs_info->fs_mutex);
835                 trans = btrfs_start_transaction(root, 1);
836                 btrfs_set_trans_block_group(trans, inode);
837                 ret = btrfs_commit_transaction(trans, root);
838                 mutex_unlock(&root->fs_info->fs_mutex);
839         }
840         return ret;
841 }
842
843 static void btrfs_dirty_inode(struct inode *inode)
844 {
845         struct btrfs_root *root = BTRFS_I(inode)->root;
846         struct btrfs_trans_handle *trans;
847
848         mutex_lock(&root->fs_info->fs_mutex);
849         trans = btrfs_start_transaction(root, 1);
850         btrfs_set_trans_block_group(trans, inode);
851         btrfs_update_inode(trans, root, inode);
852         btrfs_end_transaction(trans, root);
853         mutex_unlock(&root->fs_info->fs_mutex);
854         btrfs_btree_balance_dirty(root);
855 }
856
857 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
858                                      struct btrfs_root *root,
859                                      u64 objectid,
860                                      struct btrfs_block_group_cache *group,
861                                      int mode)
862 {
863         struct inode *inode;
864         struct btrfs_inode_item inode_item;
865         struct btrfs_key *location;
866         int ret;
867         int owner;
868
869         inode = new_inode(root->fs_info->sb);
870         if (!inode)
871                 return ERR_PTR(-ENOMEM);
872
873         BTRFS_I(inode)->root = root;
874         if (mode & S_IFDIR)
875                 owner = 0;
876         else
877                 owner = 1;
878         group = btrfs_find_block_group(root, group, 0, 0, owner);
879         BTRFS_I(inode)->block_group = group;
880
881         inode->i_uid = current->fsuid;
882         inode->i_gid = current->fsgid;
883         inode->i_mode = mode;
884         inode->i_ino = objectid;
885         inode->i_blocks = 0;
886         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
887         fill_inode_item(&inode_item, inode);
888         location = &BTRFS_I(inode)->location;
889         location->objectid = objectid;
890         location->flags = 0;
891         location->offset = 0;
892         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
893
894         ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
895         BUG_ON(ret);
896
897         insert_inode_hash(inode);
898         return inode;
899 }
900
901 static int btrfs_add_link(struct btrfs_trans_handle *trans,
902                             struct dentry *dentry, struct inode *inode)
903 {
904         int ret;
905         struct btrfs_key key;
906         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
907         key.objectid = inode->i_ino;
908         key.flags = 0;
909         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
910         key.offset = 0;
911
912         ret = btrfs_insert_dir_item(trans, root,
913                                     dentry->d_name.name, dentry->d_name.len,
914                                     dentry->d_parent->d_inode->i_ino,
915                                     &key, 0);
916         if (ret == 0) {
917                 dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2;
918                 ret = btrfs_update_inode(trans, root,
919                                          dentry->d_parent->d_inode);
920         }
921         return ret;
922 }
923
924 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
925                             struct dentry *dentry, struct inode *inode)
926 {
927         int err = btrfs_add_link(trans, dentry, inode);
928         if (!err) {
929                 d_instantiate(dentry, inode);
930                 return 0;
931         }
932         if (err > 0)
933                 err = -EEXIST;
934         return err;
935 }
936
937 static int btrfs_create(struct inode *dir, struct dentry *dentry,
938                         int mode, struct nameidata *nd)
939 {
940         struct btrfs_trans_handle *trans;
941         struct btrfs_root *root = BTRFS_I(dir)->root;
942         struct inode *inode;
943         int err;
944         int drop_inode = 0;
945         u64 objectid;
946
947         mutex_lock(&root->fs_info->fs_mutex);
948         trans = btrfs_start_transaction(root, 1);
949         btrfs_set_trans_block_group(trans, dir);
950
951         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
952         if (err) {
953                 err = -ENOSPC;
954                 goto out_unlock;
955         }
956
957         inode = btrfs_new_inode(trans, root, objectid,
958                                 BTRFS_I(dir)->block_group, mode);
959         err = PTR_ERR(inode);
960         if (IS_ERR(inode))
961                 goto out_unlock;
962
963         btrfs_set_trans_block_group(trans, inode);
964         err = btrfs_add_nondir(trans, dentry, inode);
965         if (err)
966                 drop_inode = 1;
967         else {
968                 inode->i_mapping->a_ops = &btrfs_aops;
969                 inode->i_fop = &btrfs_file_operations;
970                 inode->i_op = &btrfs_file_inode_operations;
971         }
972         dir->i_sb->s_dirt = 1;
973         btrfs_update_inode_block_group(trans, inode);
974         btrfs_update_inode_block_group(trans, dir);
975 out_unlock:
976         btrfs_end_transaction(trans, root);
977         mutex_unlock(&root->fs_info->fs_mutex);
978
979         if (drop_inode) {
980                 inode_dec_link_count(inode);
981                 iput(inode);
982         }
983         btrfs_btree_balance_dirty(root);
984         return err;
985 }
986
987 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
988                       struct dentry *dentry)
989 {
990         struct btrfs_trans_handle *trans;
991         struct btrfs_root *root = BTRFS_I(dir)->root;
992         struct inode *inode = old_dentry->d_inode;
993         int err;
994         int drop_inode = 0;
995
996         if (inode->i_nlink == 0)
997                 return -ENOENT;
998
999         inc_nlink(inode);
1000         mutex_lock(&root->fs_info->fs_mutex);
1001         trans = btrfs_start_transaction(root, 1);
1002         btrfs_set_trans_block_group(trans, dir);
1003         atomic_inc(&inode->i_count);
1004         err = btrfs_add_nondir(trans, dentry, inode);
1005         if (err)
1006                 drop_inode = 1;
1007         dir->i_sb->s_dirt = 1;
1008         btrfs_update_inode_block_group(trans, dir);
1009         btrfs_update_inode(trans, root, inode);
1010
1011         btrfs_end_transaction(trans, root);
1012         mutex_unlock(&root->fs_info->fs_mutex);
1013
1014         if (drop_inode) {
1015                 inode_dec_link_count(inode);
1016                 iput(inode);
1017         }
1018         btrfs_btree_balance_dirty(root);
1019         return err;
1020 }
1021
1022 static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans,
1023                                 struct btrfs_root *root,
1024                                 u64 objectid, u64 dirid)
1025 {
1026         int ret;
1027         char buf[2];
1028         struct btrfs_key key;
1029
1030         buf[0] = '.';
1031         buf[1] = '.';
1032
1033         key.objectid = objectid;
1034         key.offset = 0;
1035         key.flags = 0;
1036         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1037
1038         ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid,
1039                                     &key, 1);
1040         if (ret)
1041                 goto error;
1042         key.objectid = dirid;
1043         ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid,
1044                                     &key, 1);
1045         if (ret)
1046                 goto error;
1047 error:
1048         return ret;
1049 }
1050
1051 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1052 {
1053         struct inode *inode;
1054         struct btrfs_trans_handle *trans;
1055         struct btrfs_root *root = BTRFS_I(dir)->root;
1056         int err = 0;
1057         int drop_on_err = 0;
1058         u64 objectid;
1059
1060         mutex_lock(&root->fs_info->fs_mutex);
1061         trans = btrfs_start_transaction(root, 1);
1062         btrfs_set_trans_block_group(trans, dir);
1063         if (IS_ERR(trans)) {
1064                 err = PTR_ERR(trans);
1065                 goto out_unlock;
1066         }
1067
1068         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1069         if (err) {
1070                 err = -ENOSPC;
1071                 goto out_unlock;
1072         }
1073
1074         inode = btrfs_new_inode(trans, root, objectid,
1075                                 BTRFS_I(dir)->block_group, S_IFDIR | mode);
1076         if (IS_ERR(inode)) {
1077                 err = PTR_ERR(inode);
1078                 goto out_fail;
1079         }
1080         drop_on_err = 1;
1081         inode->i_op = &btrfs_dir_inode_operations;
1082         inode->i_fop = &btrfs_dir_file_operations;
1083         btrfs_set_trans_block_group(trans, inode);
1084
1085         err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino);
1086         if (err)
1087                 goto out_fail;
1088
1089         inode->i_size = 6;
1090         err = btrfs_update_inode(trans, root, inode);
1091         if (err)
1092                 goto out_fail;
1093         err = btrfs_add_link(trans, dentry, inode);
1094         if (err)
1095                 goto out_fail;
1096         d_instantiate(dentry, inode);
1097         drop_on_err = 0;
1098         dir->i_sb->s_dirt = 1;
1099         btrfs_update_inode_block_group(trans, inode);
1100         btrfs_update_inode_block_group(trans, dir);
1101
1102 out_fail:
1103         btrfs_end_transaction(trans, root);
1104 out_unlock:
1105         mutex_unlock(&root->fs_info->fs_mutex);
1106         if (drop_on_err)
1107                 iput(inode);
1108         btrfs_btree_balance_dirty(root);
1109         return err;
1110 }
1111
1112 static int btrfs_sync_file(struct file *file,
1113                            struct dentry *dentry, int datasync)
1114 {
1115         struct inode *inode = dentry->d_inode;
1116         struct btrfs_root *root = BTRFS_I(inode)->root;
1117         int ret;
1118         struct btrfs_trans_handle *trans;
1119
1120         mutex_lock(&root->fs_info->fs_mutex);
1121         trans = btrfs_start_transaction(root, 1);
1122         if (!trans) {
1123                 ret = -ENOMEM;
1124                 goto out;
1125         }
1126         ret = btrfs_commit_transaction(trans, root);
1127         mutex_unlock(&root->fs_info->fs_mutex);
1128 out:
1129         return ret > 0 ? EIO : ret;
1130 }
1131
1132 static int btrfs_sync_fs(struct super_block *sb, int wait)
1133 {
1134         struct btrfs_trans_handle *trans;
1135         struct btrfs_root *root;
1136         int ret;
1137         root = btrfs_sb(sb);
1138
1139         sb->s_dirt = 0;
1140         if (!wait) {
1141                 filemap_flush(root->fs_info->btree_inode->i_mapping);
1142                 return 0;
1143         }
1144         mutex_lock(&root->fs_info->fs_mutex);
1145         trans = btrfs_start_transaction(root, 1);
1146         ret = btrfs_commit_transaction(trans, root);
1147         sb->s_dirt = 0;
1148         BUG_ON(ret);
1149 printk("btrfs sync_fs\n");
1150         mutex_unlock(&root->fs_info->fs_mutex);
1151         return 0;
1152 }
1153
/*
 * Map logical block @iblock of @inode onto @result.
 *
 * Looks up the file extent item for the byte offset of @iblock.  For a
 * regular extent that contains @iblock, @result is mapped to the matching
 * disk block.  For an inline extent, the inline bytes are copied into
 * result->b_page, the rest of the page is zeroed, and @result is mapped to
 * logical block 0.  In every other case @result is left untouched.
 *
 * A non-zero @create only triggers WARN_ON(1); nothing is allocated here.
 * Returns 0, or the negative error from btrfs_lookup_file_extent().
 * This function does not take fs_mutex itself; btrfs_get_block() calls it
 * with the mutex held.
 */
1154 static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
1155                            struct buffer_head *result, int create)
1156 {
1157         int ret;
1158         int err = 0;
1159         u64 blocknr;
1160         u64 extent_start = 0;
1161         u64 extent_end = 0;
1162         u64 objectid = inode->i_ino;
1163         u32 found_type;
1164         struct btrfs_path *path;
1165         struct btrfs_root *root = BTRFS_I(inode)->root;
1166         struct btrfs_file_extent_item *item;
1167         struct btrfs_leaf *leaf;
1168         struct btrfs_disk_key *found_key;
1169
1170         path = btrfs_alloc_path();
1171         BUG_ON(!path);
1172         btrfs_init_path(path);
1173         if (create) {
1174                 WARN_ON(1);
1175         }
1176
             /* Search by the byte offset of the requested block. */
1177         ret = btrfs_lookup_file_extent(NULL, root, path,
1178                                        inode->i_ino,
1179                                        iblock << inode->i_blkbits, 0);
1180         if (ret < 0) {
1181                 err = ret;
1182                 goto out;
1183         }
1184
             /*
              * A positive result: step back one slot.  Presumably this means
              * "no exact match" and the previous item may be the extent that
              * covers the offset -- confirm against btrfs_lookup_file_extent().
              * With no previous slot in this leaf, give up and return 0.
              */
1185         if (ret != 0) {
1186                 if (path->slots[0] == 0) {
1187                         btrfs_release_path(root, path);
1188                         goto out;
1189                 }
1190                 path->slots[0]--;
1191         }
1192
             /*
              * The item is read as a file extent item here, before the key
              * check below confirms that it is one.
              */
1193         item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
1194                               struct btrfs_file_extent_item);
1195         leaf = btrfs_buffer_leaf(path->nodes[0]);
1196         blocknr = btrfs_file_extent_disk_blocknr(item);
1197         blocknr += btrfs_file_extent_offset(item);
1198
1199         /* are we inside the extent that was found? */
1200         found_key = &leaf->items[path->slots[0]].key;
1201         found_type = btrfs_disk_key_type(found_key);
1202         if (btrfs_disk_key_objectid(found_key) != objectid ||
1203             found_type != BTRFS_EXTENT_DATA_KEY) {
1204                 extent_end = 0;
1205                 extent_start = 0;
1206                 goto out;
1207         }
             /* found_type is reused: from here on it holds the extent type. */
1208         found_type = btrfs_file_extent_type(item);
1209         extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
1210         if (found_type == BTRFS_FILE_EXTENT_REG) {
                     /* Convert the key's byte offset into a block range. */
1211                 extent_start = extent_start >> inode->i_blkbits;
1212                 extent_end = extent_start + btrfs_file_extent_num_blocks(item);
1213                 err = 0;
                     /*
                      * blocknr == 0 leaves @result unmapped; presumably this
                      * marks a hole in a sparse file -- confirm.
                      */
1214                 if (blocknr == 0)
1215                         goto out;
1216                 if (iblock >= extent_start && iblock < extent_end) {
1217                         btrfs_map_bh_to_logical(root, result, blocknr +
1218                                                 iblock - extent_start);
1219                         goto out;
1220                 }
1221         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1222                 char *ptr;
1223                 char *map;
1224                 u32 size;
1225                 size = btrfs_file_extent_inline_len(leaf->items +
1226                                                     path->slots[0]);
1227                 extent_end = (extent_start + size) >> inode->i_blkbits;
1228                 extent_start >>= inode->i_blkbits;
                     /*
                      * NOTE(review): the upper bound is inclusive here
                      * (iblock > extent_end), unlike the regular-extent
                      * branch above -- confirm that this is intended.
                      */
1229                 if (iblock < extent_start || iblock > extent_end) {
1230                         goto out;
1231                 }
                     /*
                      * Copy the inline bytes to the start of the page and zero
                      * the remainder up to PAGE_CACHE_SIZE.
                      */
1232                 ptr = btrfs_file_extent_inline_start(item);
1233                 map = kmap(result->b_page);
1234                 memcpy(map, ptr, size);
1235                 memset(map + size, 0, PAGE_CACHE_SIZE - size);
1236                 flush_dcache_page(result->b_page);
1237                 kunmap(result->b_page);
1238                 set_buffer_uptodate(result);
1239                 SetPageChecked(result->b_page);
1240                 btrfs_map_bh_to_logical(root, result, 0);
1241         }
1242 out:
1243         btrfs_free_path(path);
1244         return err;
1245 }
1246
1247 static int btrfs_get_block(struct inode *inode, sector_t iblock,
1248                            struct buffer_head *result, int create)
1249 {
1250         int err;
1251         struct btrfs_root *root = BTRFS_I(inode)->root;
1252         mutex_lock(&root->fs_info->fs_mutex);
1253         err = btrfs_get_block_lock(inode, iblock, result, create);
1254         mutex_unlock(&root->fs_info->fs_mutex);
1255         return err;
1256 }
1257
1258 static int btrfs_prepare_write(struct file *file, struct page *page,
1259                                unsigned from, unsigned to)
1260 {
1261         return nobh_prepare_write(page, from, to, btrfs_get_block);
1262 }
1263
/*
 * Write-super hook: runs btrfs_sync_fs() in waiting mode and ignores its
 * return value.
 */
static void btrfs_write_super(struct super_block *sb)
{
        const int wait = 1;

        btrfs_sync_fs(sb, wait);
}
1268
1269 static int btrfs_readpage(struct file *file, struct page *page)
1270 {
1271         return mpage_readpage(page, btrfs_get_block);
1272 }
1273
/*
 * Write out the dirty buffers of one locked page of @inode.
 *
 * Pass 1 maps dirty, unmapped buffers with btrfs_get_block() (create = 0)
 * and cleans buffers that lie beyond i_size.  Pass 2 locks the mapped
 * buffers and marks the dirty ones for asynchronous write.  Pass 3 marks
 * the page as under writeback, submits the marked buffers and unlocks the
 * page.  If btrfs_get_block() fails, the "recover" path submits whatever
 * is already mapped and dirty and flags the page with an error.
 *
 * The page must be locked on entry (BUG_ON otherwise).  Returns 0, or the
 * error from btrfs_get_block().
 */
1274 /*
1275  * While block_write_full_page is writing back the dirty buffers under
1276  * the page lock, whoever dirtied the buffers may decide to clean them
1277  * again at any time.  We handle that by only looking at the buffer
1278  * state inside lock_buffer().
1279  *
1280  * If block_write_full_page() is called for regular writeback
1281  * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
1282  * locked buffer.   This only can happen if someone has written the buffer
1283  * directly, with submit_bh().  At the address_space level PageWriteback
1284  * prevents this contention from occurring.
1285  */
1286 static int __btrfs_write_full_page(struct inode *inode, struct page *page,
1287                                    struct writeback_control *wbc)
1288 {
1289         int err;
1290         sector_t block;
1291         sector_t last_block;
1292         struct buffer_head *bh, *head;
1293         const unsigned blocksize = 1 << inode->i_blkbits;
1294         int nr_underway = 0;
1295
1296         BUG_ON(!PageLocked(page));
1297
             /* Index of the block that holds the last byte of the file. */
1298         last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1299
             /* A page without buffers gets dirty, uptodate ones attached. */
1300         if (!page_has_buffers(page)) {
1301                 create_empty_buffers(page, blocksize,
1302                                         (1 << BH_Dirty)|(1 << BH_Uptodate));
1303         }
1304
1305         /*
1306          * Be very careful.  We have no exclusion from __set_page_dirty_buffers
1307          * here, and the (potentially unmapped) buffers may become dirty at
1308          * any time.  If a buffer becomes dirty here after we've inspected it
1309          * then we just miss that fact, and the page stays dirty.
1310          *
1311          * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
1312          * handle that here by just cleaning them.
1313          */
1314
             /* File block number of the first buffer in this page. */
1315         block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1316         head = page_buffers(page);
1317         bh = head;
1318
1319         /*
1320          * Get all the dirty buffers mapped to disk addresses and
1321          * handle any aliases from the underlying blockdev's mapping.
1322          */
1323         do {
1324                 if (block > last_block) {
1325                         /*
1326                          * mapped buffers outside i_size will occur, because
1327                          * this page can be outside i_size when there is a
1328                          * truncate in progress.
1329                          */
1330                         /*
1331                          * The buffer was zeroed by block_write_full_page()
1332                          */
1333                         clear_buffer_dirty(bh);
1334                         set_buffer_uptodate(bh);
1335                 } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
1336                         WARN_ON(bh->b_size != blocksize);
                             /* create = 0: look up a mapping, allocate nothing. */
1337                         err = btrfs_get_block(inode, block, bh, 0);
1338                         if (err) {
1339 printk("writepage going to recovery err %d\n", err);
1340                                 goto recover;
1341                         }
1342                         if (buffer_new(bh)) {
1343                                 /* blockdev mappings never come here */
1344                                 clear_buffer_new(bh);
1345                         }
1346                 }
1347                 bh = bh->b_this_page;
1348                 block++;
1349         } while (bh != head);
1350
             /*
              * bh is back at head here.  Each "continue" below jumps to the
              * loop condition, which also advances bh.
              */
1351         do {
1352                 if (!buffer_mapped(bh))
1353                         continue;
1354                 /*
1355                  * If it's a fully non-blocking write attempt and we cannot
1356                  * lock the buffer then redirty the page.  Note that this can
1357                  * potentially cause a busy-wait loop from pdflush and kswapd
1358                  * activity, but those code paths have their own higher-level
1359                  * throttling.
1360                  */
1361                 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1362                         lock_buffer(bh);
1363                 } else if (test_set_buffer_locked(bh)) {
1364                         redirty_page_for_writepage(wbc, page);
1365                         continue;
1366                 }
                     /*
                      * NOTE(review): a buffer whose b_blocknr is 0 has its
                      * dirty bit cleared but is never submitted.  Presumably
                      * block 0 stands for data with no disk block of its own
                      * (btrfs_get_block_lock() maps inline extents to logical
                      * block 0) -- confirm.
                      */
1367                 if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
1368                         mark_buffer_async_write(bh);
1369                 } else {
1370                         unlock_buffer(bh);
1371                 }
1372         } while ((bh = bh->b_this_page) != head);
1373
1374         /*
1375          * The page and its buffers are protected by PageWriteback(), so we can
1376          * drop the bh refcounts early.
1377          */
1378         BUG_ON(PageWriteback(page));
1379         set_page_writeback(page);
1380
             /*
              * b_this_page is read before submit_bh(); presumably the buffer
              * must not be touched once it has been submitted -- confirm.
              */
1381         do {
1382                 struct buffer_head *next = bh->b_this_page;
1383                 if (buffer_async_write(bh)) {
1384                         submit_bh(WRITE, bh);
1385                         nr_underway++;
1386                 }
1387                 bh = next;
1388         } while (bh != head);
1389         unlock_page(page);
1390
1391         err = 0;
1392 done:
             /* Both the normal and the recover path arrive with bh == head. */
1393         if (nr_underway == 0) {
1394                 /*
1395                  * The page was marked dirty, but the buffers were
1396                  * clean.  Someone wrote them back by hand with
1397                  * ll_rw_block/submit_bh.  A rare case.
1398                  */
1399                 int uptodate = 1;
1400                 do {
1401                         if (!buffer_uptodate(bh)) {
1402                                 uptodate = 0;
1403                                 break;
1404                         }
1405                         bh = bh->b_this_page;
1406                 } while (bh != head);
1407                 if (uptodate)
1408                         SetPageUptodate(page);
1409                 end_page_writeback(page);
1410         }
1411         return err;
1412
1413 recover:
1414         /*
1415          * ENOSPC, or some other error.  We may already have added some
1416          * blocks to the file, so we need to write these out to avoid
1417          * exposing stale data.
1418          * The page is currently locked and not marked for writeback
1419          */
1420         bh = head;
1421         /* Recovery: lock and submit the mapped buffers */
1422         do {
1423                 if (buffer_mapped(bh) && buffer_dirty(bh)) {
1424                         lock_buffer(bh);
1425                         mark_buffer_async_write(bh);
1426                 } else {
1427                         /*
1428                          * The buffer may have been set dirty during
1429                          * attachment to a dirty page.
1430                          */
1431                         clear_buffer_dirty(bh);
1432                 }
1433         } while ((bh = bh->b_this_page) != head);
1434         SetPageError(page);
1435         BUG_ON(PageWriteback(page));
1436         set_page_writeback(page);
1437         do {
1438                 struct buffer_head *next = bh->b_this_page;
1439                 if (buffer_async_write(bh)) {
1440                         clear_buffer_dirty(bh);
1441                         submit_bh(WRITE, bh);
1442                         nr_underway++;
1443                 }
1444                 bh = next;
1445         } while (bh != head);
1446         unlock_page(page);
             /* err still holds the btrfs_get_block() failure for the caller. */
1447         goto done;
1448 }
1449
1450 /*
1451  * The generic ->writepage function for buffer-backed address_spaces
1452  */
1453 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1454 {
1455         struct inode * const inode = page->mapping->host;
1456         loff_t i_size = i_size_read(inode);
1457         const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
1458         unsigned offset;
1459         void *kaddr;
1460
1461         /* Is the page fully inside i_size? */
1462         if (page->index < end_index)
1463                 return __btrfs_write_full_page(inode, page, wbc);
1464
1465         /* Is the page fully outside i_size? (truncate in progress) */
1466         offset = i_size & (PAGE_CACHE_SIZE-1);
1467         if (page->index >= end_index+1 || !offset) {
1468                 /*
1469                  * The page may have dirty, unmapped buffers.  For example,
1470                  * they may have been added in ext3_writepage().  Make them
1471                  * freeable here, so the page does not leak.
1472                  */
1473                 block_invalidatepage(page, 0);
1474                 unlock_page(page);
1475                 return 0; /* don't care */
1476         }
1477
1478         /*
1479          * The page straddles i_size.  It must be zeroed out on each and every
1480          * writepage invokation because it may be mmapped.  "A file is mapped
1481          * in multiples of the page size.  For a file that is not a multiple of
1482          * the  page size, the remaining memory is zeroed when mapped, and
1483          * writes to that region are not written out to the file."
1484          */
1485         kaddr = kmap_atomic(page, KM_USER0);
1486         memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1487         flush_dcache_page(page);
1488         kunmap_atomic(kaddr, KM_USER0);
1489         return __btrfs_write_full_page(inode, page, wbc);
1490 }
1491
1492 static void btrfs_truncate(struct inode *inode)
1493 {
1494         struct btrfs_root *root = BTRFS_I(inode)->root;
1495         int ret;
1496         struct btrfs_trans_handle *trans;
1497
1498         if (!S_ISREG(inode->i_mode))
1499                 return;
1500         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1501                 return;
1502
1503         nobh_truncate_page(inode->i_mapping, inode->i_size);
1504
1505         /* FIXME, add redo link to tree so we don't leak on crash */
1506         mutex_lock(&root->fs_info->fs_mutex);
1507         trans = btrfs_start_transaction(root, 1);
1508         btrfs_set_trans_block_group(trans, inode);
1509         ret = btrfs_truncate_in_trans(trans, root, inode);
1510         BUG_ON(ret);
1511         btrfs_update_inode(trans, root, inode);
1512         ret = btrfs_end_transaction(trans, root);
1513         BUG_ON(ret);
1514         mutex_unlock(&root->fs_info->fs_mutex);
1515         btrfs_btree_balance_dirty(root);
1516 }
1517
1518 /*
1519  * Make sure any changes to nobh_commit_write() are reflected in
1520  * nobh_truncate_page(), since it doesn't call commit_write().
1521  */
1522 static int btrfs_commit_write(struct file *file, struct page *page,
1523                               unsigned from, unsigned to)
1524 {
1525         struct inode *inode = page->mapping->host;
1526         struct buffer_head *bh;
1527         loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1528
1529         SetPageUptodate(page);
1530         bh = page_buffers(page);
1531         if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1532                 set_page_dirty(page);
1533         }
1534         if (pos > inode->i_size) {
1535                 i_size_write(inode, pos);
1536                 mark_inode_dirty(inode);
1537         }
1538         return 0;
1539 }
1540
1541 static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
1542                                 struct page **prepared_pages,
1543                                 const char __user * buf)
1544 {
1545         long page_fault = 0;
1546         int i;
1547         int offset = pos & (PAGE_CACHE_SIZE - 1);
1548
1549         for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
1550                 size_t count = min_t(size_t,
1551                                      PAGE_CACHE_SIZE - offset, write_bytes);
1552                 struct page *page = prepared_pages[i];
1553                 fault_in_pages_readable(buf, count);
1554
1555                 /* Copy data from userspace to the current page */
1556                 kmap(page);
1557                 page_fault = __copy_from_user(page_address(page) + offset,
1558                                               buf, count);
1559                 /* Flush processor's dcache for this page */
1560                 flush_dcache_page(page);
1561                 kunmap(page);
1562                 buf += count;
1563                 write_bytes -= count;
1564
1565                 if (page_fault)
1566                         break;
1567         }
1568         return page_fault ? -EFAULT : 0;
1569 }
1570
1571 static void btrfs_drop_pages(struct page **pages, size_t num_pages)
1572 {
1573         size_t i;
1574         for (i = 0; i < num_pages; i++) {
1575                 if (!pages[i])
1576                         break;
1577                 unlock_page(pages[i]);
1578                 mark_page_accessed(pages[i]);
1579                 page_cache_release(pages[i]);
1580         }
1581 }
1582 static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
1583                                    struct btrfs_root *root,
1584                                    struct file *file,
1585                                    struct page **pages,
1586                                    size_t num_pages,
1587                                    loff_t pos,
1588                                    size_t write_bytes)
1589 {
1590         int i;
1591         int offset;
1592         int err = 0;
1593         int ret;
1594         int this_write;
1595         struct inode *inode = file->f_path.dentry->d_inode;
1596         struct buffer_head *bh;
1597         struct btrfs_file_extent_item *ei;
1598
1599         for (i = 0; i < num_pages; i++) {
1600                 offset = pos & (PAGE_CACHE_SIZE -1);
1601                 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1602                 /* FIXME, one block at a time */
1603
1604                 mutex_lock(&root->fs_info->fs_mutex);
1605                 trans = btrfs_start_transaction(root, 1);
1606                 btrfs_set_trans_block_group(trans, inode);
1607
1608                 bh = page_buffers(pages[i]);
1609                 if (buffer_mapped(bh) && bh->b_blocknr == 0) {
1610                         struct btrfs_key key;
1611                         struct btrfs_path *path;
1612                         char *ptr;
1613                         u32 datasize;
1614
1615                         path = btrfs_alloc_path();
1616                         BUG_ON(!path);
1617                         key.objectid = inode->i_ino;
1618                         key.offset = pages[i]->index << PAGE_CACHE_SHIFT;
1619                         key.flags = 0;
1620                         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
1621                         BUG_ON(write_bytes >= PAGE_CACHE_SIZE);
1622                         datasize = offset +
1623                                 btrfs_file_extent_calc_inline_size(write_bytes);
1624                         ret = btrfs_insert_empty_item(trans, root, path, &key,
1625                                                       datasize);
1626                         BUG_ON(ret);
1627                         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
1628                                path->slots[0], struct btrfs_file_extent_item);
1629                         btrfs_set_file_extent_generation(ei, trans->transid);
1630                         btrfs_set_file_extent_type(ei,
1631                                                    BTRFS_FILE_EXTENT_INLINE);
1632                         ptr = btrfs_file_extent_inline_start(ei);
1633                         btrfs_memcpy(root, path->nodes[0]->b_data,
1634                                      ptr, bh->b_data, offset + write_bytes);
1635                         mark_buffer_dirty(path->nodes[0]);
1636                         btrfs_free_path(path);
1637                 } else if (buffer_mapped(bh)) {
1638                         btrfs_csum_file_block(trans, root, inode->i_ino,
1639                                       pages[i]->index << PAGE_CACHE_SHIFT,
1640                                       kmap(pages[i]), PAGE_CACHE_SIZE);
1641                         kunmap(pages[i]);
1642                 }
1643                 SetPageChecked(pages[i]);
1644                 // btrfs_update_inode_block_group(trans, inode);
1645                 ret = btrfs_end_transaction(trans, root);
1646                 BUG_ON(ret);
1647                 mutex_unlock(&root->fs_info->fs_mutex);
1648
1649                 ret = btrfs_commit_write(file, pages[i], offset,
1650                                          offset + this_write);
1651                 pos += this_write;
1652                 if (ret) {
1653                         err = ret;
1654                         goto failed;
1655                 }
1656                 WARN_ON(this_write > write_bytes);
1657                 write_bytes -= this_write;
1658         }
1659 failed:
1660         return err;
1661 }
1662
/*
 * drop_extents - remove the file extent items of @inode that overlap the
 * byte range [start, end).
 *
 * Each loop iteration looks up the extent item at search_start and then:
 *   - if the extent begins before @start, shrinks it in place so that it
 *     ends at @start (regular extents only; an inline extent here warns);
 *   - otherwise deletes the item and, when it is backed by disk blocks
 *     (disk_blocknr != 0), frees them with btrfs_free_extent();
 *   - if the extent reaches past @end, inserts a "bookend" item keyed at
 *     @end that points into the same disk extent, after taking an extra
 *     reference on it with btrfs_inc_extent_ref().
 *
 * inode->i_blocks is adjusted for every change to an extent that has disk
 * blocks.  NOTE(review): the "<< 3" presumably converts blocks to 512-byte
 * units for a 4096-byte block size -- TODO confirm.
 *
 * @hint_block is set to the disk block number of the last regular extent
 * that was trimmed or deleted.
 *
 * Returns 0 on success, -ENOMEM when no path can be allocated, or the
 * negative value returned by btrfs_lookup_file_extent().  Failures of the
 * tree modification helpers hit BUG_ON().
 */
1663 static int drop_extents(struct btrfs_trans_handle *trans,
1664                           struct btrfs_root *root,
1665                           struct inode *inode,
1666                           u64 start, u64 end, u64 *hint_block)
1667 {
1668         int ret;
1669         struct btrfs_key key;
1670         struct btrfs_leaf *leaf;
1671         int slot;
1672         struct btrfs_file_extent_item *extent;
1673         u64 extent_end = 0;
1674         int keep;
1675         struct btrfs_file_extent_item old;
1676         struct btrfs_path *path;
1677         u64 search_start = start;
1678         int bookend;
1679         int found_type;
1680         int found_extent;
1681         int found_inline;
1682
1683         path = btrfs_alloc_path();
1684         if (!path)
1685                 return -ENOMEM;
1686         while(1) {
1687                 btrfs_release_path(root, path);
1688                 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
1689                                                search_start, -1);
1690                 if (ret < 0)
1691                         goto out;
                     /*
                      * NOTE(review): a positive return presumably means there
                      * was no exact key match; the previous slot is examined
                      * instead, and the walk ends if there is none in this leaf.
                      */
1692                 if (ret > 0) {
1693                         if (path->slots[0] == 0) {
1694                                 ret = 0;
1695                                 goto out;
1696                         }
1697                         path->slots[0]--;
1698                 }
1699                 keep = 0;
1700                 bookend = 0;
1701                 found_extent = 0;
1702                 found_inline = 0;
1703                 extent = NULL;
1704                 leaf = btrfs_buffer_leaf(path->nodes[0]);
1705                 slot = path->slots[0];
1706                 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
                     /* done once the item starts at/after @end or is not ours */
1707                 if (key.offset >= end || key.objectid != inode->i_ino) {
1708                         ret = 0;
1709                         goto out;
1710                 }
1711                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) {
1712                         ret = 0;
1713                         goto out;
1714                 }
1715                 extent = btrfs_item_ptr(leaf, slot,
1716                                         struct btrfs_file_extent_item);
1717                 found_type = btrfs_file_extent_type(extent);
                     /* compute the file offset just past this extent */
1718                 if (found_type == BTRFS_FILE_EXTENT_REG) {
1719                         extent_end = key.offset +
1720                                 (btrfs_file_extent_num_blocks(extent) <<
1721                                  inode->i_blkbits);
1722                         found_extent = 1;
1723                 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1724                         found_inline = 1;
1725                         extent_end = key.offset +
1726                              btrfs_file_extent_inline_len(leaf->items + slot);
1727                 }
1728
                     /* neither a regular nor an inline extent: stop */
1729                 if (!found_extent && !found_inline) {
1730                         ret = 0;
1731                         goto out;
1732                 }
1733
                     /* extent ends at or before the search position: nothing to drop */
1734                 if (search_start >= extent_end) {
1735                         ret = 0;
1736                         goto out;
1737                 }
1738
                     /*
                      * Next lookup starts past this extent; an inline extent's
                      * end is rounded up to the next block boundary.
                      */
1739                 if (found_inline) {
1740                         u64 mask = root->blocksize - 1;
1741                         search_start = (extent_end + mask) & ~mask;
1742                 } else
1743                         search_start = extent_end;
1744
                     /*
                      * The extent reaches past @end, so its tail must survive as
                      * a bookend.  Keep a copy of the item in 'old' and take an
                      * extra reference on the disk extent for the bookend item.
                      */
1745                 if (end < extent_end && end >= key.offset) {
1746                         if (found_extent) {
1747                                 u64 disk_blocknr =
1748                                         btrfs_file_extent_disk_blocknr(extent);
1749                                 u64 disk_num_blocks =
1750                                       btrfs_file_extent_disk_num_blocks(extent);
1751                                 memcpy(&old, extent, sizeof(old));
1752                                 if (disk_blocknr != 0) {
1753                                         ret = btrfs_inc_extent_ref(trans, root,
1754                                                  disk_blocknr, disk_num_blocks);
1755                                         BUG_ON(ret);
1756                                 }
1757                         }
1758                         WARN_ON(found_inline);
1759                         bookend = 1;
1760                 }
1761
1762                 if (start > key.offset) {
1763                         u64 new_num;
1764                         u64 old_num;
1765                         /* truncate existing extent */
1766                         keep = 1;
1767                         WARN_ON(start & (root->blocksize - 1));
1768                         if (found_extent) {
1769                                 new_num = (start - key.offset) >>
1770                                         inode->i_blkbits;
1771                                 old_num = btrfs_file_extent_num_blocks(extent);
1772                                 *hint_block =
1773                                         btrfs_file_extent_disk_blocknr(extent);
1774                                 if (btrfs_file_extent_disk_blocknr(extent)) {
1775                                         inode->i_blocks -=
1776                                                 (old_num - new_num) << 3;
1777                                 }
1778                                 btrfs_set_file_extent_num_blocks(extent,
1779                                                                  new_num);
1780                                 mark_buffer_dirty(path->nodes[0]);
1781                         } else {
1782                                 WARN_ON(1);
1783                         }
1784                 }
                     /*
                      * The extent starts at or after @start: delete the item and
                      * release its disk blocks, if it has any.
                      */
1785                 if (!keep) {
1786                         u64 disk_blocknr = 0;
1787                         u64 disk_num_blocks = 0;
1788                         u64 extent_num_blocks = 0;
1789                         if (found_extent) {
1790                                 disk_blocknr =
1791                                       btrfs_file_extent_disk_blocknr(extent);
1792                                 disk_num_blocks =
1793                                       btrfs_file_extent_disk_num_blocks(extent);
1794                                 extent_num_blocks =
1795                                       btrfs_file_extent_num_blocks(extent);
1796                                 *hint_block =
1797                                         btrfs_file_extent_disk_blocknr(extent);
1798                         }
1799                         ret = btrfs_del_item(trans, root, path);
1800                         BUG_ON(ret);
1801                         btrfs_release_path(root, path);
1802                         extent = NULL;
1803                         if (found_extent && disk_blocknr != 0) {
1804                                 inode->i_blocks -= extent_num_blocks << 3;
1805                                 ret = btrfs_free_extent(trans, root,
1806                                                         disk_blocknr,
1807                                                         disk_num_blocks, 0);
1808                         }
1809
1810                         BUG_ON(ret);
1811                         if (!bookend && search_start >= end) {
1812                                 ret = 0;
1813                                 goto out;
1814                         }
1815                         if (!bookend)
1816                                 continue;
1817                 }
1818                 if (bookend && found_extent) {
1819                         /* create bookend */
1820                         struct btrfs_key ins;
1821                         ins.objectid = inode->i_ino;
1822                         ins.offset = end;
1823                         ins.flags = 0;
1824                         btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
1825
1826                         btrfs_release_path(root, path);
1827                         ret = btrfs_insert_empty_item(trans, root, path, &ins,
1828                                                       sizeof(*extent));
1829                         BUG_ON(ret);
1830                         extent = btrfs_item_ptr(
1831                                     btrfs_buffer_leaf(path->nodes[0]),
1832                                     path->slots[0],
1833                                     struct btrfs_file_extent_item);
                             /* same disk extent as the saved item ... */
1834                         btrfs_set_file_extent_disk_blocknr(extent,
1835                                     btrfs_file_extent_disk_blocknr(&old));
1836                         btrfs_set_file_extent_disk_num_blocks(extent,
1837                                     btrfs_file_extent_disk_num_blocks(&old));
1838
                             /* ... but starting (end - key.offset) blocks further in */
1839                         btrfs_set_file_extent_offset(extent,
1840                                     btrfs_file_extent_offset(&old) +
1841                                     ((end - key.offset) >> inode->i_blkbits));
1842                         WARN_ON(btrfs_file_extent_num_blocks(&old) <
1843                                 (extent_end - end) >> inode->i_blkbits);
1844                         btrfs_set_file_extent_num_blocks(extent,
1845                                     (extent_end - end) >> inode->i_blkbits);
1846
1847                         btrfs_set_file_extent_type(extent,
1848                                                    BTRFS_FILE_EXTENT_REG);
1849                         btrfs_set_file_extent_generation(extent,
1850                                     btrfs_file_extent_generation(&old));
1851                         btrfs_mark_buffer_dirty(path->nodes[0]);
1852                         if (btrfs_file_extent_disk_blocknr(&old) != 0) {
1853                                 inode->i_blocks +=
1854                                       btrfs_file_extent_num_blocks(extent) << 3;
1855                         }
1856                         ret = 0;
1857                         goto out;
1858                 }
1859         }
1860 out:
1861         btrfs_free_path(path);
1862         return ret;
1863 }
1864
1865 static int prepare_pages(struct btrfs_root *root,
1866                          struct file *file,
1867                          struct page **pages,
1868                          size_t num_pages,
1869                          loff_t pos,
1870                          unsigned long first_index,
1871                          unsigned long last_index,
1872                          size_t write_bytes,
1873                          u64 alloc_extent_start)
1874 {
1875         int i;
1876         unsigned long index = pos >> PAGE_CACHE_SHIFT;
1877         struct inode *inode = file->f_path.dentry->d_inode;
1878         int offset;
1879         int err = 0;
1880         int this_write;
1881         struct buffer_head *bh;
1882         struct buffer_head *head;
1883         loff_t isize = i_size_read(inode);
1884
1885         memset(pages, 0, num_pages * sizeof(struct page *));
1886
1887         for (i = 0; i < num_pages; i++) {
1888                 pages[i] = grab_cache_page(inode->i_mapping, index + i);
1889                 if (!pages[i]) {
1890                         err = -ENOMEM;
1891                         goto failed_release;
1892                 }
1893                 cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
1894                 wait_on_page_writeback(pages[i]);
1895                 offset = pos & (PAGE_CACHE_SIZE -1);
1896                 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1897                 if (!page_has_buffers(pages[i])) {
1898                         create_empty_buffers(pages[i],
1899                                              root->fs_info->sb->s_blocksize,
1900                                              (1 << BH_Uptodate));
1901                 }
1902                 head = page_buffers(pages[i]);
1903                 bh = head;
1904                 do {
1905                         err = btrfs_map_bh_to_logical(root, bh,
1906                                                       alloc_extent_start);
1907                         BUG_ON(err);
1908                         if (err)
1909                                 goto failed_truncate;
1910                         bh = bh->b_this_page;
1911                         if (alloc_extent_start)
1912                                 alloc_extent_start++;
1913                 } while (bh != head);
1914                 pos += this_write;
1915                 WARN_ON(this_write > write_bytes);
1916                 write_bytes -= this_write;
1917         }
1918         return 0;
1919
1920 failed_release:
1921         btrfs_drop_pages(pages, num_pages);
1922         return err;
1923
1924 failed_truncate:
1925         btrfs_drop_pages(pages, num_pages);
1926         if (pos > isize)
1927                 vmtruncate(inode, isize);
1928         return err;
1929 }
1930
1931 static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
1932                                 size_t count, loff_t *ppos)
1933 {
1934         loff_t pos;
1935         size_t num_written = 0;
1936         int err = 0;
1937         int ret = 0;
1938         struct inode *inode = file->f_path.dentry->d_inode;
1939         struct btrfs_root *root = BTRFS_I(inode)->root;
1940         struct page *pages[8];
1941         struct page *pinned[2];
1942         unsigned long first_index;
1943         unsigned long last_index;
1944         u64 start_pos;
1945         u64 num_blocks;
1946         u64 alloc_extent_start;
1947         u64 hint_block;
1948         struct btrfs_trans_handle *trans;
1949         struct btrfs_key ins;
1950         pinned[0] = NULL;
1951         pinned[1] = NULL;
1952         if (file->f_flags & O_DIRECT)
1953                 return -EINVAL;
1954         pos = *ppos;
1955         vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
1956         current->backing_dev_info = inode->i_mapping->backing_dev_info;
1957         err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
1958         if (err)
1959                 goto out;
1960         if (count == 0)
1961                 goto out;
1962         err = remove_suid(file->f_path.dentry);
1963         if (err)
1964                 goto out;
1965         file_update_time(file);
1966
1967         start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
1968         num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
1969                         inode->i_blkbits;
1970
1971         mutex_lock(&inode->i_mutex);
1972         first_index = pos >> PAGE_CACHE_SHIFT;
1973         last_index = (pos + count) >> PAGE_CACHE_SHIFT;
1974
1975         if ((first_index << PAGE_CACHE_SHIFT) < inode->i_size &&
1976             (pos & (PAGE_CACHE_SIZE - 1))) {
1977                 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
1978                 if (!PageUptodate(pinned[0])) {
1979                         ret = mpage_readpage(pinned[0], btrfs_get_block);
1980                         BUG_ON(ret);
1981                         wait_on_page_locked(pinned[0]);
1982                 } else {
1983                         unlock_page(pinned[0]);
1984                 }
1985         }
1986         if (first_index != last_index &&
1987             (last_index << PAGE_CACHE_SHIFT) < inode->i_size &&
1988             pos + count < inode->i_size &&
1989             (count & (PAGE_CACHE_SIZE - 1))) {
1990                 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
1991                 if (!PageUptodate(pinned[1])) {
1992                         ret = mpage_readpage(pinned[1], btrfs_get_block);
1993                         BUG_ON(ret);
1994                         wait_on_page_locked(pinned[1]);
1995                 } else {
1996                         unlock_page(pinned[1]);
1997                 }
1998         }
1999
2000         mutex_lock(&root->fs_info->fs_mutex);
2001         trans = btrfs_start_transaction(root, 1);
2002         if (!trans) {
2003                 err = -ENOMEM;
2004                 mutex_unlock(&root->fs_info->fs_mutex);
2005                 goto out_unlock;
2006         }
2007         btrfs_set_trans_block_group(trans, inode);
2008         /* FIXME blocksize != 4096 */
2009         inode->i_blocks += num_blocks << 3;
2010         hint_block = 0;
2011         if (start_pos < inode->i_size) {
2012                 /* FIXME blocksize != pagesize */
2013                 ret = drop_extents(trans, root, inode,
2014                                    start_pos,
2015                                    (pos + count + root->blocksize -1) &
2016                                    ~((u64)root->blocksize - 1), &hint_block);
2017                 BUG_ON(ret);
2018         }
2019         if (inode->i_size < start_pos) {
2020                 u64 last_pos_in_file;
2021                 u64 hole_size;
2022                 u64 mask = root->blocksize - 1;
2023                 last_pos_in_file = (inode->i_size + mask) & ~mask;
2024                 hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
2025                 hole_size >>= inode->i_blkbits;
2026                 if (last_pos_in_file < start_pos) {
2027                         ret = btrfs_insert_file_extent(trans, root,
2028                                                        inode->i_ino,
2029                                                        last_pos_in_file,
2030                                                        0, 0, hole_size);
2031                 }
2032                 BUG_ON(ret);
2033         }
2034         if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size ||
2035             pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
2036                 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
2037                                          num_blocks, hint_block, (u64)-1,
2038                                          &ins, 1);
2039                 BUG_ON(ret);
2040                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
2041                                        start_pos, ins.objectid, ins.offset,
2042                                        ins.offset);
2043                 BUG_ON(ret);
2044         } else {
2045                 ins.offset = 0;
2046                 ins.objectid = 0;
2047         }
2048         BUG_ON(ret);
2049         alloc_extent_start = ins.objectid;
2050         // btrfs_update_inode_block_group(trans, inode);
2051         ret = btrfs_end_transaction(trans, root);
2052         mutex_unlock(&root->fs_info->fs_mutex);
2053
2054         while(count > 0) {
2055                 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
2056                 size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset);
2057                 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
2058                                         PAGE_CACHE_SHIFT;
2059
2060                 memset(pages, 0, sizeof(pages));
2061                 ret = prepare_pages(root, file, pages, num_pages,
2062                                     pos, first_index, last_index,
2063                                     write_bytes, alloc_extent_start);
2064                 BUG_ON(ret);
2065
2066                 /* FIXME blocks != pagesize */
2067                 if (alloc_extent_start)
2068                         alloc_extent_start += num_pages;
2069                 ret = btrfs_copy_from_user(pos, num_pages,
2070                                            write_bytes, pages, buf);
2071                 BUG_ON(ret);
2072
2073                 ret = dirty_and_release_pages(NULL, root, file, pages,
2074                                               num_pages, pos, write_bytes);
2075                 BUG_ON(ret);
2076                 btrfs_drop_pages(pages, num_pages);
2077
2078                 buf += write_bytes;
2079                 count -= write_bytes;
2080                 pos += write_bytes;
2081                 num_written += write_bytes;
2082
2083                 balance_dirty_pages_ratelimited(inode->i_mapping);
2084                 btrfs_btree_balance_dirty(root);
2085                 cond_resched();
2086         }
2087 out_unlock:
2088         mutex_unlock(&inode->i_mutex);
2089 out:
2090         if (pinned[0])
2091                 page_cache_release(pinned[0]);
2092         if (pinned[1])
2093                 page_cache_release(pinned[1]);
2094         *ppos = pos;
2095         current->backing_dev_info = NULL;
2096         mark_inode_dirty(inode);
2097         return num_written ? num_written : err;
2098 }
2099
2100 static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
2101                         unsigned long offset, unsigned long size)
2102 {
2103         char *kaddr;
2104         unsigned long left, count = desc->count;
2105         struct inode *inode = page->mapping->host;
2106
2107         if (size > count)
2108                 size = count;
2109
2110         if (!PageChecked(page)) {
2111                 /* FIXME, do it per block */
2112                 struct btrfs_root *root = BTRFS_I(inode)->root;
2113
2114                 int ret = btrfs_csum_verify_file_block(root,
2115                                   page->mapping->host->i_ino,
2116                                   page->index << PAGE_CACHE_SHIFT,
2117                                   kmap(page), PAGE_CACHE_SIZE);
2118                 if (ret) {
2119                         if (ret != -ENOENT) {
2120                                 printk("failed to verify ino %lu page %lu\n",
2121                                        page->mapping->host->i_ino,
2122                                        page->index);
2123                                 memset(page_address(page), 0, PAGE_CACHE_SIZE);
2124                                 flush_dcache_page(page);
2125                         }
2126                 }
2127                 SetPageChecked(page);
2128                 kunmap(page);
2129         }
2130         /*
2131          * Faults on the destination of a read are common, so do it before
2132          * taking the kmap.
2133          */
2134         if (!fault_in_pages_writeable(desc->arg.buf, size)) {
2135                 kaddr = kmap_atomic(page, KM_USER0);
2136                 left = __copy_to_user_inatomic(desc->arg.buf,
2137                                                 kaddr + offset, size);
2138                 kunmap_atomic(kaddr, KM_USER0);
2139                 if (left == 0)
2140                         goto success;
2141         }
2142
2143         /* Do it the slow way */
2144         kaddr = kmap(page);
2145         left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
2146         kunmap(page);
2147
2148         if (left) {
2149                 size -= left;
2150                 desc->error = -EFAULT;
2151         }
2152 success:
2153         desc->count = count - size;
2154         desc->written += size;
2155         desc->arg.buf += size;
2156         return size;
2157 }
2158
2159 /**
2160  * btrfs_file_aio_read - filesystem read routine
2161  * @iocb:       kernel I/O control block
2162  * @iov:        io vector request
2163  * @nr_segs:    number of segments in the iovec
2164  * @pos:        current file position
2165  */
2166 static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
2167                                    unsigned long nr_segs, loff_t pos)
2168 {
2169         struct file *filp = iocb->ki_filp;
2170         ssize_t retval;
2171         unsigned long seg;
2172         size_t count;
2173         loff_t *ppos = &iocb->ki_pos;
2174
2175         count = 0;
2176         for (seg = 0; seg < nr_segs; seg++) {
2177                 const struct iovec *iv = &iov[seg];
2178
2179                 /*
2180                  * If any segment has a negative length, or the cumulative
2181                  * length ever wraps negative then return -EINVAL.
2182                  */
2183                 count += iv->iov_len;
2184                 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
2185                         return -EINVAL;
2186                 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
2187                         continue;
2188                 if (seg == 0)
2189                         return -EFAULT;
2190                 nr_segs = seg;
2191                 count -= iv->iov_len;   /* This segment is no good */
2192                 break;
2193         }
2194         retval = 0;
2195         if (count) {
2196                 for (seg = 0; seg < nr_segs; seg++) {
2197                         read_descriptor_t desc;
2198
2199                         desc.written = 0;
2200                         desc.arg.buf = iov[seg].iov_base;
2201                         desc.count = iov[seg].iov_len;
2202                         if (desc.count == 0)
2203                                 continue;
2204                         desc.error = 0;
2205                         do_generic_file_read(filp, ppos, &desc,
2206                                              btrfs_read_actor);
2207                         retval += desc.written;
2208                         if (desc.error) {
2209                                 retval = retval ?: desc.error;
2210                                 break;
2211                         }
2212                 }
2213         }
2214         return retval;
2215 }
2216
2217 static int create_subvol(struct btrfs_root *root, char *name, int namelen)
2218 {
2219         struct btrfs_trans_handle *trans;
2220         struct btrfs_key key;
2221         struct btrfs_root_item root_item;
2222         struct btrfs_inode_item *inode_item;
2223         struct buffer_head *subvol;
2224         struct btrfs_leaf *leaf;
2225         struct btrfs_root *new_root;
2226         struct inode *inode;
2227         struct inode *dir;
2228         int ret;
2229         u64 objectid;
2230         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2231
2232         mutex_lock(&root->fs_info->fs_mutex);
2233         trans = btrfs_start_transaction(root, 1);
2234         BUG_ON(!trans);
2235
2236         subvol = btrfs_alloc_free_block(trans, root, 0);
2237         if (subvol == NULL)
2238                 return -ENOSPC;
2239         leaf = btrfs_buffer_leaf(subvol);
2240         btrfs_set_header_nritems(&leaf->header, 0);
2241         btrfs_set_header_level(&leaf->header, 0);
2242         btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol));
2243         btrfs_set_header_generation(&leaf->header, trans->transid);
2244         btrfs_set_header_owner(&leaf->header, root->root_key.objectid);
2245         memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
2246                sizeof(leaf->header.fsid));
2247         mark_buffer_dirty(subvol);
2248
2249         inode_item = &root_item.inode;
2250         memset(inode_item, 0, sizeof(*inode_item));
2251         btrfs_set_inode_generation(inode_item, 1);
2252         btrfs_set_inode_size(inode_item, 3);
2253         btrfs_set_inode_nlink(inode_item, 1);
2254         btrfs_set_inode_nblocks(inode_item, 1);
2255         btrfs_set_inode_mode(inode_item, S_IFDIR | 0755);
2256
2257         btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
2258         btrfs_set_root_refs(&root_item, 1);
2259         brelse(subvol);
2260         subvol = NULL;
2261
2262         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2263                                        0, &objectid);
2264         BUG_ON(ret);
2265
2266         btrfs_set_root_dirid(&root_item, new_dirid);
2267
2268         key.objectid = objectid;
2269         key.offset = 1;
2270         key.flags = 0;
2271         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2272         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2273                                 &root_item);
2274         BUG_ON(ret);
2275
2276         /*
2277          * insert the directory item
2278          */
2279         key.offset = (u64)-1;
2280         dir = root->fs_info->sb->s_root->d_inode;
2281         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2282                                     name, namelen, dir->i_ino, &key, 0);
2283         BUG_ON(ret);
2284
2285         ret = btrfs_commit_transaction(trans, root);
2286         BUG_ON(ret);
2287
2288         new_root = btrfs_read_fs_root(root->fs_info, &key);
2289         BUG_ON(!new_root);
2290
2291         trans = btrfs_start_transaction(new_root, 1);
2292         BUG_ON(!trans);
2293
2294         inode = btrfs_new_inode(trans, new_root, new_dirid,
2295                                 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2296         inode->i_op = &btrfs_dir_inode_operations;
2297         inode->i_fop = &btrfs_dir_file_operations;
2298
2299         ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid);
2300         BUG_ON(ret);
2301
2302         inode->i_nlink = 1;
2303         inode->i_size = 6;
2304         ret = btrfs_update_inode(trans, new_root, inode);
2305         BUG_ON(ret);
2306
2307         ret = btrfs_commit_transaction(trans, new_root);
2308         BUG_ON(ret);
2309
2310         iput(inode);
2311
2312         mutex_unlock(&root->fs_info->fs_mutex);
2313         btrfs_btree_balance_dirty(root);
2314         return 0;
2315 }
2316
2317 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2318 {
2319         struct btrfs_trans_handle *trans;
2320         struct btrfs_key key;
2321         struct btrfs_root_item new_root_item;
2322         int ret;
2323         u64 objectid;
2324
2325         if (!root->ref_cows)
2326                 return -EINVAL;
2327
2328         mutex_lock(&root->fs_info->fs_mutex);
2329         trans = btrfs_start_transaction(root, 1);
2330         BUG_ON(!trans);
2331
2332         ret = btrfs_update_inode(trans, root, root->inode);
2333         BUG_ON(ret);
2334
2335         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2336                                        0, &objectid);
2337         BUG_ON(ret);
2338
2339         memcpy(&new_root_item, &root->root_item,
2340                sizeof(new_root_item));
2341
2342         key.objectid = objectid;
2343         key.offset = 1;
2344         key.flags = 0;
2345         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2346         btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node));
2347
2348         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2349                                 &new_root_item);
2350         BUG_ON(ret);
2351
2352         /*
2353          * insert the directory item
2354          */
2355         key.offset = (u64)-1;
2356         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2357                                     name, namelen,
2358                                     root->fs_info->sb->s_root->d_inode->i_ino,
2359                                     &key, 0);
2360
2361         BUG_ON(ret);
2362
2363         ret = btrfs_inc_root_ref(trans, root);
2364         BUG_ON(ret);
2365
2366         ret = btrfs_commit_transaction(trans, root);
2367         BUG_ON(ret);
2368         mutex_unlock(&root->fs_info->fs_mutex);
2369         btrfs_btree_balance_dirty(root);
2370         return 0;
2371 }
2372
2373 static int add_disk(struct btrfs_root *root, char *name, int namelen)
2374 {
2375         struct block_device *bdev;
2376         struct btrfs_path *path;
2377         struct super_block *sb = root->fs_info->sb;
2378         struct btrfs_root *dev_root = root->fs_info->dev_root;
2379         struct btrfs_trans_handle *trans;
2380         struct btrfs_device_item *dev_item;
2381         struct btrfs_key key;
2382         u16 item_size;
2383         u64 num_blocks;
2384         u64 new_blocks;
2385         u64 device_id;
2386         int ret;
2387
2388 printk("adding disk %s\n", name);
2389         path = btrfs_alloc_path();
2390         if (!path)
2391                 return -ENOMEM;
2392         num_blocks = btrfs_super_total_blocks(root->fs_info->disk_super);
2393         bdev = open_bdev_excl(name, O_RDWR, sb);
2394         if (IS_ERR(bdev)) {
2395                 ret = PTR_ERR(bdev);
2396 printk("open bdev excl failed ret %d\n", ret);
2397                 goto out_nolock;
2398         }
2399         set_blocksize(bdev, sb->s_blocksize);
2400         new_blocks = bdev->bd_inode->i_size >> sb->s_blocksize_bits;
2401         key.objectid = num_blocks;
2402         key.offset = new_blocks;
2403         key.flags = 0;
2404         btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY);
2405
2406         mutex_lock(&dev_root->fs_info->fs_mutex);
2407         trans = btrfs_start_transaction(dev_root, 1);
2408         item_size = sizeof(*dev_item) + namelen;
2409 printk("insert empty on %Lu %Lu %u size %d\n", num_blocks, new_blocks, key.flags, item_size);
2410         ret = btrfs_insert_empty_item(trans, dev_root, path, &key, item_size);
2411         if (ret) {
2412 printk("insert failed %d\n", ret);
2413                 close_bdev_excl(bdev);
2414                 if (ret > 0)
2415                         ret = -EEXIST;
2416                 goto out;
2417         }
2418         dev_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2419                                   path->slots[0], struct btrfs_device_item);
2420         btrfs_set_device_pathlen(dev_item, namelen);
2421         memcpy(dev_item + 1, name, namelen);
2422
2423         device_id = btrfs_super_last_device_id(root->fs_info->disk_super) + 1;
2424         btrfs_set_super_last_device_id(root->fs_info->disk_super, device_id);
2425         btrfs_set_device_id(dev_item, device_id);
2426         mark_buffer_dirty(path->nodes[0]);
2427
2428         ret = btrfs_insert_dev_radix(root, bdev, device_id, num_blocks,
2429                                      new_blocks);
2430
2431         if (!ret) {
2432                 btrfs_set_super_total_blocks(root->fs_info->disk_super,
2433                                              num_blocks + new_blocks);
2434                 i_size_write(root->fs_info->btree_inode,
2435                              (num_blocks + new_blocks) <<
2436                              root->fs_info->btree_inode->i_blkbits);
2437         }
2438
2439 out:
2440         ret = btrfs_commit_transaction(trans, dev_root);
2441         BUG_ON(ret);
2442         mutex_unlock(&root->fs_info->fs_mutex);
2443 out_nolock:
2444         btrfs_free_path(path);
2445         btrfs_btree_balance_dirty(root);
2446
2447         return ret;
2448 }
2449
2450 static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
2451                        cmd, unsigned long arg)
2452 {
2453         struct btrfs_root *root = BTRFS_I(inode)->root;
2454         struct btrfs_ioctl_vol_args vol_args;
2455         int ret = 0;
2456         struct btrfs_dir_item *di;
2457         int namelen;
2458         struct btrfs_path *path;
2459         u64 root_dirid;
2460
2461         switch (cmd) {
2462         case BTRFS_IOC_SNAP_CREATE:
2463                 if (copy_from_user(&vol_args,
2464                                    (struct btrfs_ioctl_vol_args __user *)arg,
2465                                    sizeof(vol_args)))
2466                         return -EFAULT;
2467                 namelen = strlen(vol_args.name);
2468                 if (namelen > BTRFS_VOL_NAME_MAX)
2469                         return -EINVAL;
2470                 path = btrfs_alloc_path();
2471                 if (!path)
2472                         return -ENOMEM;
2473                 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2474                 mutex_lock(&root->fs_info->fs_mutex);
2475                 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2476                                     path, root_dirid,
2477                                     vol_args.name, namelen, 0);
2478                 mutex_unlock(&root->fs_info->fs_mutex);
2479                 btrfs_free_path(path);
2480                 if (di && !IS_ERR(di))
2481                         return -EEXIST;
2482
2483                 if (root == root->fs_info->tree_root)
2484                         ret = create_subvol(root, vol_args.name, namelen);
2485                 else
2486                         ret = create_snapshot(root, vol_args.name, namelen);
2487                 WARN_ON(ret);
2488                 break;
2489         case BTRFS_IOC_ADD_DISK:
2490                 if (copy_from_user(&vol_args,
2491                                    (struct btrfs_ioctl_vol_args __user *)arg,
2492                                    sizeof(vol_args)))
2493                         return -EFAULT;
2494                 namelen = strlen(vol_args.name);
2495                 if (namelen > BTRFS_VOL_NAME_MAX)
2496                         return -EINVAL;
2497                 vol_args.name[namelen] = '\0';
2498                 ret = add_disk(root, vol_args.name, namelen);
2499                 break;
2500         default:
2501                 return -ENOTTY;
2502         }
2503         return ret;
2504 }
2505
/*
 * Slab caches used by btrfs.  All of them are created in init_inodecache()
 * and torn down in destroy_inodecache().  Only the inode cache is private to
 * this file; the others have external linkage.
 */
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_transaction_cachep;
struct kmem_cache *btrfs_bit_radix_cachep;
struct kmem_cache *btrfs_path_cachep;
static struct kmem_cache *btrfs_inode_cachep;
2511
2512 /*
2513  * Called inside transaction, so use GFP_NOFS
2514  */
2515 static struct inode *btrfs_alloc_inode(struct super_block *sb)
2516 {
2517         struct btrfs_inode *ei;
2518
2519         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2520         if (!ei)
2521                 return NULL;
2522         return &ei->vfs_inode;
2523 }
2524
2525 static void btrfs_destroy_inode(struct inode *inode)
2526 {
2527         WARN_ON(!list_empty(&inode->i_dentry));
2528         WARN_ON(inode->i_data.nrpages);
2529
2530         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2531 }
2532
2533 static void init_once(void * foo, struct kmem_cache * cachep,
2534                       unsigned long flags)
2535 {
2536         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2537
2538         if ((flags & (SLAB_CTOR_CONSTRUCTOR)) ==
2539             SLAB_CTOR_CONSTRUCTOR) {
2540                 inode_init_once(&ei->vfs_inode);
2541         }
2542 }
2543
2544 static int init_inodecache(void)
2545 {
2546         btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
2547                                              sizeof(struct btrfs_inode),
2548                                              0, (SLAB_RECLAIM_ACCOUNT|
2549                                                 SLAB_MEM_SPREAD),
2550                                              init_once, NULL);
2551         btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
2552                                              sizeof(struct btrfs_trans_handle),
2553                                              0, (SLAB_RECLAIM_ACCOUNT|
2554                                                 SLAB_MEM_SPREAD),
2555                                              NULL, NULL);
2556         btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
2557                                              sizeof(struct btrfs_transaction),
2558                                              0, (SLAB_RECLAIM_ACCOUNT|
2559                                                 SLAB_MEM_SPREAD),
2560                                              NULL, NULL);
2561         btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
2562                                              sizeof(struct btrfs_transaction),
2563                                              0, (SLAB_RECLAIM_ACCOUNT|
2564                                                 SLAB_MEM_SPREAD),
2565                                              NULL, NULL);
2566         btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix",
2567                                              256,
2568                                              0, (SLAB_RECLAIM_ACCOUNT|
2569                                                 SLAB_MEM_SPREAD |
2570                                                 SLAB_DESTROY_BY_RCU),
2571                                              NULL, NULL);
2572         if (btrfs_inode_cachep == NULL || btrfs_trans_handle_cachep == NULL ||
2573             btrfs_transaction_cachep == NULL || btrfs_bit_radix_cachep == NULL)
2574                 return -ENOMEM;
2575         return 0;
2576 }
2577
2578 static void destroy_inodecache(void)
2579 {
2580         kmem_cache_destroy(btrfs_inode_cachep);
2581         kmem_cache_destroy(btrfs_trans_handle_cachep);
2582         kmem_cache_destroy(btrfs_transaction_cachep);
2583         kmem_cache_destroy(btrfs_bit_radix_cachep);
2584         kmem_cache_destroy(btrfs_path_cachep);
2585 }
2586
2587 static int btrfs_get_sb(struct file_system_type *fs_type,
2588         int flags, const char *dev_name, void *data, struct vfsmount *mnt)
2589 {
2590         return get_sb_bdev(fs_type, flags, dev_name, data,
2591                            btrfs_fill_super, mnt);
2592 }
2593
2594 static int btrfs_getattr(struct vfsmount *mnt,
2595                          struct dentry *dentry, struct kstat *stat)
2596 {
2597         struct inode *inode = dentry->d_inode;
2598         generic_fillattr(inode, stat);
2599         stat->blksize = 256 * 1024;
2600         return 0;
2601 }
2602
2603 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
2604 {
2605         struct btrfs_root *root = btrfs_sb(dentry->d_sb);
2606         struct btrfs_super_block *disk_super = root->fs_info->disk_super;
2607
2608         buf->f_namelen = BTRFS_NAME_LEN;
2609         buf->f_blocks = btrfs_super_total_blocks(disk_super);
2610         buf->f_bfree = buf->f_blocks - btrfs_super_blocks_used(disk_super);
2611         buf->f_bavail = buf->f_bfree;
2612         buf->f_bsize = dentry->d_sb->s_blocksize;
2613         buf->f_type = BTRFS_SUPER_MAGIC;
2614         return 0;
2615 }
2616
2617 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
2618                            struct inode * new_dir,struct dentry *new_dentry)
2619 {
2620         struct btrfs_trans_handle *trans;
2621         struct btrfs_root *root = BTRFS_I(old_dir)->root;
2622         struct inode *new_inode = new_dentry->d_inode;
2623         struct inode *old_inode = old_dentry->d_inode;
2624         struct timespec ctime = CURRENT_TIME;
2625         struct btrfs_path *path;
2626         struct btrfs_dir_item *di;
2627         int ret;
2628
2629         if (S_ISDIR(old_inode->i_mode) && new_inode &&
2630             new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
2631                 return -ENOTEMPTY;
2632         }
2633         mutex_lock(&root->fs_info->fs_mutex);
2634         trans = btrfs_start_transaction(root, 1);
2635         btrfs_set_trans_block_group(trans, new_dir);
2636         path = btrfs_alloc_path();
2637         if (!path) {
2638                 ret = -ENOMEM;
2639                 goto out_fail;
2640         }
2641
2642         old_dentry->d_inode->i_nlink++;
2643         old_dir->i_ctime = old_dir->i_mtime = ctime;
2644         new_dir->i_ctime = new_dir->i_mtime = ctime;
2645         old_inode->i_ctime = ctime;
2646         if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) {
2647                 struct btrfs_key *location = &BTRFS_I(new_dir)->location;
2648                 u64 old_parent_oid;
2649                 di = btrfs_lookup_dir_item(trans, root, path, old_inode->i_ino,
2650                                            "..", 2, -1);
2651                 if (IS_ERR(di)) {
2652                         ret = PTR_ERR(di);
2653                         goto out_fail;
2654                 }
2655                 if (!di) {
2656                         ret = -ENOENT;
2657                         goto out_fail;
2658                 }
2659                 old_parent_oid = btrfs_disk_key_objectid(&di->location);
2660                 ret = btrfs_del_item(trans, root, path);
2661                 if (ret) {
2662                         ret = -EIO;
2663                         goto out_fail;
2664                 }
2665                 btrfs_release_path(root, path);
2666
2667                 di = btrfs_lookup_dir_index_item(trans, root, path,
2668                                                  old_inode->i_ino,
2669                                                  old_parent_oid,
2670                                                  "..", 2, -1);
2671                 if (IS_ERR(di)) {
2672                         ret = PTR_ERR(di);
2673                         goto out_fail;
2674                 }
2675                 if (!di) {
2676                         ret = -ENOENT;
2677                         goto out_fail;
2678                 }
2679                 ret = btrfs_del_item(trans, root, path);
2680                 if (ret) {
2681                         ret = -EIO;
2682                         goto out_fail;
2683                 }
2684                 btrfs_release_path(root, path);
2685
2686                 ret = btrfs_insert_dir_item(trans, root, "..", 2,
2687                                             old_inode->i_ino, location, 0);
2688                 if (ret)
2689                         goto out_fail;
2690         }
2691
2692
2693         ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
2694         if (ret)
2695                 goto out_fail;
2696
2697         if (new_inode) {
2698                 new_inode->i_ctime = CURRENT_TIME;
2699                 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
2700                 if (ret)
2701                         goto out_fail;
2702                 if (S_ISDIR(new_inode->i_mode))
2703                         clear_nlink(new_inode);
2704                 else
2705                         drop_nlink(new_inode);
2706                 btrfs_update_inode(trans, root, new_inode);
2707         }
2708         ret = btrfs_add_link(trans, new_dentry, old_inode);
2709         if (ret)
2710                 goto out_fail;
2711
2712 out_fail:
2713         btrfs_free_path(path);
2714         btrfs_end_transaction(trans, root);
2715         mutex_unlock(&root->fs_info->fs_mutex);
2716         return ret;
2717 }
2718
2719 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
2720                          const char *symname)
2721 {
2722         struct btrfs_trans_handle *trans;
2723         struct btrfs_root *root = BTRFS_I(dir)->root;
2724         struct btrfs_path *path;
2725         struct btrfs_key key;
2726         struct inode *inode;
2727         int err;
2728         int drop_inode = 0;
2729         u64 objectid;
2730         int name_len;
2731         int datasize;
2732         char *ptr;
2733         struct btrfs_file_extent_item *ei;
2734
2735         name_len = strlen(symname) + 1;
2736         if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
2737                 return -ENAMETOOLONG;
2738         mutex_lock(&root->fs_info->fs_mutex);
2739         trans = btrfs_start_transaction(root, 1);
2740         btrfs_set_trans_block_group(trans, dir);
2741
2742         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2743         if (err) {
2744                 err = -ENOSPC;
2745                 goto out_unlock;
2746         }
2747
2748         inode = btrfs_new_inode(trans, root, objectid,
2749                                 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
2750         err = PTR_ERR(inode);
2751         if (IS_ERR(inode))
2752                 goto out_unlock;
2753
2754         btrfs_set_trans_block_group(trans, inode);
2755         err = btrfs_add_nondir(trans, dentry, inode);
2756         if (err)
2757                 drop_inode = 1;
2758         else {
2759                 inode->i_mapping->a_ops = &btrfs_aops;
2760                 inode->i_fop = &btrfs_file_operations;
2761                 inode->i_op = &btrfs_file_inode_operations;
2762         }
2763         dir->i_sb->s_dirt = 1;
2764         btrfs_update_inode_block_group(trans, inode);
2765         btrfs_update_inode_block_group(trans, dir);
2766         if (drop_inode)
2767                 goto out_unlock;
2768
2769         path = btrfs_alloc_path();
2770         BUG_ON(!path);
2771         key.objectid = inode->i_ino;
2772         key.offset = 0;
2773         key.flags = 0;
2774         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2775         datasize = btrfs_file_extent_calc_inline_size(name_len);
2776         err = btrfs_insert_empty_item(trans, root, path, &key,
2777                                       datasize);
2778         BUG_ON(err);
2779         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2780                path->slots[0], struct btrfs_file_extent_item);
2781         btrfs_set_file_extent_generation(ei, trans->transid);
2782         btrfs_set_file_extent_type(ei,
2783                                    BTRFS_FILE_EXTENT_INLINE);
2784         ptr = btrfs_file_extent_inline_start(ei);
2785         btrfs_memcpy(root, path->nodes[0]->b_data,
2786                      ptr, symname, name_len);
2787         mark_buffer_dirty(path->nodes[0]);
2788         btrfs_free_path(path);
2789         inode->i_op = &btrfs_symlink_inode_operations;
2790         inode->i_mapping->a_ops = &btrfs_symlink_aops;
2791         inode->i_size = name_len - 1;
2792         btrfs_update_inode(trans, root, inode);
2793         err = 0;
2794
2795 out_unlock:
2796         btrfs_end_transaction(trans, root);
2797         mutex_unlock(&root->fs_info->fs_mutex);
2798
2799         if (drop_inode) {
2800                 inode_dec_link_count(inode);
2801                 iput(inode);
2802         }
2803         btrfs_btree_balance_dirty(root);
2804         return err;
2805 }
2806
2807 static struct file_system_type btrfs_fs_type = {
2808         .owner          = THIS_MODULE,
2809         .name           = "btrfs",
2810         .get_sb         = btrfs_get_sb,
2811         .kill_sb        = kill_block_super,
2812         .fs_flags       = FS_REQUIRES_DEV,
2813 };
2814
2815 static struct super_operations btrfs_super_ops = {
2816         .delete_inode   = btrfs_delete_inode,
2817         .put_super      = btrfs_put_super,
2818         .read_inode     = btrfs_read_locked_inode,
2819         .write_super    = btrfs_write_super,
2820         .sync_fs        = btrfs_sync_fs,
2821         .write_inode    = btrfs_write_inode,
2822         .dirty_inode    = btrfs_dirty_inode,
2823         .alloc_inode    = btrfs_alloc_inode,
2824         .destroy_inode  = btrfs_destroy_inode,
2825         .statfs         = btrfs_statfs,
2826 };
2827
2828 static struct inode_operations btrfs_dir_inode_operations = {
2829         .lookup         = btrfs_lookup,
2830         .create         = btrfs_create,
2831         .unlink         = btrfs_unlink,
2832         .link           = btrfs_link,
2833         .mkdir          = btrfs_mkdir,
2834         .rmdir          = btrfs_rmdir,
2835         .rename         = btrfs_rename,
2836         .symlink        = btrfs_symlink,
2837         .setattr        = btrfs_setattr,
2838 };
2839
2840 static struct inode_operations btrfs_dir_ro_inode_operations = {
2841         .lookup         = btrfs_lookup,
2842 };
2843
2844 static struct file_operations btrfs_dir_file_operations = {
2845         .llseek         = generic_file_llseek,
2846         .read           = generic_read_dir,
2847         .readdir        = btrfs_readdir,
2848         .ioctl          = btrfs_ioctl,
2849 };
2850
2851 static struct address_space_operations btrfs_aops = {
2852         .readpage       = btrfs_readpage,
2853         .writepage      = btrfs_writepage,
2854         .sync_page      = block_sync_page,
2855         .prepare_write  = btrfs_prepare_write,
2856         .commit_write   = btrfs_commit_write,
2857 };
2858
2859 static struct address_space_operations btrfs_symlink_aops = {
2860         .readpage       = btrfs_readpage,
2861         .writepage      = btrfs_writepage,
2862 };
2863
2864 static struct inode_operations btrfs_file_inode_operations = {
2865         .truncate       = btrfs_truncate,
2866         .getattr        = btrfs_getattr,
2867         .setattr        = btrfs_setattr,
2868 };
2869
2870 static struct file_operations btrfs_file_operations = {
2871         .llseek         = generic_file_llseek,
2872         .read           = do_sync_read,
2873         .aio_read       = btrfs_file_aio_read,
2874         .write          = btrfs_file_write,
2875         .mmap           = generic_file_mmap,
2876         .open           = generic_file_open,
2877         .ioctl          = btrfs_ioctl,
2878         .fsync          = btrfs_sync_file,
2879 };
2880
2881 static struct inode_operations btrfs_symlink_inode_operations = {
2882         .readlink       = generic_readlink,
2883         .follow_link    = page_follow_link_light,
2884         .put_link       = page_put_link,
2885 };
2886
2887 static int __init init_btrfs_fs(void)
2888 {
2889         int err;
2890         printk("btrfs loaded!\n");
2891         err = init_inodecache();
2892         if (err)
2893                 return err;
2894         return register_filesystem(&btrfs_fs_type);
2895         destroy_inodecache();
2896         return err;
2897 }
2898
2899 static void __exit exit_btrfs_fs(void)
2900 {
2901         destroy_inodecache();
2902         unregister_filesystem(&btrfs_fs_type);
2903         printk("btrfs unloaded\n");
2904 }
2905
2906 module_init(init_btrfs_fs)
2907 module_exit(exit_btrfs_fs)
2908
2909 MODULE_LICENSE("GPL");