Btrfs: fixup various fsx failures
[linux-2.6] / fs / btrfs / super.c
1 #include <linux/module.h>
2 #include <linux/buffer_head.h>
3 #include <linux/fs.h>
4 #include <linux/pagemap.h>
5 #include <linux/highmem.h>
6 #include <linux/time.h>
7 #include <linux/init.h>
8 #include <linux/string.h>
9 #include <linux/smp_lock.h>
10 #include <linux/backing-dev.h>
11 #include <linux/mpage.h>
12 #include <linux/swap.h>
13 #include <linux/writeback.h>
14 #include <linux/statfs.h>
15 #include "ctree.h"
16 #include "disk-io.h"
17 #include "transaction.h"
18 #include "btrfs_inode.h"
19 #include "ioctl.h"
20 #include "print-tree.h"
21
/*
 * Argument bundle passed through iget5_locked() to btrfs_find_actor() and
 * btrfs_init_locked_inode(): an inode is identified by its object id
 * together with the btrfs root it lives in.
 */
22 struct btrfs_iget_args {
23         u64 ino;
24         struct btrfs_root *root;
25 };
26
/*
 * Filesystem magic number.  NOTE(review): presumably what statfs reports in
 * f_type; the use site is not visible in this part of the file.
 */
27 #define BTRFS_SUPER_MAGIC 0x9123682E
28
/*
 * Forward declarations of the operation tables so that the inode setup code
 * below can take their addresses before the tables are filled in.
 */
29 static struct inode_operations btrfs_dir_inode_operations;
30 static struct inode_operations btrfs_symlink_inode_operations;
31 static struct inode_operations btrfs_dir_ro_inode_operations;
32 static struct super_operations btrfs_super_ops;
33 static struct file_operations btrfs_dir_file_operations;
34 static struct inode_operations btrfs_file_inode_operations;
35 static struct address_space_operations btrfs_aops;
36 static struct address_space_operations btrfs_symlink_aops;
37 static struct file_operations btrfs_file_operations;
38
/*
 * Prototypes for helpers that btrfs_truncate_page() calls; they are not
 * defined in the part of the file that precedes it.
 */
39 static int drop_extents(struct btrfs_trans_handle *trans,
40                           struct btrfs_root *root,
41                           struct inode *inode,
42                           u64 start, u64 end, u64 *hint_block);
43 static int btrfs_get_block(struct inode *inode, sector_t iblock,
44                            struct buffer_head *result, int create);
45
46 static void btrfs_read_locked_inode(struct inode *inode)
47 {
48         struct btrfs_path *path;
49         struct btrfs_inode_item *inode_item;
50         struct btrfs_root *root = BTRFS_I(inode)->root;
51         struct btrfs_key location;
52         struct btrfs_block_group_cache *alloc_group;
53         u64 alloc_group_block;
54         int ret;
55
56         path = btrfs_alloc_path();
57         BUG_ON(!path);
58         btrfs_init_path(path);
59         mutex_lock(&root->fs_info->fs_mutex);
60
61         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
62         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
63         if (ret) {
64                 btrfs_free_path(path);
65                 goto make_bad;
66         }
67         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
68                                   path->slots[0],
69                                   struct btrfs_inode_item);
70
71         inode->i_mode = btrfs_inode_mode(inode_item);
72         inode->i_nlink = btrfs_inode_nlink(inode_item);
73         inode->i_uid = btrfs_inode_uid(inode_item);
74         inode->i_gid = btrfs_inode_gid(inode_item);
75         inode->i_size = btrfs_inode_size(inode_item);
76         inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime);
77         inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime);
78         inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime);
79         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime);
80         inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime);
81         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
82         inode->i_blocks = btrfs_inode_nblocks(inode_item);
83         inode->i_generation = btrfs_inode_generation(inode_item);
84         alloc_group_block = btrfs_inode_block_group(inode_item);
85         ret = radix_tree_gang_lookup(&root->fs_info->block_group_radix,
86                                      (void **)&alloc_group,
87                                      alloc_group_block, 1);
88         BUG_ON(!ret);
89         BTRFS_I(inode)->block_group = alloc_group;
90
91         btrfs_free_path(path);
92         inode_item = NULL;
93
94         mutex_unlock(&root->fs_info->fs_mutex);
95
96         switch (inode->i_mode & S_IFMT) {
97 #if 0
98         default:
99                 init_special_inode(inode, inode->i_mode,
100                                    btrfs_inode_rdev(inode_item));
101                 break;
102 #endif
103         case S_IFREG:
104                 inode->i_mapping->a_ops = &btrfs_aops;
105                 inode->i_fop = &btrfs_file_operations;
106                 inode->i_op = &btrfs_file_inode_operations;
107                 break;
108         case S_IFDIR:
109                 inode->i_fop = &btrfs_dir_file_operations;
110                 if (root == root->fs_info->tree_root)
111                         inode->i_op = &btrfs_dir_ro_inode_operations;
112                 else
113                         inode->i_op = &btrfs_dir_inode_operations;
114                 break;
115         case S_IFLNK:
116                 inode->i_op = &btrfs_symlink_inode_operations;
117                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
118                 break;
119         }
120         return;
121
122 make_bad:
123         btrfs_release_path(root, path);
124         btrfs_free_path(path);
125         mutex_unlock(&root->fs_info->fs_mutex);
126         make_bad_inode(inode);
127 }
128
129 static void fill_inode_item(struct btrfs_inode_item *item,
130                             struct inode *inode)
131 {
132         btrfs_set_inode_uid(item, inode->i_uid);
133         btrfs_set_inode_gid(item, inode->i_gid);
134         btrfs_set_inode_size(item, inode->i_size);
135         btrfs_set_inode_mode(item, inode->i_mode);
136         btrfs_set_inode_nlink(item, inode->i_nlink);
137         btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec);
138         btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec);
139         btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec);
140         btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec);
141         btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec);
142         btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
143         btrfs_set_inode_nblocks(item, inode->i_blocks);
144         btrfs_set_inode_generation(item, inode->i_generation);
145         btrfs_set_inode_block_group(item,
146                                     BTRFS_I(inode)->block_group->key.objectid);
147 }
148
149 static int btrfs_update_inode(struct btrfs_trans_handle *trans,
150                               struct btrfs_root *root,
151                               struct inode *inode)
152 {
153         struct btrfs_inode_item *inode_item;
154         struct btrfs_path *path;
155         int ret;
156
157         path = btrfs_alloc_path();
158         BUG_ON(!path);
159         btrfs_init_path(path);
160         ret = btrfs_lookup_inode(trans, root, path,
161                                  &BTRFS_I(inode)->location, 1);
162         if (ret) {
163                 if (ret > 0)
164                         ret = -ENOENT;
165                 goto failed;
166         }
167
168         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
169                                   path->slots[0],
170                                   struct btrfs_inode_item);
171
172         fill_inode_item(inode_item, inode);
173         btrfs_mark_buffer_dirty(path->nodes[0]);
174         ret = 0;
175 failed:
176         btrfs_release_path(root, path);
177         btrfs_free_path(path);
178         return ret;
179 }
180
181
182 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
183                               struct btrfs_root *root,
184                               struct inode *dir,
185                               struct dentry *dentry)
186 {
187         struct btrfs_path *path;
188         const char *name = dentry->d_name.name;
189         int name_len = dentry->d_name.len;
190         int ret = 0;
191         u64 objectid;
192         struct btrfs_dir_item *di;
193
194         path = btrfs_alloc_path();
195         BUG_ON(!path);
196         btrfs_init_path(path);
197         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
198                                     name, name_len, -1);
199         if (IS_ERR(di)) {
200                 ret = PTR_ERR(di);
201                 goto err;
202         }
203         if (!di) {
204                 ret = -ENOENT;
205                 goto err;
206         }
207         objectid = btrfs_disk_key_objectid(&di->location);
208         ret = btrfs_delete_one_dir_name(trans, root, path, di);
209         BUG_ON(ret);
210         btrfs_release_path(root, path);
211
212         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
213                                          objectid, name, name_len, -1);
214         if (IS_ERR(di)) {
215                 ret = PTR_ERR(di);
216                 goto err;
217         }
218         if (!di) {
219                 ret = -ENOENT;
220                 goto err;
221         }
222         ret = btrfs_delete_one_dir_name(trans, root, path, di);
223         BUG_ON(ret);
224
225         dentry->d_inode->i_ctime = dir->i_ctime;
226 err:
227         btrfs_free_path(path);
228         if (!ret) {
229                 dir->i_size -= name_len * 2;
230                 btrfs_update_inode(trans, root, dir);
231                 drop_nlink(dentry->d_inode);
232                 btrfs_update_inode(trans, root, dentry->d_inode);
233                 dir->i_sb->s_dirt = 1;
234         }
235         return ret;
236 }
237
238 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
239 {
240         struct btrfs_root *root;
241         struct btrfs_trans_handle *trans;
242         int ret;
243
244         root = BTRFS_I(dir)->root;
245         mutex_lock(&root->fs_info->fs_mutex);
246         trans = btrfs_start_transaction(root, 1);
247         btrfs_set_trans_block_group(trans, dir);
248         ret = btrfs_unlink_trans(trans, root, dir, dentry);
249         btrfs_end_transaction(trans, root);
250         mutex_unlock(&root->fs_info->fs_mutex);
251         btrfs_btree_balance_dirty(root);
252         return ret;
253 }
254
/*
 * Remove the directory @dentry from its parent @dir.
 *
 * The items belonging to the directory's own inode are walked from the
 * highest key downwards.  Every item found must be a dir item or dir index
 * item whose name matches ".." or "."; anything else yields -ENOTEMPTY.
 * Matching items are deleted one by one until the dir item with offset 1 has
 * been removed.  The entry in the parent is then removed with
 * btrfs_unlink_trans() and, on success, the directory's i_size is set to 0.
 *
 * Returns 0 on success or a negative errno (-ENOENT, -ENOTEMPTY, a search
 * error, or the result of ending the transaction).
 *
 * NOTE(review): items deleted by earlier loop iterations are not restored
 * here when a later iteration bails out with -ENOTEMPTY -- confirm whether
 * that can happen in practice.
 */
255 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
256 {
257         struct inode *inode = dentry->d_inode;
258         int err;
259         int ret;
260         struct btrfs_root *root = BTRFS_I(dir)->root;
261         struct btrfs_path *path;
262         struct btrfs_key key;
263         struct btrfs_trans_handle *trans;
264         struct btrfs_key found_key;
265         int found_type;
266         struct btrfs_leaf *leaf;
            /* matched with length 2 for ".." and with length 1 for "." */
267         char *goodnames = "..";
268
269         path = btrfs_alloc_path();
270         BUG_ON(!path);
271         btrfs_init_path(path);
272         mutex_lock(&root->fs_info->fs_mutex);
273         trans = btrfs_start_transaction(root, 1);
274         btrfs_set_trans_block_group(trans, dir);
            /* largest possible key for this inode: search lands just past its last item */
275         key.objectid = inode->i_ino;
276         key.offset = (u64)-1;
277         key.flags = (u32)-1;
278         while(1) {
279                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
280                 if (ret < 0) {
281                         err = ret;
282                         goto out;
283                 }
                    /* an exact match for the all-ones key is not expected */
284                 BUG_ON(ret == 0);
285                 if (path->slots[0] == 0) {
286                         err = -ENOENT;
287                         goto out;
288                 }
                    /* step back to the item that precedes the search position */
289                 path->slots[0]--;
290                 leaf = btrfs_buffer_leaf(path->nodes[0]);
291                 btrfs_disk_key_to_cpu(&found_key,
292                                       &leaf->items[path->slots[0]].key);
293                 found_type = btrfs_key_type(&found_key);
294                 if (found_key.objectid != inode->i_ino) {
295                         err = -ENOENT;
296                         goto out;
297                 }
298                 if ((found_type != BTRFS_DIR_ITEM_KEY &&
299                      found_type != BTRFS_DIR_INDEX_KEY) ||
300                     (!btrfs_match_dir_item_name(root, path, goodnames, 2) &&
301                     !btrfs_match_dir_item_name(root, path, goodnames, 1))) {
302                         err = -ENOTEMPTY;
303                         goto out;
304                 }
305                 ret = btrfs_del_item(trans, root, path);
306                 BUG_ON(ret);
307
                    /* the dir item at offset 1 is treated as the last one to remove */
308                 if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1)
309                         break;
310                 btrfs_release_path(root, path);
311         }
312         ret = 0;
313         btrfs_release_path(root, path);
314
315         /* now the directory is empty */
316         err = btrfs_unlink_trans(trans, root, dir, dentry);
317         if (!err) {
318                 inode->i_size = 0;
319         }
320 out:
321         btrfs_release_path(root, path);
322         btrfs_free_path(path);
            /*
             * NOTE(review): fs_mutex is dropped before the transaction is
             * ended, whereas btrfs_unlink() ends the transaction first --
             * confirm the ordering is intended.
             */
323         mutex_unlock(&root->fs_info->fs_mutex);
324         ret = btrfs_end_transaction(trans, root);
325         btrfs_btree_balance_dirty(root);
326         if (ret && !err)
327                 err = ret;
328         return err;
329 }
330
331 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
332                             struct btrfs_root *root,
333                             struct inode *inode)
334 {
335         struct btrfs_path *path;
336         int ret;
337
338         clear_inode(inode);
339
340         path = btrfs_alloc_path();
341         BUG_ON(!path);
342         btrfs_init_path(path);
343         ret = btrfs_lookup_inode(trans, root, path,
344                                  &BTRFS_I(inode)->location, -1);
345         BUG_ON(ret);
346         ret = btrfs_del_item(trans, root, path);
347         BUG_ON(ret);
348         btrfs_free_path(path);
349         return ret;
350 }
351
352 static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path,
353                            u64 objectid)
354 {
355         struct btrfs_node *node;
356         int i;
357         int nritems;
358         u64 item_objectid;
359         u64 blocknr;
360         int slot;
361         int ret;
362
363         if (!path->nodes[1])
364                 return;
365         node = btrfs_buffer_node(path->nodes[1]);
366         slot = path->slots[1];
367         if (slot == 0)
368                 return;
369         nritems = btrfs_header_nritems(&node->header);
370         for (i = slot - 1; i >= 0; i--) {
371                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
372                 if (item_objectid != objectid)
373                         break;
374                 blocknr = btrfs_node_blockptr(node, i);
375                 ret = readahead_tree_block(root, blocknr);
376                 if (ret)
377                         break;
378         }
379 }
380
/*
 * Drop everything stored for @inode beyond inode->i_size, inside a running
 * transaction.
 *
 * Starting from the largest possible key for the inode, the loop repeatedly
 * looks at the last remaining item.  Only csum, dir item, dir index and
 * extent data items are considered; any other item, or an item of another
 * object, ends the walk.  Items that start at or after i_size are deleted;
 * an extent that straddles i_size is shortened instead, which also ends the
 * walk.  Disk extents of deleted file extents are freed once the path has
 * been released.
 *
 * Returns 0 on success or the negative error from btrfs_search_slot().
 * The superblock is marked dirty in either case.
 */
381 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
382                                    struct btrfs_root *root,
383                                    struct inode *inode)
384 {
385         int ret;
386         struct btrfs_path *path;
387         struct btrfs_key key;
388         struct btrfs_disk_key *found_key;
389         u32 found_type;
390         struct btrfs_leaf *leaf;
391         struct btrfs_file_extent_item *fi;
392         u64 extent_start = 0;
393         u64 extent_num_blocks = 0;
394         u64 item_end = 0;
395         int found_extent;
396         int del_item;
397
398         path = btrfs_alloc_path();
399         BUG_ON(!path);
400         /* FIXME, add redo link to tree so we don't leak on crash */
401         key.objectid = inode->i_ino;
402         key.offset = (u64)-1;
403         key.flags = (u32)-1;
404         while(1) {
405                 btrfs_init_path(path);
406                 fi = NULL;
407                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
408                 if (ret < 0) {
409                         goto error;
410                 }
                    /* no exact match: use the item just before the search position */
411                 if (ret > 0) {
412                         BUG_ON(path->slots[0] == 0);
413                         path->slots[0]--;
414                 }
415                 reada_truncate(root, path, inode->i_ino);
416                 leaf = btrfs_buffer_leaf(path->nodes[0]);
417                 found_key = &leaf->items[path->slots[0]].key;
418                 found_type = btrfs_disk_key_type(found_key);
419                 if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
420                         break;
421                 if (found_type != BTRFS_CSUM_ITEM_KEY &&
422                     found_type != BTRFS_DIR_ITEM_KEY &&
423                     found_type != BTRFS_DIR_INDEX_KEY &&
424                     found_type != BTRFS_EXTENT_DATA_KEY)
425                         break;
                    /*
                     * item_end starts as the key offset; for non-inline
                     * extents the extent length in bytes is added below.
                     */
426                 item_end = btrfs_disk_key_offset(found_key);
427                 if (found_type == BTRFS_EXTENT_DATA_KEY) {
428                         fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
429                                             path->slots[0],
430                                             struct btrfs_file_extent_item);
431                         if (btrfs_file_extent_type(fi) !=
432                             BTRFS_FILE_EXTENT_INLINE) {
433                                 item_end += btrfs_file_extent_num_blocks(fi) <<
434                                                 inode->i_blkbits;
435                         }
436                 }
437                 if (found_type == BTRFS_CSUM_ITEM_KEY) {
438                         ret = btrfs_csum_truncate(trans, root, path,
439                                                   inode->i_size);
440                         BUG_ON(ret);
441                 }
                    /*
                     * Item lies entirely below i_size: retry with the next
                     * lower key type, or stop when the type is already 0.
                     */
442                 if (item_end < inode->i_size) {
443                         if (found_type) {
444                                 btrfs_set_key_type(&key, found_type - 1);
445                                 continue;
446                         }
447                         break;
448                 }
                    /* delete items that start at or after i_size; otherwise shorten */
449                 if (btrfs_disk_key_offset(found_key) >= inode->i_size)
450                         del_item = 1;
451                 else
452                         del_item = 0;
453                 found_extent = 0;
454
455                 if (found_type == BTRFS_EXTENT_DATA_KEY &&
456                            btrfs_file_extent_type(fi) !=
457                            BTRFS_FILE_EXTENT_INLINE) {
458                         u64 num_dec;
459                         if (!del_item) {
                                    /* keep only the blocks needed to reach i_size, rounded up */
460                                 u64 orig_num_blocks =
461                                         btrfs_file_extent_num_blocks(fi);
462                                 extent_num_blocks = inode->i_size -
463                                         btrfs_disk_key_offset(found_key) +
464                                         root->blocksize - 1;
465                                 extent_num_blocks >>= inode->i_blkbits;
466                                 btrfs_set_file_extent_num_blocks(fi,
467                                                          extent_num_blocks);
                                    /*
                                     * NOTE(review): "<< 3" presumably converts
                                     * 4096-byte blocks to 512-byte i_blocks units
                                     * (see the FIXME further down) -- confirm.
                                     */
468                                 inode->i_blocks -= (orig_num_blocks -
469                                         extent_num_blocks) << 3;
                                    /*
                                     * NOTE(review): btrfs_update_inode() uses
                                     * btrfs_mark_buffer_dirty() for tree
                                     * buffers; confirm plain mark_buffer_dirty()
                                     * is intended here.
                                     */
470                                 mark_buffer_dirty(path->nodes[0]);
471                         } else {
472                                 extent_start =
473                                         btrfs_file_extent_disk_blocknr(fi);
474                                 extent_num_blocks =
475                                         btrfs_file_extent_disk_num_blocks(fi);
476                                 /* FIXME blocksize != 4096 */
477                                 num_dec = btrfs_file_extent_num_blocks(fi) << 3;
                                    /* a disk block number of 0 is not freed or accounted */
478                                 if (extent_start != 0) {
479                                         found_extent = 1;
480                                         inode->i_blocks -= num_dec;
481                                 }
482                         }
483                 }
484                 if (del_item) {
485                         ret = btrfs_del_item(trans, root, path);
486                         BUG_ON(ret);
487                 } else {
488                         break;
489                 }
                    /* release the path before freeing the extent found above */
490                 btrfs_release_path(root, path);
491                 if (found_extent) {
492                         ret = btrfs_free_extent(trans, root, extent_start,
493                                                 extent_num_blocks, 0);
494                         BUG_ON(ret);
495                 }
496         }
497         ret = 0;
498 error:
499         btrfs_release_path(root, path);
500         btrfs_free_path(path);
501         inode->i_sb->s_dirt = 1;
502         return ret;
503 }
504
505 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
506 {
507         struct inode *inode = mapping->host;
508         unsigned blocksize = 1 << inode->i_blkbits;
509         pgoff_t index = from >> PAGE_CACHE_SHIFT;
510         unsigned offset = from & (PAGE_CACHE_SIZE-1);
511         struct page *page;
512         char *kaddr;
513         int ret = 0;
514         struct btrfs_root *root = BTRFS_I(inode)->root;
515         u64 alloc_hint;
516         struct btrfs_key ins;
517         struct btrfs_trans_handle *trans;
518
519         if ((offset & (blocksize - 1)) == 0)
520                 goto out;
521
522         ret = -ENOMEM;
523         page = grab_cache_page(mapping, index);
524         if (!page)
525                 goto out;
526
527         if (!PageUptodate(page)) {
528                 ret = mpage_readpage(page, btrfs_get_block);
529                 lock_page(page);
530                 if (!PageUptodate(page)) {
531                         ret = -EIO;
532                         goto out;
533                 }
534         }
535         mutex_lock(&root->fs_info->fs_mutex);
536         trans = btrfs_start_transaction(root, 1);
537         btrfs_set_trans_block_group(trans, inode);
538
539         ret = drop_extents(trans, root, inode, page->index << PAGE_CACHE_SHIFT,
540                            (page->index + 1) << PAGE_CACHE_SHIFT, &alloc_hint);
541         BUG_ON(ret);
542         ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1,
543                                  alloc_hint, (u64)-1, &ins, 1);
544         BUG_ON(ret);
545         ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
546                                        page->index << PAGE_CACHE_SHIFT,
547                                        ins.objectid, 1, 1);
548         BUG_ON(ret);
549         SetPageChecked(page);
550         kaddr = kmap(page);
551         memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
552         flush_dcache_page(page);
553         btrfs_csum_file_block(trans, root, inode->i_ino,
554                               page->index << PAGE_CACHE_SHIFT,
555                               kaddr, PAGE_CACHE_SIZE);
556         kunmap(page);
557         btrfs_end_transaction(trans, root);
558         mutex_unlock(&root->fs_info->fs_mutex);
559
560         set_page_dirty(page);
561         unlock_page(page);
562         page_cache_release(page);
563 out:
564         return ret;
565 }
566
567 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
568 {
569         struct inode *inode = dentry->d_inode;
570         int err;
571
572         err = inode_change_ok(inode, attr);
573         if (err)
574                 return err;
575
576         if (S_ISREG(inode->i_mode) &&
577             attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
578                 struct btrfs_trans_handle *trans;
579                 struct btrfs_root *root = BTRFS_I(inode)->root;
580                 u64 mask = root->blocksize - 1;
581                 u64 pos = (inode->i_size + mask) & ~mask;
582                 u64 hole_size;
583
584                 if (attr->ia_size <= pos)
585                         goto out;
586
587                 btrfs_truncate_page(inode->i_mapping, inode->i_size);
588
589                 hole_size = (attr->ia_size - pos + mask) & ~mask;
590                 hole_size >>= inode->i_blkbits;
591
592                 mutex_lock(&root->fs_info->fs_mutex);
593                 trans = btrfs_start_transaction(root, 1);
594                 btrfs_set_trans_block_group(trans, inode);
595                 err = btrfs_insert_file_extent(trans, root, inode->i_ino,
596                                                pos, 0, 0, hole_size);
597                 BUG_ON(err);
598                 btrfs_end_transaction(trans, root);
599                 mutex_unlock(&root->fs_info->fs_mutex);
600         }
601 out:
602         err = inode_setattr(inode, attr);
603
604         return err;
605 }
606 static void btrfs_delete_inode(struct inode *inode)
607 {
608         struct btrfs_trans_handle *trans;
609         struct btrfs_root *root = BTRFS_I(inode)->root;
610         int ret;
611
612         truncate_inode_pages(&inode->i_data, 0);
613         if (is_bad_inode(inode)) {
614                 goto no_delete;
615         }
616         inode->i_size = 0;
617         mutex_lock(&root->fs_info->fs_mutex);
618         trans = btrfs_start_transaction(root, 1);
619         btrfs_set_trans_block_group(trans, inode);
620         ret = btrfs_truncate_in_trans(trans, root, inode);
621         BUG_ON(ret);
622         btrfs_free_inode(trans, root, inode);
623         btrfs_end_transaction(trans, root);
624         mutex_unlock(&root->fs_info->fs_mutex);
625         btrfs_btree_balance_dirty(root);
626         return;
627 no_delete:
628         clear_inode(inode);
629 }
630
631 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
632                                struct btrfs_key *location)
633 {
634         const char *name = dentry->d_name.name;
635         int namelen = dentry->d_name.len;
636         struct btrfs_dir_item *di;
637         struct btrfs_path *path;
638         struct btrfs_root *root = BTRFS_I(dir)->root;
639         int ret;
640
641         path = btrfs_alloc_path();
642         BUG_ON(!path);
643         btrfs_init_path(path);
644         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
645                                     namelen, 0);
646         if (!di || IS_ERR(di)) {
647                 location->objectid = 0;
648                 ret = 0;
649                 goto out;
650         }
651         btrfs_disk_key_to_cpu(location, &di->location);
652 out:
653         btrfs_release_path(root, path);
654         btrfs_free_path(path);
655         return ret;
656 }
657
658 static int fixup_tree_root_location(struct btrfs_root *root,
659                              struct btrfs_key *location,
660                              struct btrfs_root **sub_root)
661 {
662         struct btrfs_path *path;
663         struct btrfs_root_item *ri;
664
665         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
666                 return 0;
667         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
668                 return 0;
669
670         path = btrfs_alloc_path();
671         BUG_ON(!path);
672         mutex_lock(&root->fs_info->fs_mutex);
673
674         *sub_root = btrfs_read_fs_root(root->fs_info, location);
675         if (IS_ERR(*sub_root))
676                 return PTR_ERR(*sub_root);
677
678         ri = &(*sub_root)->root_item;
679         location->objectid = btrfs_root_dirid(ri);
680         location->flags = 0;
681         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
682         location->offset = 0;
683
684         btrfs_free_path(path);
685         mutex_unlock(&root->fs_info->fs_mutex);
686         return 0;
687 }
688
689 static int btrfs_init_locked_inode(struct inode *inode, void *p)
690 {
691         struct btrfs_iget_args *args = p;
692         inode->i_ino = args->ino;
693         BTRFS_I(inode)->root = args->root;
694         return 0;
695 }
696
697 static int btrfs_find_actor(struct inode *inode, void *opaque)
698 {
699         struct btrfs_iget_args *args = opaque;
700         return (args->ino == inode->i_ino &&
701                 args->root == BTRFS_I(inode)->root);
702 }
703
704 static struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
705                                        struct btrfs_root *root)
706 {
707         struct inode *inode;
708         struct btrfs_iget_args args;
709         args.ino = objectid;
710         args.root = root;
711
712         inode = iget5_locked(s, objectid, btrfs_find_actor,
713                              btrfs_init_locked_inode,
714                              (void *)&args);
715         return inode;
716 }
717
718 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
719                                    struct nameidata *nd)
720 {
721         struct inode * inode;
722         struct btrfs_inode *bi = BTRFS_I(dir);
723         struct btrfs_root *root = bi->root;
724         struct btrfs_root *sub_root = root;
725         struct btrfs_key location;
726         int ret;
727
728         if (dentry->d_name.len > BTRFS_NAME_LEN)
729                 return ERR_PTR(-ENAMETOOLONG);
730         mutex_lock(&root->fs_info->fs_mutex);
731         ret = btrfs_inode_by_name(dir, dentry, &location);
732         mutex_unlock(&root->fs_info->fs_mutex);
733         if (ret < 0)
734                 return ERR_PTR(ret);
735         inode = NULL;
736         if (location.objectid) {
737                 ret = fixup_tree_root_location(root, &location, &sub_root);
738                 if (ret < 0)
739                         return ERR_PTR(ret);
740                 if (ret > 0)
741                         return ERR_PTR(-ENOENT);
742                 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
743                                           sub_root);
744                 if (!inode)
745                         return ERR_PTR(-EACCES);
746                 if (inode->i_state & I_NEW) {
747                         if (sub_root != root) {
748 printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_root, BTRFS_I(inode)->root);
749                                 igrab(inode);
750                                 sub_root->inode = inode;
751                         }
752                         BTRFS_I(inode)->root = sub_root;
753                         memcpy(&BTRFS_I(inode)->location, &location,
754                                sizeof(location));
755                         btrfs_read_locked_inode(inode);
756                         unlock_new_inode(inode);
757                 }
758         }
759         return d_splice_alias(inode, dentry);
760 }
761
762 static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path,
763                          u64 objectid)
764 {
765         struct btrfs_node *node;
766         int i;
767         u32 nritems;
768         u64 item_objectid;
769         u64 blocknr;
770         int slot;
771         int ret;
772
773         if (!path->nodes[1])
774                 return;
775         node = btrfs_buffer_node(path->nodes[1]);
776         slot = path->slots[1];
777         nritems = btrfs_header_nritems(&node->header);
778         for (i = slot + 1; i < nritems; i++) {
779                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
780                 if (item_objectid != objectid)
781                         break;
782                 blocknr = btrfs_node_blockptr(node, i);
783                 ret = readahead_tree_block(root, blocknr);
784                 if (ret)
785                         break;
786         }
787 }
788
789 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
790 {
791         struct inode *inode = filp->f_path.dentry->d_inode;
792         struct btrfs_root *root = BTRFS_I(inode)->root;
793         struct btrfs_item *item;
794         struct btrfs_dir_item *di;
795         struct btrfs_key key;
796         struct btrfs_path *path;
797         int ret;
798         u32 nritems;
799         struct btrfs_leaf *leaf;
800         int slot;
801         int advance;
802         unsigned char d_type = DT_UNKNOWN;
803         int over = 0;
804         u32 di_cur;
805         u32 di_total;
806         u32 di_len;
807         int key_type = BTRFS_DIR_INDEX_KEY;
808
809         /* FIXME, use a real flag for deciding about the key type */
810         if (root->fs_info->tree_root == root)
811                 key_type = BTRFS_DIR_ITEM_KEY;
812         mutex_lock(&root->fs_info->fs_mutex);
813         key.objectid = inode->i_ino;
814         key.flags = 0;
815         btrfs_set_key_type(&key, key_type);
816         key.offset = filp->f_pos;
817         path = btrfs_alloc_path();
818         btrfs_init_path(path);
819         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
820         if (ret < 0)
821                 goto err;
822         advance = 0;
823         reada_leaves(root, path, inode->i_ino);
824         while(1) {
825                 leaf = btrfs_buffer_leaf(path->nodes[0]);
826                 nritems = btrfs_header_nritems(&leaf->header);
827                 slot = path->slots[0];
828                 if (advance || slot >= nritems) {
829                         if (slot >= nritems -1) {
830                                 reada_leaves(root, path, inode->i_ino);
831                                 ret = btrfs_next_leaf(root, path);
832                                 if (ret)
833                                         break;
834                                 leaf = btrfs_buffer_leaf(path->nodes[0]);
835                                 nritems = btrfs_header_nritems(&leaf->header);
836                                 slot = path->slots[0];
837                         } else {
838                                 slot++;
839                                 path->slots[0]++;
840                         }
841                 }
842                 advance = 1;
843                 item = leaf->items + slot;
844                 if (btrfs_disk_key_objectid(&item->key) != key.objectid)
845                         break;
846                 if (btrfs_disk_key_type(&item->key) != key_type)
847                         break;
848                 if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
849                         continue;
850                 filp->f_pos = btrfs_disk_key_offset(&item->key);
851                 advance = 1;
852                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
853                 di_cur = 0;
854                 di_total = btrfs_item_size(leaf->items + slot);
855                 while(di_cur < di_total) {
856                         over = filldir(dirent, (const char *)(di + 1),
857                                        btrfs_dir_name_len(di),
858                                        btrfs_disk_key_offset(&item->key),
859                                        btrfs_disk_key_objectid(&di->location),
860                                        d_type);
861                         if (over)
862                                 goto nopos;
863                         di_len = btrfs_dir_name_len(di) + sizeof(*di);
864                         di_cur += di_len;
865                         di = (struct btrfs_dir_item *)((char *)di + di_len);
866                 }
867         }
868         filp->f_pos++;
869 nopos:
870         ret = 0;
871 err:
872         btrfs_release_path(root, path);
873         btrfs_free_path(path);
874         mutex_unlock(&root->fs_info->fs_mutex);
875         return ret;
876 }
877
878 static void btrfs_put_super (struct super_block * sb)
879 {
880         struct btrfs_root *root = btrfs_sb(sb);
881         int ret;
882
883         ret = close_ctree(root);
884         if (ret) {
885                 printk("close ctree returns %d\n", ret);
886         }
887         sb->s_fs_info = NULL;
888 }
889
890 static int btrfs_fill_super(struct super_block * sb, void * data, int silent)
891 {
892         struct inode * inode;
893         struct dentry * root_dentry;
894         struct btrfs_super_block *disk_super;
895         struct btrfs_root *tree_root;
896         struct btrfs_inode *bi;
897
898         sb->s_maxbytes = MAX_LFS_FILESIZE;
899         sb->s_magic = BTRFS_SUPER_MAGIC;
900         sb->s_op = &btrfs_super_ops;
901         sb->s_time_gran = 1;
902
903         tree_root = open_ctree(sb);
904
905         if (!tree_root) {
906                 printk("btrfs: open_ctree failed\n");
907                 return -EIO;
908         }
909         sb->s_fs_info = tree_root;
910         disk_super = tree_root->fs_info->disk_super;
911         printk("read in super total blocks %Lu root %Lu\n",
912                btrfs_super_total_blocks(disk_super),
913                btrfs_super_root_dir(disk_super));
914
915         inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super),
916                                   tree_root);
917         bi = BTRFS_I(inode);
918         bi->location.objectid = inode->i_ino;
919         bi->location.offset = 0;
920         bi->location.flags = 0;
921         bi->root = tree_root;
922         btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY);
923
924         if (!inode)
925                 return -ENOMEM;
926         if (inode->i_state & I_NEW) {
927                 btrfs_read_locked_inode(inode);
928                 unlock_new_inode(inode);
929         }
930
931         root_dentry = d_alloc_root(inode);
932         if (!root_dentry) {
933                 iput(inode);
934                 return -ENOMEM;
935         }
936         sb->s_root = root_dentry;
937
938         return 0;
939 }
940
941 static int btrfs_write_inode(struct inode *inode, int wait)
942 {
943         struct btrfs_root *root = BTRFS_I(inode)->root;
944         struct btrfs_trans_handle *trans;
945         int ret = 0;
946
947         if (wait) {
948                 mutex_lock(&root->fs_info->fs_mutex);
949                 trans = btrfs_start_transaction(root, 1);
950                 btrfs_set_trans_block_group(trans, inode);
951                 ret = btrfs_commit_transaction(trans, root);
952                 mutex_unlock(&root->fs_info->fs_mutex);
953         }
954         return ret;
955 }
956
957 static void btrfs_dirty_inode(struct inode *inode)
958 {
959         struct btrfs_root *root = BTRFS_I(inode)->root;
960         struct btrfs_trans_handle *trans;
961
962         mutex_lock(&root->fs_info->fs_mutex);
963         trans = btrfs_start_transaction(root, 1);
964         btrfs_set_trans_block_group(trans, inode);
965         btrfs_update_inode(trans, root, inode);
966         btrfs_end_transaction(trans, root);
967         mutex_unlock(&root->fs_info->fs_mutex);
968         btrfs_btree_balance_dirty(root);
969 }
970
971 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
972                                      struct btrfs_root *root,
973                                      u64 objectid,
974                                      struct btrfs_block_group_cache *group,
975                                      int mode)
976 {
977         struct inode *inode;
978         struct btrfs_inode_item inode_item;
979         struct btrfs_key *location;
980         int ret;
981         int owner;
982
983         inode = new_inode(root->fs_info->sb);
984         if (!inode)
985                 return ERR_PTR(-ENOMEM);
986
987         BTRFS_I(inode)->root = root;
988         if (mode & S_IFDIR)
989                 owner = 0;
990         else
991                 owner = 1;
992         group = btrfs_find_block_group(root, group, 0, 0, owner);
993         BTRFS_I(inode)->block_group = group;
994
995         inode->i_uid = current->fsuid;
996         inode->i_gid = current->fsgid;
997         inode->i_mode = mode;
998         inode->i_ino = objectid;
999         inode->i_blocks = 0;
1000         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1001         fill_inode_item(&inode_item, inode);
1002         location = &BTRFS_I(inode)->location;
1003         location->objectid = objectid;
1004         location->flags = 0;
1005         location->offset = 0;
1006         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1007
1008         ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
1009         BUG_ON(ret);
1010
1011         insert_inode_hash(inode);
1012         return inode;
1013 }
1014
1015 static int btrfs_add_link(struct btrfs_trans_handle *trans,
1016                             struct dentry *dentry, struct inode *inode)
1017 {
1018         int ret;
1019         struct btrfs_key key;
1020         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
1021         key.objectid = inode->i_ino;
1022         key.flags = 0;
1023         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1024         key.offset = 0;
1025
1026         ret = btrfs_insert_dir_item(trans, root,
1027                                     dentry->d_name.name, dentry->d_name.len,
1028                                     dentry->d_parent->d_inode->i_ino,
1029                                     &key, 0);
1030         if (ret == 0) {
1031                 dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2;
1032                 ret = btrfs_update_inode(trans, root,
1033                                          dentry->d_parent->d_inode);
1034         }
1035         return ret;
1036 }
1037
1038 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
1039                             struct dentry *dentry, struct inode *inode)
1040 {
1041         int err = btrfs_add_link(trans, dentry, inode);
1042         if (!err) {
1043                 d_instantiate(dentry, inode);
1044                 return 0;
1045         }
1046         if (err > 0)
1047                 err = -EEXIST;
1048         return err;
1049 }
1050
1051 static int btrfs_create(struct inode *dir, struct dentry *dentry,
1052                         int mode, struct nameidata *nd)
1053 {
1054         struct btrfs_trans_handle *trans;
1055         struct btrfs_root *root = BTRFS_I(dir)->root;
1056         struct inode *inode;
1057         int err;
1058         int drop_inode = 0;
1059         u64 objectid;
1060
1061         mutex_lock(&root->fs_info->fs_mutex);
1062         trans = btrfs_start_transaction(root, 1);
1063         btrfs_set_trans_block_group(trans, dir);
1064
1065         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1066         if (err) {
1067                 err = -ENOSPC;
1068                 goto out_unlock;
1069         }
1070
1071         inode = btrfs_new_inode(trans, root, objectid,
1072                                 BTRFS_I(dir)->block_group, mode);
1073         err = PTR_ERR(inode);
1074         if (IS_ERR(inode))
1075                 goto out_unlock;
1076
1077         btrfs_set_trans_block_group(trans, inode);
1078         err = btrfs_add_nondir(trans, dentry, inode);
1079         if (err)
1080                 drop_inode = 1;
1081         else {
1082                 inode->i_mapping->a_ops = &btrfs_aops;
1083                 inode->i_fop = &btrfs_file_operations;
1084                 inode->i_op = &btrfs_file_inode_operations;
1085         }
1086         dir->i_sb->s_dirt = 1;
1087         btrfs_update_inode_block_group(trans, inode);
1088         btrfs_update_inode_block_group(trans, dir);
1089 out_unlock:
1090         btrfs_end_transaction(trans, root);
1091         mutex_unlock(&root->fs_info->fs_mutex);
1092
1093         if (drop_inode) {
1094                 inode_dec_link_count(inode);
1095                 iput(inode);
1096         }
1097         btrfs_btree_balance_dirty(root);
1098         return err;
1099 }
1100
1101 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
1102                       struct dentry *dentry)
1103 {
1104         struct btrfs_trans_handle *trans;
1105         struct btrfs_root *root = BTRFS_I(dir)->root;
1106         struct inode *inode = old_dentry->d_inode;
1107         int err;
1108         int drop_inode = 0;
1109
1110         if (inode->i_nlink == 0)
1111                 return -ENOENT;
1112
1113         inc_nlink(inode);
1114         mutex_lock(&root->fs_info->fs_mutex);
1115         trans = btrfs_start_transaction(root, 1);
1116         btrfs_set_trans_block_group(trans, dir);
1117         atomic_inc(&inode->i_count);
1118         err = btrfs_add_nondir(trans, dentry, inode);
1119         if (err)
1120                 drop_inode = 1;
1121         dir->i_sb->s_dirt = 1;
1122         btrfs_update_inode_block_group(trans, dir);
1123         btrfs_update_inode(trans, root, inode);
1124
1125         btrfs_end_transaction(trans, root);
1126         mutex_unlock(&root->fs_info->fs_mutex);
1127
1128         if (drop_inode) {
1129                 inode_dec_link_count(inode);
1130                 iput(inode);
1131         }
1132         btrfs_btree_balance_dirty(root);
1133         return err;
1134 }
1135
1136 static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans,
1137                                 struct btrfs_root *root,
1138                                 u64 objectid, u64 dirid)
1139 {
1140         int ret;
1141         char buf[2];
1142         struct btrfs_key key;
1143
1144         buf[0] = '.';
1145         buf[1] = '.';
1146
1147         key.objectid = objectid;
1148         key.offset = 0;
1149         key.flags = 0;
1150         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1151
1152         ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid,
1153                                     &key, 1);
1154         if (ret)
1155                 goto error;
1156         key.objectid = dirid;
1157         ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid,
1158                                     &key, 1);
1159         if (ret)
1160                 goto error;
1161 error:
1162         return ret;
1163 }
1164
1165 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1166 {
1167         struct inode *inode;
1168         struct btrfs_trans_handle *trans;
1169         struct btrfs_root *root = BTRFS_I(dir)->root;
1170         int err = 0;
1171         int drop_on_err = 0;
1172         u64 objectid;
1173
1174         mutex_lock(&root->fs_info->fs_mutex);
1175         trans = btrfs_start_transaction(root, 1);
1176         btrfs_set_trans_block_group(trans, dir);
1177         if (IS_ERR(trans)) {
1178                 err = PTR_ERR(trans);
1179                 goto out_unlock;
1180         }
1181
1182         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1183         if (err) {
1184                 err = -ENOSPC;
1185                 goto out_unlock;
1186         }
1187
1188         inode = btrfs_new_inode(trans, root, objectid,
1189                                 BTRFS_I(dir)->block_group, S_IFDIR | mode);
1190         if (IS_ERR(inode)) {
1191                 err = PTR_ERR(inode);
1192                 goto out_fail;
1193         }
1194         drop_on_err = 1;
1195         inode->i_op = &btrfs_dir_inode_operations;
1196         inode->i_fop = &btrfs_dir_file_operations;
1197         btrfs_set_trans_block_group(trans, inode);
1198
1199         err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino);
1200         if (err)
1201                 goto out_fail;
1202
1203         inode->i_size = 6;
1204         err = btrfs_update_inode(trans, root, inode);
1205         if (err)
1206                 goto out_fail;
1207         err = btrfs_add_link(trans, dentry, inode);
1208         if (err)
1209                 goto out_fail;
1210         d_instantiate(dentry, inode);
1211         drop_on_err = 0;
1212         dir->i_sb->s_dirt = 1;
1213         btrfs_update_inode_block_group(trans, inode);
1214         btrfs_update_inode_block_group(trans, dir);
1215
1216 out_fail:
1217         btrfs_end_transaction(trans, root);
1218 out_unlock:
1219         mutex_unlock(&root->fs_info->fs_mutex);
1220         if (drop_on_err)
1221                 iput(inode);
1222         btrfs_btree_balance_dirty(root);
1223         return err;
1224 }
1225
1226 static int btrfs_sync_file(struct file *file,
1227                            struct dentry *dentry, int datasync)
1228 {
1229         struct inode *inode = dentry->d_inode;
1230         struct btrfs_root *root = BTRFS_I(inode)->root;
1231         int ret;
1232         struct btrfs_trans_handle *trans;
1233
1234         mutex_lock(&root->fs_info->fs_mutex);
1235         trans = btrfs_start_transaction(root, 1);
1236         if (!trans) {
1237                 ret = -ENOMEM;
1238                 goto out;
1239         }
1240         ret = btrfs_commit_transaction(trans, root);
1241         mutex_unlock(&root->fs_info->fs_mutex);
1242 out:
1243         return ret > 0 ? EIO : ret;
1244 }
1245
1246 static int btrfs_sync_fs(struct super_block *sb, int wait)
1247 {
1248         struct btrfs_trans_handle *trans;
1249         struct btrfs_root *root;
1250         int ret;
1251         root = btrfs_sb(sb);
1252
1253         sb->s_dirt = 0;
1254         if (!wait) {
1255                 filemap_flush(root->fs_info->btree_inode->i_mapping);
1256                 return 0;
1257         }
1258         mutex_lock(&root->fs_info->fs_mutex);
1259         trans = btrfs_start_transaction(root, 1);
1260         ret = btrfs_commit_transaction(trans, root);
1261         sb->s_dirt = 0;
1262         BUG_ON(ret);
1263 printk("btrfs sync_fs\n");
1264         mutex_unlock(&root->fs_info->fs_mutex);
1265         return 0;
1266 }
1267
1268 static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
1269                            struct buffer_head *result, int create)
1270 {
1271         int ret;
1272         int err = 0;
1273         u64 blocknr;
1274         u64 extent_start = 0;
1275         u64 extent_end = 0;
1276         u64 objectid = inode->i_ino;
1277         u32 found_type;
1278         u64 alloc_hint = 0;
1279         struct btrfs_path *path;
1280         struct btrfs_root *root = BTRFS_I(inode)->root;
1281         struct btrfs_file_extent_item *item;
1282         struct btrfs_leaf *leaf;
1283         struct btrfs_disk_key *found_key;
1284         struct btrfs_trans_handle *trans = NULL;
1285
1286         path = btrfs_alloc_path();
1287         BUG_ON(!path);
1288         btrfs_init_path(path);
1289         if (create) {
1290                 WARN_ON(1);
1291                 /* this almost but not quite works */
1292                 trans = btrfs_start_transaction(root, 1);
1293                 if (!trans) {
1294                         err = -ENOMEM;
1295                         goto out;
1296                 }
1297                 ret = drop_extents(trans, root, inode,
1298                                    iblock << inode->i_blkbits,
1299                                    (iblock + 1) << inode->i_blkbits,
1300                                    &alloc_hint);
1301                 BUG_ON(ret);
1302         }
1303
1304         ret = btrfs_lookup_file_extent(NULL, root, path,
1305                                        inode->i_ino,
1306                                        iblock << inode->i_blkbits, 0);
1307         if (ret < 0) {
1308                 err = ret;
1309                 goto out;
1310         }
1311
1312         if (ret != 0) {
1313                 if (path->slots[0] == 0) {
1314                         btrfs_release_path(root, path);
1315                         goto not_found;
1316                 }
1317                 path->slots[0]--;
1318         }
1319
1320         item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
1321                               struct btrfs_file_extent_item);
1322         leaf = btrfs_buffer_leaf(path->nodes[0]);
1323         blocknr = btrfs_file_extent_disk_blocknr(item);
1324         blocknr += btrfs_file_extent_offset(item);
1325
1326         /* are we inside the extent that was found? */
1327         found_key = &leaf->items[path->slots[0]].key;
1328         found_type = btrfs_disk_key_type(found_key);
1329         if (btrfs_disk_key_objectid(found_key) != objectid ||
1330             found_type != BTRFS_EXTENT_DATA_KEY) {
1331                 extent_end = 0;
1332                 extent_start = 0;
1333                 goto not_found;
1334         }
1335         found_type = btrfs_file_extent_type(item);
1336         extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
1337         if (found_type == BTRFS_FILE_EXTENT_REG) {
1338                 extent_start = extent_start >> inode->i_blkbits;
1339                 extent_end = extent_start + btrfs_file_extent_num_blocks(item);
1340                 err = 0;
1341                 if (btrfs_file_extent_disk_blocknr(item) == 0)
1342                         goto out;
1343                 if (iblock >= extent_start && iblock < extent_end) {
1344                         btrfs_map_bh_to_logical(root, result, blocknr +
1345                                                 iblock - extent_start);
1346                         goto out;
1347                 }
1348         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1349                 char *ptr;
1350                 char *map;
1351                 u32 size;
1352                 size = btrfs_file_extent_inline_len(leaf->items +
1353                                                     path->slots[0]);
1354                 extent_end = (extent_start + size) >> inode->i_blkbits;
1355                 extent_start >>= inode->i_blkbits;
1356                 if (iblock < extent_start || iblock > extent_end) {
1357                         goto not_found;
1358                 }
1359                 ptr = btrfs_file_extent_inline_start(item);
1360                 map = kmap(result->b_page);
1361                 memcpy(map, ptr, size);
1362                 memset(map + size, 0, PAGE_CACHE_SIZE - size);
1363                 flush_dcache_page(result->b_page);
1364                 kunmap(result->b_page);
1365                 set_buffer_uptodate(result);
1366                 SetPageChecked(result->b_page);
1367                 btrfs_map_bh_to_logical(root, result, 0);
1368         }
1369 not_found:
1370         if (create) {
1371                 struct btrfs_key ins;
1372                 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
1373                                          1, alloc_hint, (u64)-1,
1374                                          &ins, 1);
1375                 BUG_ON(ret);
1376                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
1377                                                iblock << inode->i_blkbits,
1378                                                ins.objectid, ins.offset,
1379                                                ins.offset);
1380                 BUG_ON(ret);
1381                 SetPageChecked(result->b_page);
1382                 btrfs_map_bh_to_logical(root, result, ins.objectid);
1383         }
1384 out:
1385         if (trans)
1386                 err = btrfs_end_transaction(trans, root);
1387         btrfs_free_path(path);
1388         return err;
1389 }
1390
1391 static int btrfs_get_block(struct inode *inode, sector_t iblock,
1392                            struct buffer_head *result, int create)
1393 {
1394         int err;
1395         struct btrfs_root *root = BTRFS_I(inode)->root;
1396         mutex_lock(&root->fs_info->fs_mutex);
1397         err = btrfs_get_block_lock(inode, iblock, result, create);
1398         mutex_unlock(&root->fs_info->fs_mutex);
1399         return err;
1400 }
1401
1402 static int btrfs_prepare_write(struct file *file, struct page *page,
1403                                unsigned from, unsigned to)
1404 {
1405         return block_prepare_write(page, from, to, btrfs_get_block);
1406 }
1407
/* ->write_super: run a waiting btrfs_sync_fs(); its result is not used. */
static void btrfs_write_super(struct super_block *sb)
{
        (void)btrfs_sync_fs(sb, 1);
}
1412
1413 static int btrfs_readpage(struct file *file, struct page *page)
1414 {
1415         return mpage_readpage(page, btrfs_get_block);
1416 }
1417
1418 /*
1419  * While block_write_full_page is writing back the dirty buffers under
1420  * the page lock, whoever dirtied the buffers may decide to clean them
1421  * again at any time.  We handle that by only looking at the buffer
1422  * state inside lock_buffer().
1423  *
1424  * If block_write_full_page() is called for regular writeback
1425  * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
1426  * locked buffer.   This only can happen if someone has written the buffer
1427  * directly, with submit_bh().  At the address_space level PageWriteback
1428  * prevents this contention from occurring.
1429  */
1430 static int __btrfs_write_full_page(struct inode *inode, struct page *page,
1431                                    struct writeback_control *wbc)
1432 {
1433         int err;
1434         sector_t block;
1435         sector_t last_block;
1436         struct buffer_head *bh, *head;
1437         const unsigned blocksize = 1 << inode->i_blkbits;
1438         int nr_underway = 0;
1439
1440         BUG_ON(!PageLocked(page));
1441
1442         last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1443
1444         if (!page_has_buffers(page)) {
1445                 create_empty_buffers(page, blocksize,
1446                                         (1 << BH_Dirty)|(1 << BH_Uptodate));
1447         }
1448
1449         /*
1450          * Be very careful.  We have no exclusion from __set_page_dirty_buffers
1451          * here, and the (potentially unmapped) buffers may become dirty at
1452          * any time.  If a buffer becomes dirty here after we've inspected it
1453          * then we just miss that fact, and the page stays dirty.
1454          *
1455          * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
1456          * handle that here by just cleaning them.
1457          */
1458
1459         block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1460         head = page_buffers(page);
1461         bh = head;
1462
1463         /*
1464          * Get all the dirty buffers mapped to disk addresses and
1465          * handle any aliases from the underlying blockdev's mapping.
1466          */
1467         do {
1468                 if (block > last_block) {
1469                         /*
1470                          * mapped buffers outside i_size will occur, because
1471                          * this page can be outside i_size when there is a
1472                          * truncate in progress.
1473                          */
1474                         /*
1475                          * The buffer was zeroed by block_write_full_page()
1476                          */
1477                         clear_buffer_dirty(bh);
1478                         set_buffer_uptodate(bh);
1479                 } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
1480                         WARN_ON(bh->b_size != blocksize);
1481                         err = btrfs_get_block(inode, block, bh, 0);
1482                         if (err) {
1483 printk("writepage going to recovery err %d\n", err);
1484                                 goto recover;
1485                         }
1486                         if (buffer_new(bh)) {
1487                                 /* blockdev mappings never come here */
1488                                 clear_buffer_new(bh);
1489                         }
1490                 }
1491                 bh = bh->b_this_page;
1492                 block++;
1493         } while (bh != head);
1494
1495         do {
1496                 if (!buffer_mapped(bh))
1497                         continue;
1498                 /*
1499                  * If it's a fully non-blocking write attempt and we cannot
1500                  * lock the buffer then redirty the page.  Note that this can
1501                  * potentially cause a busy-wait loop from pdflush and kswapd
1502                  * activity, but those code paths have their own higher-level
1503                  * throttling.
1504                  */
1505                 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1506                         lock_buffer(bh);
1507                 } else if (test_set_buffer_locked(bh)) {
1508                         redirty_page_for_writepage(wbc, page);
1509                         continue;
1510                 }
1511                 if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
1512                         mark_buffer_async_write(bh);
1513                 } else {
1514                         unlock_buffer(bh);
1515                 }
1516         } while ((bh = bh->b_this_page) != head);
1517
1518         /*
1519          * The page and its buffers are protected by PageWriteback(), so we can
1520          * drop the bh refcounts early.
1521          */
1522         BUG_ON(PageWriteback(page));
1523         set_page_writeback(page);
1524
1525         do {
1526                 struct buffer_head *next = bh->b_this_page;
1527                 if (buffer_async_write(bh)) {
1528                         submit_bh(WRITE, bh);
1529                         nr_underway++;
1530                 }
1531                 bh = next;
1532         } while (bh != head);
1533         unlock_page(page);
1534
1535         err = 0;
1536 done:
1537         if (nr_underway == 0) {
1538                 /*
1539                  * The page was marked dirty, but the buffers were
1540                  * clean.  Someone wrote them back by hand with
1541                  * ll_rw_block/submit_bh.  A rare case.
1542                  */
1543                 int uptodate = 1;
1544                 do {
1545                         if (!buffer_uptodate(bh)) {
1546                                 uptodate = 0;
1547                                 break;
1548                         }
1549                         bh = bh->b_this_page;
1550                 } while (bh != head);
1551                 if (uptodate)
1552                         SetPageUptodate(page);
1553                 end_page_writeback(page);
1554         }
1555         return err;
1556
1557 recover:
1558         /*
1559          * ENOSPC, or some other error.  We may already have added some
1560          * blocks to the file, so we need to write these out to avoid
1561          * exposing stale data.
1562          * The page is currently locked and not marked for writeback
1563          */
1564         bh = head;
1565         /* Recovery: lock and submit the mapped buffers */
1566         do {
1567                 if (buffer_mapped(bh) && buffer_dirty(bh)) {
1568                         lock_buffer(bh);
1569                         mark_buffer_async_write(bh);
1570                 } else {
1571                         /*
1572                          * The buffer may have been set dirty during
1573                          * attachment to a dirty page.
1574                          */
1575                         clear_buffer_dirty(bh);
1576                 }
1577         } while ((bh = bh->b_this_page) != head);
1578         SetPageError(page);
1579         BUG_ON(PageWriteback(page));
1580         set_page_writeback(page);
1581         do {
1582                 struct buffer_head *next = bh->b_this_page;
1583                 if (buffer_async_write(bh)) {
1584                         clear_buffer_dirty(bh);
1585                         submit_bh(WRITE, bh);
1586                         nr_underway++;
1587                 }
1588                 bh = next;
1589         } while (bh != head);
1590         unlock_page(page);
1591         goto done;
1592 }
1593
1594 /*
1595  * The generic ->writepage function for buffer-backed address_spaces
1596  */
1597 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1598 {
1599         struct inode * const inode = page->mapping->host;
1600         loff_t i_size = i_size_read(inode);
1601         const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
1602         unsigned offset;
1603         void *kaddr;
1604
1605         /* Is the page fully inside i_size? */
1606         if (page->index < end_index)
1607                 return __btrfs_write_full_page(inode, page, wbc);
1608
1609         /* Is the page fully outside i_size? (truncate in progress) */
1610         offset = i_size & (PAGE_CACHE_SIZE-1);
1611         if (page->index >= end_index+1 || !offset) {
1612                 /*
1613                  * The page may have dirty, unmapped buffers.  For example,
1614                  * they may have been added in ext3_writepage().  Make them
1615                  * freeable here, so the page does not leak.
1616                  */
1617                 block_invalidatepage(page, 0);
1618                 unlock_page(page);
1619                 return 0; /* don't care */
1620         }
1621
1622         /*
1623          * The page straddles i_size.  It must be zeroed out on each and every
1624          * writepage invokation because it may be mmapped.  "A file is mapped
1625          * in multiples of the page size.  For a file that is not a multiple of
1626          * the  page size, the remaining memory is zeroed when mapped, and
1627          * writes to that region are not written out to the file."
1628          */
1629         kaddr = kmap_atomic(page, KM_USER0);
1630         memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1631         flush_dcache_page(page);
1632         kunmap_atomic(kaddr, KM_USER0);
1633         return __btrfs_write_full_page(inode, page, wbc);
1634 }
1635
1636 static void btrfs_truncate(struct inode *inode)
1637 {
1638         struct btrfs_root *root = BTRFS_I(inode)->root;
1639         int ret;
1640         struct btrfs_trans_handle *trans;
1641
1642         if (!S_ISREG(inode->i_mode))
1643                 return;
1644         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1645                 return;
1646
1647         btrfs_truncate_page(inode->i_mapping, inode->i_size);
1648
1649         mutex_lock(&root->fs_info->fs_mutex);
1650         trans = btrfs_start_transaction(root, 1);
1651         btrfs_set_trans_block_group(trans, inode);
1652
1653         /* FIXME, add redo link to tree so we don't leak on crash */
1654         ret = btrfs_truncate_in_trans(trans, root, inode);
1655         BUG_ON(ret);
1656         btrfs_update_inode(trans, root, inode);
1657         ret = btrfs_end_transaction(trans, root);
1658         BUG_ON(ret);
1659         mutex_unlock(&root->fs_info->fs_mutex);
1660         btrfs_btree_balance_dirty(root);
1661 }
1662
1663 static int btrfs_commit_write(struct file *file, struct page *page,
1664                               unsigned from, unsigned to)
1665 {
1666         struct inode *inode = page->mapping->host;
1667         struct buffer_head *bh;
1668         loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1669
1670         SetPageUptodate(page);
1671         bh = page_buffers(page);
1672         set_buffer_uptodate(bh);
1673         if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1674                 set_page_dirty(page);
1675         }
1676         if (pos > inode->i_size) {
1677                 i_size_write(inode, pos);
1678                 mark_inode_dirty(inode);
1679         }
1680         return 0;
1681 }
1682
1683 static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
1684                                 struct page **prepared_pages,
1685                                 const char __user * buf)
1686 {
1687         long page_fault = 0;
1688         int i;
1689         int offset = pos & (PAGE_CACHE_SIZE - 1);
1690
1691         for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
1692                 size_t count = min_t(size_t,
1693                                      PAGE_CACHE_SIZE - offset, write_bytes);
1694                 struct page *page = prepared_pages[i];
1695                 fault_in_pages_readable(buf, count);
1696
1697                 /* Copy data from userspace to the current page */
1698                 kmap(page);
1699                 page_fault = __copy_from_user(page_address(page) + offset,
1700                                               buf, count);
1701                 /* Flush processor's dcache for this page */
1702                 flush_dcache_page(page);
1703                 kunmap(page);
1704                 buf += count;
1705                 write_bytes -= count;
1706
1707                 if (page_fault)
1708                         break;
1709         }
1710         return page_fault ? -EFAULT : 0;
1711 }
1712
1713 static void btrfs_drop_pages(struct page **pages, size_t num_pages)
1714 {
1715         size_t i;
1716         for (i = 0; i < num_pages; i++) {
1717                 if (!pages[i])
1718                         break;
1719                 unlock_page(pages[i]);
1720                 mark_page_accessed(pages[i]);
1721                 page_cache_release(pages[i]);
1722         }
1723 }
1724 static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
1725                                    struct btrfs_root *root,
1726                                    struct file *file,
1727                                    struct page **pages,
1728                                    size_t num_pages,
1729                                    loff_t pos,
1730                                    size_t write_bytes)
1731 {
1732         int i;
1733         int offset;
1734         int err = 0;
1735         int ret;
1736         int this_write;
1737         struct inode *inode = file->f_path.dentry->d_inode;
1738         struct buffer_head *bh;
1739         struct btrfs_file_extent_item *ei;
1740
1741         for (i = 0; i < num_pages; i++) {
1742                 offset = pos & (PAGE_CACHE_SIZE -1);
1743                 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1744                 /* FIXME, one block at a time */
1745
1746                 mutex_lock(&root->fs_info->fs_mutex);
1747                 trans = btrfs_start_transaction(root, 1);
1748                 btrfs_set_trans_block_group(trans, inode);
1749
1750                 bh = page_buffers(pages[i]);
1751                 if (buffer_mapped(bh) && bh->b_blocknr == 0) {
1752                         struct btrfs_key key;
1753                         struct btrfs_path *path;
1754                         char *ptr;
1755                         u32 datasize;
1756
1757                         path = btrfs_alloc_path();
1758                         BUG_ON(!path);
1759                         key.objectid = inode->i_ino;
1760                         key.offset = pages[i]->index << PAGE_CACHE_SHIFT;
1761                         key.flags = 0;
1762                         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
1763                         BUG_ON(write_bytes >= PAGE_CACHE_SIZE);
1764                         datasize = offset +
1765                                 btrfs_file_extent_calc_inline_size(write_bytes);
1766                         ret = btrfs_insert_empty_item(trans, root, path, &key,
1767                                                       datasize);
1768                         BUG_ON(ret);
1769                         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
1770                                path->slots[0], struct btrfs_file_extent_item);
1771                         btrfs_set_file_extent_generation(ei, trans->transid);
1772                         btrfs_set_file_extent_type(ei,
1773                                                    BTRFS_FILE_EXTENT_INLINE);
1774                         ptr = btrfs_file_extent_inline_start(ei);
1775                         btrfs_memcpy(root, path->nodes[0]->b_data,
1776                                      ptr, bh->b_data, offset + write_bytes);
1777                         mark_buffer_dirty(path->nodes[0]);
1778                         btrfs_free_path(path);
1779                 } else if (buffer_mapped(bh)) {
1780                         btrfs_csum_file_block(trans, root, inode->i_ino,
1781                                       pages[i]->index << PAGE_CACHE_SHIFT,
1782                                       kmap(pages[i]), PAGE_CACHE_SIZE);
1783                         kunmap(pages[i]);
1784                 }
1785                 SetPageChecked(pages[i]);
1786                 // btrfs_update_inode_block_group(trans, inode);
1787                 ret = btrfs_end_transaction(trans, root);
1788                 BUG_ON(ret);
1789                 mutex_unlock(&root->fs_info->fs_mutex);
1790
1791                 ret = btrfs_commit_write(file, pages[i], offset,
1792                                          offset + this_write);
1793                 pos += this_write;
1794                 if (ret) {
1795                         err = ret;
1796                         goto failed;
1797                 }
1798                 WARN_ON(this_write > write_bytes);
1799                 write_bytes -= this_write;
1800         }
1801 failed:
1802         return err;
1803 }
1804
/*
 * Remove the file extent items of @inode that overlap the byte range
 * [@start, @end).
 *
 * Items lying wholly inside the range are deleted, and for regular extents
 * with a non-zero disk block number the disk extent is passed to
 * btrfs_free_extent().  A regular extent that begins before @start is kept
 * and shortened so that it ends at @start.  A regular extent that reaches
 * past @end gets a new "bookend" item inserted at offset @end describing
 * the surviving tail.  inode->i_blocks is adjusted along the way.
 *
 * *@hint_block is set to the disk block number of each regular extent that
 * gets shortened or deleted (the last one wins).
 *
 * Returns 0 on success, -ENOMEM if no path could be allocated, or a
 * negative value from btrfs_lookup_file_extent().
 */
static int drop_extents(struct btrfs_trans_handle *trans,
                          struct btrfs_root *root,
                          struct inode *inode,
                          u64 start, u64 end, u64 *hint_block)
{
        int ret;
        struct btrfs_key key;
        struct btrfs_leaf *leaf;
        int slot;
        struct btrfs_file_extent_item *extent;
        u64 extent_end = 0;
        int keep;
        struct btrfs_file_extent_item old;
        struct btrfs_path *path;
        u64 search_start = start;
        int bookend;
        int found_type;
        int found_extent;
        int found_inline;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
        /* each pass handles one extent item, then searches again */
        while(1) {
                btrfs_release_path(root, path);
                ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
                                               search_start, -1);
                if (ret < 0)
                        goto out;
                if (ret > 0) {
                        /*
                         * NOTE(review): presumably ret > 0 means no item
                         * starts exactly at search_start, so step back to
                         * the preceding item -- confirm against
                         * btrfs_lookup_file_extent().
                         */
                        if (path->slots[0] == 0) {
                                ret = 0;
                                goto out;
                        }
                        path->slots[0]--;
                }
                keep = 0;
                bookend = 0;
                found_extent = 0;
                found_inline = 0;
                extent = NULL;
                leaf = btrfs_buffer_leaf(path->nodes[0]);
                slot = path->slots[0];
                btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
                /* done once the item starts at/after end or is another inode's */
                if (key.offset >= end || key.objectid != inode->i_ino) {
                        ret = 0;
                        goto out;
                }
                if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) {
                        ret = 0;
                        goto out;
                }
                extent = btrfs_item_ptr(leaf, slot,
                                        struct btrfs_file_extent_item);
                found_type = btrfs_file_extent_type(extent);
                /* work out the file offset at which this extent ends */
                if (found_type == BTRFS_FILE_EXTENT_REG) {
                        extent_end = key.offset +
                                (btrfs_file_extent_num_blocks(extent) <<
                                 inode->i_blkbits);
                        found_extent = 1;
                } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
                        found_inline = 1;
                        extent_end = key.offset +
                             btrfs_file_extent_inline_len(leaf->items + slot);
                }

                /* neither a regular nor an inline extent: stop */
                if (!found_extent && !found_inline) {
                        ret = 0;
                        goto out;
                }

                /* the extent ends at or before the point we searched for */
                if (search_start >= extent_end) {
                        ret = 0;
                        goto out;
                }

                /*
                 * Where the next pass will search: after an inline extent
                 * the end is rounded up to the block size.
                 */
                if (found_inline) {
                        u64 mask = root->blocksize - 1;
                        search_start = (extent_end + mask) & ~mask;
                } else
                        search_start = extent_end;

                /*
                 * The range ends inside this extent, so its tail has to
                 * survive as a bookend.  The old item is saved in 'old' and
                 * an extra reference is taken on the disk extent.
                 */
                if (end < extent_end && end >= key.offset) {
                        if (found_extent) {
                                u64 disk_blocknr =
                                        btrfs_file_extent_disk_blocknr(extent);
                                u64 disk_num_blocks =
                                      btrfs_file_extent_disk_num_blocks(extent);
                                memcpy(&old, extent, sizeof(old));
                                if (disk_blocknr != 0) {
                                        ret = btrfs_inc_extent_ref(trans, root,
                                                 disk_blocknr, disk_num_blocks);
                                        BUG_ON(ret);
                                }
                        }
                        /* a bookend for an inline extent is not expected */
                        WARN_ON(found_inline);
                        bookend = 1;
                }

                /*
                 * The range starts inside this extent: keep the item and
                 * shrink it so that it ends at start.
                 */
                if (start > key.offset) {
                        u64 new_num;
                        u64 old_num;
                        /* truncate existing extent */
                        keep = 1;
                        WARN_ON(start & (root->blocksize - 1));
                        if (found_extent) {
                                new_num = (start - key.offset) >>
                                        inode->i_blkbits;
                                old_num = btrfs_file_extent_num_blocks(extent);
                                *hint_block =
                                        btrfs_file_extent_disk_blocknr(extent);
                                /*
                                 * NOTE(review): the << 3 looks like a
                                 * conversion from 4096-byte blocks to
                                 * 512-byte i_blocks units -- confirm.
                                 */
                                if (btrfs_file_extent_disk_blocknr(extent)) {
                                        inode->i_blocks -=
                                                (old_num - new_num) << 3;
                                }
                                btrfs_set_file_extent_num_blocks(extent,
                                                                 new_num);
                                mark_buffer_dirty(path->nodes[0]);
                        } else {
                                /* shortening an inline extent is unsupported */
                                WARN_ON(1);
                        }
                }
                /* the item starts inside the range: delete it entirely */
                if (!keep) {
                        u64 disk_blocknr = 0;
                        u64 disk_num_blocks = 0;
                        u64 extent_num_blocks = 0;
                        /* read what is needed before the item goes away */
                        if (found_extent) {
                                disk_blocknr =
                                      btrfs_file_extent_disk_blocknr(extent);
                                disk_num_blocks =
                                      btrfs_file_extent_disk_num_blocks(extent);
                                extent_num_blocks =
                                      btrfs_file_extent_num_blocks(extent);
                                *hint_block =
                                        btrfs_file_extent_disk_blocknr(extent);
                        }
                        ret = btrfs_del_item(trans, root, path);
                        BUG_ON(ret);
                        btrfs_release_path(root, path);
                        extent = NULL;
                        if (found_extent && disk_blocknr != 0) {
                                inode->i_blocks -= extent_num_blocks << 3;
                                ret = btrfs_free_extent(trans, root,
                                                        disk_blocknr,
                                                        disk_num_blocks, 0);
                        }

                        BUG_ON(ret);
                        /* no tail to preserve and the range is covered */
                        if (!bookend && search_start >= end) {
                                ret = 0;
                                goto out;
                        }
                        if (!bookend)
                                continue;
                }
                if (bookend && found_extent) {
                        /* create bookend */
                        struct btrfs_key ins;
                        ins.objectid = inode->i_ino;
                        ins.offset = end;
                        ins.flags = 0;
                        btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);

                        btrfs_release_path(root, path);
                        ret = btrfs_insert_empty_item(trans, root, path, &ins,
                                                      sizeof(*extent));
                        BUG_ON(ret);
                        extent = btrfs_item_ptr(
                                    btrfs_buffer_leaf(path->nodes[0]),
                                    path->slots[0],
                                    struct btrfs_file_extent_item);
                        /* same disk extent as the saved original item */
                        btrfs_set_file_extent_disk_blocknr(extent,
                                    btrfs_file_extent_disk_blocknr(&old));
                        btrfs_set_file_extent_disk_num_blocks(extent,
                                    btrfs_file_extent_disk_num_blocks(&old));

                        /* advance the offset by the blocks that were dropped */
                        btrfs_set_file_extent_offset(extent,
                                    btrfs_file_extent_offset(&old) +
                                    ((end - key.offset) >> inode->i_blkbits));
                        WARN_ON(btrfs_file_extent_num_blocks(&old) <
                                (extent_end - end) >> inode->i_blkbits);
                        /* the bookend covers end up to the old extent end */
                        btrfs_set_file_extent_num_blocks(extent,
                                    (extent_end - end) >> inode->i_blkbits);

                        btrfs_set_file_extent_type(extent,
                                                   BTRFS_FILE_EXTENT_REG);
                        btrfs_set_file_extent_generation(extent,
                                    btrfs_file_extent_generation(&old));
                        btrfs_mark_buffer_dirty(path->nodes[0]);
                        if (btrfs_file_extent_disk_blocknr(&old) != 0) {
                                inode->i_blocks +=
                                      btrfs_file_extent_num_blocks(extent) << 3;
                        }
                        /* a bookend means the range ended here: finished */
                        ret = 0;
                        goto out;
                }
        }
out:
        btrfs_free_path(path);
        return ret;
}
2006
2007 static int prepare_pages(struct btrfs_root *root,
2008                          struct file *file,
2009                          struct page **pages,
2010                          size_t num_pages,
2011                          loff_t pos,
2012                          unsigned long first_index,
2013                          unsigned long last_index,
2014                          size_t write_bytes,
2015                          u64 alloc_extent_start)
2016 {
2017         int i;
2018         unsigned long index = pos >> PAGE_CACHE_SHIFT;
2019         struct inode *inode = file->f_path.dentry->d_inode;
2020         int offset;
2021         int err = 0;
2022         int this_write;
2023         struct buffer_head *bh;
2024         struct buffer_head *head;
2025         loff_t isize = i_size_read(inode);
2026
2027         memset(pages, 0, num_pages * sizeof(struct page *));
2028
2029         for (i = 0; i < num_pages; i++) {
2030                 pages[i] = grab_cache_page(inode->i_mapping, index + i);
2031                 if (!pages[i]) {
2032                         err = -ENOMEM;
2033                         goto failed_release;
2034                 }
2035                 cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
2036                 wait_on_page_writeback(pages[i]);
2037                 offset = pos & (PAGE_CACHE_SIZE -1);
2038                 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
2039                 if (!page_has_buffers(pages[i])) {
2040                         create_empty_buffers(pages[i],
2041                                              root->fs_info->sb->s_blocksize,
2042                                              (1 << BH_Uptodate));
2043                 }
2044                 head = page_buffers(pages[i]);
2045                 bh = head;
2046                 do {
2047                         err = btrfs_map_bh_to_logical(root, bh,
2048                                                       alloc_extent_start);
2049                         BUG_ON(err);
2050                         if (err)
2051                                 goto failed_truncate;
2052                         bh = bh->b_this_page;
2053                         if (alloc_extent_start)
2054                                 alloc_extent_start++;
2055                 } while (bh != head);
2056                 pos += this_write;
2057                 WARN_ON(this_write > write_bytes);
2058                 write_bytes -= this_write;
2059         }
2060         return 0;
2061
2062 failed_release:
2063         btrfs_drop_pages(pages, num_pages);
2064         return err;
2065
2066 failed_truncate:
2067         btrfs_drop_pages(pages, num_pages);
2068         if (pos > isize)
2069                 vmtruncate(inode, isize);
2070         return err;
2071 }
2072
2073 static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
2074                                 size_t count, loff_t *ppos)
2075 {
2076         loff_t pos;
2077         size_t num_written = 0;
2078         int err = 0;
2079         int ret = 0;
2080         struct inode *inode = file->f_path.dentry->d_inode;
2081         struct btrfs_root *root = BTRFS_I(inode)->root;
2082         struct page *pages[8];
2083         struct page *pinned[2];
2084         unsigned long first_index;
2085         unsigned long last_index;
2086         u64 start_pos;
2087         u64 num_blocks;
2088         u64 alloc_extent_start;
2089         u64 hint_block;
2090         struct btrfs_trans_handle *trans;
2091         struct btrfs_key ins;
2092         pinned[0] = NULL;
2093         pinned[1] = NULL;
2094         if (file->f_flags & O_DIRECT)
2095                 return -EINVAL;
2096         pos = *ppos;
2097         vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
2098         current->backing_dev_info = inode->i_mapping->backing_dev_info;
2099         err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
2100         if (err)
2101                 goto out;
2102         if (count == 0)
2103                 goto out;
2104         err = remove_suid(file->f_path.dentry);
2105         if (err)
2106                 goto out;
2107         file_update_time(file);
2108
2109         start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
2110         num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
2111                         inode->i_blkbits;
2112
2113         mutex_lock(&inode->i_mutex);
2114         first_index = pos >> PAGE_CACHE_SHIFT;
2115         last_index = (pos + count) >> PAGE_CACHE_SHIFT;
2116
2117         if ((pos & (PAGE_CACHE_SIZE - 1))) {
2118                 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
2119                 if (!PageUptodate(pinned[0])) {
2120                         ret = mpage_readpage(pinned[0], btrfs_get_block);
2121                         BUG_ON(ret);
2122                         wait_on_page_locked(pinned[0]);
2123                 } else {
2124                         unlock_page(pinned[0]);
2125                 }
2126         }
2127         if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
2128                 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
2129                 if (!PageUptodate(pinned[1])) {
2130                         ret = mpage_readpage(pinned[1], btrfs_get_block);
2131                         BUG_ON(ret);
2132                         wait_on_page_locked(pinned[1]);
2133                 } else {
2134                         unlock_page(pinned[1]);
2135                 }
2136         }
2137
2138         mutex_lock(&root->fs_info->fs_mutex);
2139         trans = btrfs_start_transaction(root, 1);
2140         if (!trans) {
2141                 err = -ENOMEM;
2142                 mutex_unlock(&root->fs_info->fs_mutex);
2143                 goto out_unlock;
2144         }
2145         btrfs_set_trans_block_group(trans, inode);
2146         /* FIXME blocksize != 4096 */
2147         inode->i_blocks += num_blocks << 3;
2148         hint_block = 0;
2149         if (start_pos < inode->i_size) {
2150                 /* FIXME blocksize != pagesize */
2151                 ret = drop_extents(trans, root, inode,
2152                                    start_pos,
2153                                    (pos + count + root->blocksize -1) &
2154                                    ~((u64)root->blocksize - 1), &hint_block);
2155                 BUG_ON(ret);
2156         }
2157         if (inode->i_size < start_pos) {
2158                 u64 last_pos_in_file;
2159                 u64 hole_size;
2160                 u64 mask = root->blocksize - 1;
2161                 last_pos_in_file = (inode->i_size + mask) & ~mask;
2162                 hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
2163                 hole_size >>= inode->i_blkbits;
2164                 if (last_pos_in_file < start_pos) {
2165                         ret = btrfs_insert_file_extent(trans, root,
2166                                                        inode->i_ino,
2167                                                        last_pos_in_file,
2168                                                        0, 0, hole_size);
2169                 }
2170                 BUG_ON(ret);
2171         }
2172         if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size ||
2173             pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
2174                 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
2175                                          num_blocks, hint_block, (u64)-1,
2176                                          &ins, 1);
2177                 BUG_ON(ret);
2178                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
2179                                        start_pos, ins.objectid, ins.offset,
2180                                        ins.offset);
2181                 BUG_ON(ret);
2182         } else {
2183                 ins.offset = 0;
2184                 ins.objectid = 0;
2185         }
2186         BUG_ON(ret);
2187         alloc_extent_start = ins.objectid;
2188         // btrfs_update_inode_block_group(trans, inode);
2189         ret = btrfs_end_transaction(trans, root);
2190         mutex_unlock(&root->fs_info->fs_mutex);
2191
2192         while(count > 0) {
2193                 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
2194                 size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset);
2195                 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
2196                                         PAGE_CACHE_SHIFT;
2197
2198                 memset(pages, 0, sizeof(pages));
2199                 ret = prepare_pages(root, file, pages, num_pages,
2200                                     pos, first_index, last_index,
2201                                     write_bytes, alloc_extent_start);
2202                 BUG_ON(ret);
2203
2204                 /* FIXME blocks != pagesize */
2205                 if (alloc_extent_start)
2206                         alloc_extent_start += num_pages;
2207                 ret = btrfs_copy_from_user(pos, num_pages,
2208                                            write_bytes, pages, buf);
2209                 BUG_ON(ret);
2210
2211                 ret = dirty_and_release_pages(NULL, root, file, pages,
2212                                               num_pages, pos, write_bytes);
2213                 BUG_ON(ret);
2214                 btrfs_drop_pages(pages, num_pages);
2215
2216                 buf += write_bytes;
2217                 count -= write_bytes;
2218                 pos += write_bytes;
2219                 num_written += write_bytes;
2220
2221                 balance_dirty_pages_ratelimited(inode->i_mapping);
2222                 btrfs_btree_balance_dirty(root);
2223                 cond_resched();
2224         }
2225 out_unlock:
2226         mutex_unlock(&inode->i_mutex);
2227 out:
2228         if (pinned[0])
2229                 page_cache_release(pinned[0]);
2230         if (pinned[1])
2231                 page_cache_release(pinned[1]);
2232         *ppos = pos;
2233         current->backing_dev_info = NULL;
2234         mark_inode_dirty(inode);
2235         return num_written ? num_written : err;
2236 }
2237
2238 static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
2239                         unsigned long offset, unsigned long size)
2240 {
2241         char *kaddr;
2242         unsigned long left, count = desc->count;
2243         struct inode *inode = page->mapping->host;
2244
2245         if (size > count)
2246                 size = count;
2247
2248         if (!PageChecked(page)) {
2249                 /* FIXME, do it per block */
2250                 struct btrfs_root *root = BTRFS_I(inode)->root;
2251                 int ret;
2252                 struct buffer_head *bh;
2253
2254                 if (page_has_buffers(page)) {
2255                         bh = page_buffers(page);
2256                         if (!buffer_mapped(bh)) {
2257                                 SetPageChecked(page);
2258                                 goto checked;
2259                         }
2260                 }
2261
2262                 ret = btrfs_csum_verify_file_block(root,
2263                                   page->mapping->host->i_ino,
2264                                   page->index << PAGE_CACHE_SHIFT,
2265                                   kmap(page), PAGE_CACHE_SIZE);
2266                 if (ret) {
2267                         if (ret != -ENOENT) {
2268                                 printk("failed to verify ino %lu page %lu ret %d\n",
2269                                        page->mapping->host->i_ino,
2270                                        page->index, ret);
2271                                 memset(page_address(page), 1, PAGE_CACHE_SIZE);
2272                                 flush_dcache_page(page);
2273                         }
2274                 }
2275                 SetPageChecked(page);
2276                 kunmap(page);
2277         }
2278 checked:
2279         /*
2280          * Faults on the destination of a read are common, so do it before
2281          * taking the kmap.
2282          */
2283         if (!fault_in_pages_writeable(desc->arg.buf, size)) {
2284                 kaddr = kmap_atomic(page, KM_USER0);
2285                 left = __copy_to_user_inatomic(desc->arg.buf,
2286                                                 kaddr + offset, size);
2287                 kunmap_atomic(kaddr, KM_USER0);
2288                 if (left == 0)
2289                         goto success;
2290         }
2291
2292         /* Do it the slow way */
2293         kaddr = kmap(page);
2294         left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
2295         kunmap(page);
2296
2297         if (left) {
2298                 size -= left;
2299                 desc->error = -EFAULT;
2300         }
2301 success:
2302         desc->count = count - size;
2303         desc->written += size;
2304         desc->arg.buf += size;
2305         return size;
2306 }
2307
2308 /**
2309  * btrfs_file_aio_read - filesystem read routine
2310  * @iocb:       kernel I/O control block
2311  * @iov:        io vector request
2312  * @nr_segs:    number of segments in the iovec
2313  * @pos:        current file position
2314  */
2315 static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
2316                                    unsigned long nr_segs, loff_t pos)
2317 {
2318         struct file *filp = iocb->ki_filp;
2319         ssize_t retval;
2320         unsigned long seg;
2321         size_t count;
2322         loff_t *ppos = &iocb->ki_pos;
2323
2324         count = 0;
2325         for (seg = 0; seg < nr_segs; seg++) {
2326                 const struct iovec *iv = &iov[seg];
2327
2328                 /*
2329                  * If any segment has a negative length, or the cumulative
2330                  * length ever wraps negative then return -EINVAL.
2331                  */
2332                 count += iv->iov_len;
2333                 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
2334                         return -EINVAL;
2335                 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
2336                         continue;
2337                 if (seg == 0)
2338                         return -EFAULT;
2339                 nr_segs = seg;
2340                 count -= iv->iov_len;   /* This segment is no good */
2341                 break;
2342         }
2343         retval = 0;
2344         if (count) {
2345                 for (seg = 0; seg < nr_segs; seg++) {
2346                         read_descriptor_t desc;
2347
2348                         desc.written = 0;
2349                         desc.arg.buf = iov[seg].iov_base;
2350                         desc.count = iov[seg].iov_len;
2351                         if (desc.count == 0)
2352                                 continue;
2353                         desc.error = 0;
2354                         do_generic_file_read(filp, ppos, &desc,
2355                                              btrfs_read_actor);
2356                         retval += desc.written;
2357                         if (desc.error) {
2358                                 retval = retval ?: desc.error;
2359                                 break;
2360                         }
2361                 }
2362         }
2363         return retval;
2364 }
2365
2366 static int create_subvol(struct btrfs_root *root, char *name, int namelen)
2367 {
2368         struct btrfs_trans_handle *trans;
2369         struct btrfs_key key;
2370         struct btrfs_root_item root_item;
2371         struct btrfs_inode_item *inode_item;
2372         struct buffer_head *subvol;
2373         struct btrfs_leaf *leaf;
2374         struct btrfs_root *new_root;
2375         struct inode *inode;
2376         struct inode *dir;
2377         int ret;
2378         u64 objectid;
2379         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2380
2381         mutex_lock(&root->fs_info->fs_mutex);
2382         trans = btrfs_start_transaction(root, 1);
2383         BUG_ON(!trans);
2384
2385         subvol = btrfs_alloc_free_block(trans, root, 0);
2386         if (subvol == NULL)
2387                 return -ENOSPC;
2388         leaf = btrfs_buffer_leaf(subvol);
2389         btrfs_set_header_nritems(&leaf->header, 0);
2390         btrfs_set_header_level(&leaf->header, 0);
2391         btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol));
2392         btrfs_set_header_generation(&leaf->header, trans->transid);
2393         btrfs_set_header_owner(&leaf->header, root->root_key.objectid);
2394         memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
2395                sizeof(leaf->header.fsid));
2396         mark_buffer_dirty(subvol);
2397
2398         inode_item = &root_item.inode;
2399         memset(inode_item, 0, sizeof(*inode_item));
2400         btrfs_set_inode_generation(inode_item, 1);
2401         btrfs_set_inode_size(inode_item, 3);
2402         btrfs_set_inode_nlink(inode_item, 1);
2403         btrfs_set_inode_nblocks(inode_item, 1);
2404         btrfs_set_inode_mode(inode_item, S_IFDIR | 0755);
2405
2406         btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
2407         btrfs_set_root_refs(&root_item, 1);
2408         brelse(subvol);
2409         subvol = NULL;
2410
2411         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2412                                        0, &objectid);
2413         BUG_ON(ret);
2414
2415         btrfs_set_root_dirid(&root_item, new_dirid);
2416
2417         key.objectid = objectid;
2418         key.offset = 1;
2419         key.flags = 0;
2420         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2421         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2422                                 &root_item);
2423         BUG_ON(ret);
2424
2425         /*
2426          * insert the directory item
2427          */
2428         key.offset = (u64)-1;
2429         dir = root->fs_info->sb->s_root->d_inode;
2430         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2431                                     name, namelen, dir->i_ino, &key, 0);
2432         BUG_ON(ret);
2433
2434         ret = btrfs_commit_transaction(trans, root);
2435         BUG_ON(ret);
2436
2437         new_root = btrfs_read_fs_root(root->fs_info, &key);
2438         BUG_ON(!new_root);
2439
2440         trans = btrfs_start_transaction(new_root, 1);
2441         BUG_ON(!trans);
2442
2443         inode = btrfs_new_inode(trans, new_root, new_dirid,
2444                                 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2445         inode->i_op = &btrfs_dir_inode_operations;
2446         inode->i_fop = &btrfs_dir_file_operations;
2447
2448         ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid);
2449         BUG_ON(ret);
2450
2451         inode->i_nlink = 1;
2452         inode->i_size = 6;
2453         ret = btrfs_update_inode(trans, new_root, inode);
2454         BUG_ON(ret);
2455
2456         ret = btrfs_commit_transaction(trans, new_root);
2457         BUG_ON(ret);
2458
2459         iput(inode);
2460
2461         mutex_unlock(&root->fs_info->fs_mutex);
2462         btrfs_btree_balance_dirty(root);
2463         return 0;
2464 }
2465
2466 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2467 {
2468         struct btrfs_trans_handle *trans;
2469         struct btrfs_key key;
2470         struct btrfs_root_item new_root_item;
2471         int ret;
2472         u64 objectid;
2473
2474         if (!root->ref_cows)
2475                 return -EINVAL;
2476
2477         mutex_lock(&root->fs_info->fs_mutex);
2478         trans = btrfs_start_transaction(root, 1);
2479         BUG_ON(!trans);
2480
2481         ret = btrfs_update_inode(trans, root, root->inode);
2482         BUG_ON(ret);
2483
2484         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2485                                        0, &objectid);
2486         BUG_ON(ret);
2487
2488         memcpy(&new_root_item, &root->root_item,
2489                sizeof(new_root_item));
2490
2491         key.objectid = objectid;
2492         key.offset = 1;
2493         key.flags = 0;
2494         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2495         btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node));
2496
2497         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2498                                 &new_root_item);
2499         BUG_ON(ret);
2500
2501         /*
2502          * insert the directory item
2503          */
2504         key.offset = (u64)-1;
2505         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2506                                     name, namelen,
2507                                     root->fs_info->sb->s_root->d_inode->i_ino,
2508                                     &key, 0);
2509
2510         BUG_ON(ret);
2511
2512         ret = btrfs_inc_root_ref(trans, root);
2513         BUG_ON(ret);
2514
2515         ret = btrfs_commit_transaction(trans, root);
2516         BUG_ON(ret);
2517         mutex_unlock(&root->fs_info->fs_mutex);
2518         btrfs_btree_balance_dirty(root);
2519         return 0;
2520 }
2521
2522 static int add_disk(struct btrfs_root *root, char *name, int namelen)
2523 {
2524         struct block_device *bdev;
2525         struct btrfs_path *path;
2526         struct super_block *sb = root->fs_info->sb;
2527         struct btrfs_root *dev_root = root->fs_info->dev_root;
2528         struct btrfs_trans_handle *trans;
2529         struct btrfs_device_item *dev_item;
2530         struct btrfs_key key;
2531         u16 item_size;
2532         u64 num_blocks;
2533         u64 new_blocks;
2534         u64 device_id;
2535         int ret;
2536
2537 printk("adding disk %s\n", name);
2538         path = btrfs_alloc_path();
2539         if (!path)
2540                 return -ENOMEM;
2541         num_blocks = btrfs_super_total_blocks(root->fs_info->disk_super);
2542         bdev = open_bdev_excl(name, O_RDWR, sb);
2543         if (IS_ERR(bdev)) {
2544                 ret = PTR_ERR(bdev);
2545 printk("open bdev excl failed ret %d\n", ret);
2546                 goto out_nolock;
2547         }
2548         set_blocksize(bdev, sb->s_blocksize);
2549         new_blocks = bdev->bd_inode->i_size >> sb->s_blocksize_bits;
2550         key.objectid = num_blocks;
2551         key.offset = new_blocks;
2552         key.flags = 0;
2553         btrfs_set_key_type(&key, BTRFS_DEV_ITEM_KEY);
2554
2555         mutex_lock(&dev_root->fs_info->fs_mutex);
2556         trans = btrfs_start_transaction(dev_root, 1);
2557         item_size = sizeof(*dev_item) + namelen;
2558 printk("insert empty on %Lu %Lu %u size %d\n", num_blocks, new_blocks, key.flags, item_size);
2559         ret = btrfs_insert_empty_item(trans, dev_root, path, &key, item_size);
2560         if (ret) {
2561 printk("insert failed %d\n", ret);
2562                 close_bdev_excl(bdev);
2563                 if (ret > 0)
2564                         ret = -EEXIST;
2565                 goto out;
2566         }
2567         dev_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2568                                   path->slots[0], struct btrfs_device_item);
2569         btrfs_set_device_pathlen(dev_item, namelen);
2570         memcpy(dev_item + 1, name, namelen);
2571
2572         device_id = btrfs_super_last_device_id(root->fs_info->disk_super) + 1;
2573         btrfs_set_super_last_device_id(root->fs_info->disk_super, device_id);
2574         btrfs_set_device_id(dev_item, device_id);
2575         mark_buffer_dirty(path->nodes[0]);
2576
2577         ret = btrfs_insert_dev_radix(root, bdev, device_id, num_blocks,
2578                                      new_blocks);
2579
2580         if (!ret) {
2581                 btrfs_set_super_total_blocks(root->fs_info->disk_super,
2582                                              num_blocks + new_blocks);
2583                 i_size_write(root->fs_info->btree_inode,
2584                              (num_blocks + new_blocks) <<
2585                              root->fs_info->btree_inode->i_blkbits);
2586         }
2587
2588 out:
2589         ret = btrfs_commit_transaction(trans, dev_root);
2590         BUG_ON(ret);
2591         mutex_unlock(&root->fs_info->fs_mutex);
2592 out_nolock:
2593         btrfs_free_path(path);
2594         btrfs_btree_balance_dirty(root);
2595
2596         return ret;
2597 }
2598
2599 static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
2600                        cmd, unsigned long arg)
2601 {
2602         struct btrfs_root *root = BTRFS_I(inode)->root;
2603         struct btrfs_ioctl_vol_args vol_args;
2604         int ret = 0;
2605         struct btrfs_dir_item *di;
2606         int namelen;
2607         struct btrfs_path *path;
2608         u64 root_dirid;
2609
2610         switch (cmd) {
2611         case BTRFS_IOC_SNAP_CREATE:
2612                 if (copy_from_user(&vol_args,
2613                                    (struct btrfs_ioctl_vol_args __user *)arg,
2614                                    sizeof(vol_args)))
2615                         return -EFAULT;
2616                 namelen = strlen(vol_args.name);
2617                 if (namelen > BTRFS_VOL_NAME_MAX)
2618                         return -EINVAL;
2619                 path = btrfs_alloc_path();
2620                 if (!path)
2621                         return -ENOMEM;
2622                 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2623                 mutex_lock(&root->fs_info->fs_mutex);
2624                 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2625                                     path, root_dirid,
2626                                     vol_args.name, namelen, 0);
2627                 mutex_unlock(&root->fs_info->fs_mutex);
2628                 btrfs_free_path(path);
2629                 if (di && !IS_ERR(di))
2630                         return -EEXIST;
2631
2632                 if (root == root->fs_info->tree_root)
2633                         ret = create_subvol(root, vol_args.name, namelen);
2634                 else
2635                         ret = create_snapshot(root, vol_args.name, namelen);
2636                 WARN_ON(ret);
2637                 break;
2638         case BTRFS_IOC_ADD_DISK:
2639                 if (copy_from_user(&vol_args,
2640                                    (struct btrfs_ioctl_vol_args __user *)arg,
2641                                    sizeof(vol_args)))
2642                         return -EFAULT;
2643                 namelen = strlen(vol_args.name);
2644                 if (namelen > BTRFS_VOL_NAME_MAX)
2645                         return -EINVAL;
2646                 vol_args.name[namelen] = '\0';
2647                 ret = add_disk(root, vol_args.name, namelen);
2648                 break;
2649         default:
2650                 return -ENOTTY;
2651         }
2652         return ret;
2653 }
2654
/*
 * Slab caches used by btrfs.  All five are created in init_inodecache()
 * and torn down in destroy_inodecache().  Only the inode cache is private
 * to this file; the others have external linkage.
 */
static struct kmem_cache *btrfs_inode_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_transaction_cachep;
struct kmem_cache *btrfs_bit_radix_cachep;
struct kmem_cache *btrfs_path_cachep;
2660
2661 /*
2662  * Called inside transaction, so use GFP_NOFS
2663  */
2664 static struct inode *btrfs_alloc_inode(struct super_block *sb)
2665 {
2666         struct btrfs_inode *ei;
2667
2668         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2669         if (!ei)
2670                 return NULL;
2671         return &ei->vfs_inode;
2672 }
2673
2674 static void btrfs_destroy_inode(struct inode *inode)
2675 {
2676         WARN_ON(!list_empty(&inode->i_dentry));
2677         WARN_ON(inode->i_data.nrpages);
2678
2679         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2680 }
2681
2682 static void init_once(void * foo, struct kmem_cache * cachep,
2683                       unsigned long flags)
2684 {
2685         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2686
2687         if ((flags & (SLAB_CTOR_CONSTRUCTOR)) ==
2688             SLAB_CTOR_CONSTRUCTOR) {
2689                 inode_init_once(&ei->vfs_inode);
2690         }
2691 }
2692
2693 static int init_inodecache(void)
2694 {
2695         btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
2696                                              sizeof(struct btrfs_inode),
2697                                              0, (SLAB_RECLAIM_ACCOUNT|
2698                                                 SLAB_MEM_SPREAD),
2699                                              init_once, NULL);
2700         btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
2701                                              sizeof(struct btrfs_trans_handle),
2702                                              0, (SLAB_RECLAIM_ACCOUNT|
2703                                                 SLAB_MEM_SPREAD),
2704                                              NULL, NULL);
2705         btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
2706                                              sizeof(struct btrfs_transaction),
2707                                              0, (SLAB_RECLAIM_ACCOUNT|
2708                                                 SLAB_MEM_SPREAD),
2709                                              NULL, NULL);
2710         btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
2711                                              sizeof(struct btrfs_transaction),
2712                                              0, (SLAB_RECLAIM_ACCOUNT|
2713                                                 SLAB_MEM_SPREAD),
2714                                              NULL, NULL);
2715         btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix",
2716                                              256,
2717                                              0, (SLAB_RECLAIM_ACCOUNT|
2718                                                 SLAB_MEM_SPREAD |
2719                                                 SLAB_DESTROY_BY_RCU),
2720                                              NULL, NULL);
2721         if (btrfs_inode_cachep == NULL || btrfs_trans_handle_cachep == NULL ||
2722             btrfs_transaction_cachep == NULL || btrfs_bit_radix_cachep == NULL)
2723                 return -ENOMEM;
2724         return 0;
2725 }
2726
2727 static void destroy_inodecache(void)
2728 {
2729         kmem_cache_destroy(btrfs_inode_cachep);
2730         kmem_cache_destroy(btrfs_trans_handle_cachep);
2731         kmem_cache_destroy(btrfs_transaction_cachep);
2732         kmem_cache_destroy(btrfs_bit_radix_cachep);
2733         kmem_cache_destroy(btrfs_path_cachep);
2734 }
2735
2736 static int btrfs_get_sb(struct file_system_type *fs_type,
2737         int flags, const char *dev_name, void *data, struct vfsmount *mnt)
2738 {
2739         return get_sb_bdev(fs_type, flags, dev_name, data,
2740                            btrfs_fill_super, mnt);
2741 }
2742
2743 static int btrfs_getattr(struct vfsmount *mnt,
2744                          struct dentry *dentry, struct kstat *stat)
2745 {
2746         struct inode *inode = dentry->d_inode;
2747         generic_fillattr(inode, stat);
2748         stat->blksize = 256 * 1024;
2749         return 0;
2750 }
2751
2752 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
2753 {
2754         struct btrfs_root *root = btrfs_sb(dentry->d_sb);
2755         struct btrfs_super_block *disk_super = root->fs_info->disk_super;
2756
2757         buf->f_namelen = BTRFS_NAME_LEN;
2758         buf->f_blocks = btrfs_super_total_blocks(disk_super);
2759         buf->f_bfree = buf->f_blocks - btrfs_super_blocks_used(disk_super);
2760         buf->f_bavail = buf->f_bfree;
2761         buf->f_bsize = dentry->d_sb->s_blocksize;
2762         buf->f_type = BTRFS_SUPER_MAGIC;
2763         return 0;
2764 }
2765
2766 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
2767                            struct inode * new_dir,struct dentry *new_dentry)
2768 {
2769         struct btrfs_trans_handle *trans;
2770         struct btrfs_root *root = BTRFS_I(old_dir)->root;
2771         struct inode *new_inode = new_dentry->d_inode;
2772         struct inode *old_inode = old_dentry->d_inode;
2773         struct timespec ctime = CURRENT_TIME;
2774         struct btrfs_path *path;
2775         struct btrfs_dir_item *di;
2776         int ret;
2777
2778         if (S_ISDIR(old_inode->i_mode) && new_inode &&
2779             new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
2780                 return -ENOTEMPTY;
2781         }
2782         mutex_lock(&root->fs_info->fs_mutex);
2783         trans = btrfs_start_transaction(root, 1);
2784         btrfs_set_trans_block_group(trans, new_dir);
2785         path = btrfs_alloc_path();
2786         if (!path) {
2787                 ret = -ENOMEM;
2788                 goto out_fail;
2789         }
2790
2791         old_dentry->d_inode->i_nlink++;
2792         old_dir->i_ctime = old_dir->i_mtime = ctime;
2793         new_dir->i_ctime = new_dir->i_mtime = ctime;
2794         old_inode->i_ctime = ctime;
2795         if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) {
2796                 struct btrfs_key *location = &BTRFS_I(new_dir)->location;
2797                 u64 old_parent_oid;
2798                 di = btrfs_lookup_dir_item(trans, root, path, old_inode->i_ino,
2799                                            "..", 2, -1);
2800                 if (IS_ERR(di)) {
2801                         ret = PTR_ERR(di);
2802                         goto out_fail;
2803                 }
2804                 if (!di) {
2805                         ret = -ENOENT;
2806                         goto out_fail;
2807                 }
2808                 old_parent_oid = btrfs_disk_key_objectid(&di->location);
2809                 ret = btrfs_del_item(trans, root, path);
2810                 if (ret) {
2811                         ret = -EIO;
2812                         goto out_fail;
2813                 }
2814                 btrfs_release_path(root, path);
2815
2816                 di = btrfs_lookup_dir_index_item(trans, root, path,
2817                                                  old_inode->i_ino,
2818                                                  old_parent_oid,
2819                                                  "..", 2, -1);
2820                 if (IS_ERR(di)) {
2821                         ret = PTR_ERR(di);
2822                         goto out_fail;
2823                 }
2824                 if (!di) {
2825                         ret = -ENOENT;
2826                         goto out_fail;
2827                 }
2828                 ret = btrfs_del_item(trans, root, path);
2829                 if (ret) {
2830                         ret = -EIO;
2831                         goto out_fail;
2832                 }
2833                 btrfs_release_path(root, path);
2834
2835                 ret = btrfs_insert_dir_item(trans, root, "..", 2,
2836                                             old_inode->i_ino, location, 0);
2837                 if (ret)
2838                         goto out_fail;
2839         }
2840
2841
2842         ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
2843         if (ret)
2844                 goto out_fail;
2845
2846         if (new_inode) {
2847                 new_inode->i_ctime = CURRENT_TIME;
2848                 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
2849                 if (ret)
2850                         goto out_fail;
2851                 if (S_ISDIR(new_inode->i_mode))
2852                         clear_nlink(new_inode);
2853                 else
2854                         drop_nlink(new_inode);
2855                 btrfs_update_inode(trans, root, new_inode);
2856         }
2857         ret = btrfs_add_link(trans, new_dentry, old_inode);
2858         if (ret)
2859                 goto out_fail;
2860
2861 out_fail:
2862         btrfs_free_path(path);
2863         btrfs_end_transaction(trans, root);
2864         mutex_unlock(&root->fs_info->fs_mutex);
2865         return ret;
2866 }
2867
2868 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
2869                          const char *symname)
2870 {
2871         struct btrfs_trans_handle *trans;
2872         struct btrfs_root *root = BTRFS_I(dir)->root;
2873         struct btrfs_path *path;
2874         struct btrfs_key key;
2875         struct inode *inode;
2876         int err;
2877         int drop_inode = 0;
2878         u64 objectid;
2879         int name_len;
2880         int datasize;
2881         char *ptr;
2882         struct btrfs_file_extent_item *ei;
2883
2884         name_len = strlen(symname) + 1;
2885         if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
2886                 return -ENAMETOOLONG;
2887         mutex_lock(&root->fs_info->fs_mutex);
2888         trans = btrfs_start_transaction(root, 1);
2889         btrfs_set_trans_block_group(trans, dir);
2890
2891         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2892         if (err) {
2893                 err = -ENOSPC;
2894                 goto out_unlock;
2895         }
2896
2897         inode = btrfs_new_inode(trans, root, objectid,
2898                                 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
2899         err = PTR_ERR(inode);
2900         if (IS_ERR(inode))
2901                 goto out_unlock;
2902
2903         btrfs_set_trans_block_group(trans, inode);
2904         err = btrfs_add_nondir(trans, dentry, inode);
2905         if (err)
2906                 drop_inode = 1;
2907         else {
2908                 inode->i_mapping->a_ops = &btrfs_aops;
2909                 inode->i_fop = &btrfs_file_operations;
2910                 inode->i_op = &btrfs_file_inode_operations;
2911         }
2912         dir->i_sb->s_dirt = 1;
2913         btrfs_update_inode_block_group(trans, inode);
2914         btrfs_update_inode_block_group(trans, dir);
2915         if (drop_inode)
2916                 goto out_unlock;
2917
2918         path = btrfs_alloc_path();
2919         BUG_ON(!path);
2920         key.objectid = inode->i_ino;
2921         key.offset = 0;
2922         key.flags = 0;
2923         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2924         datasize = btrfs_file_extent_calc_inline_size(name_len);
2925         err = btrfs_insert_empty_item(trans, root, path, &key,
2926                                       datasize);
2927         BUG_ON(err);
2928         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2929                path->slots[0], struct btrfs_file_extent_item);
2930         btrfs_set_file_extent_generation(ei, trans->transid);
2931         btrfs_set_file_extent_type(ei,
2932                                    BTRFS_FILE_EXTENT_INLINE);
2933         ptr = btrfs_file_extent_inline_start(ei);
2934         btrfs_memcpy(root, path->nodes[0]->b_data,
2935                      ptr, symname, name_len);
2936         mark_buffer_dirty(path->nodes[0]);
2937         btrfs_free_path(path);
2938         inode->i_op = &btrfs_symlink_inode_operations;
2939         inode->i_mapping->a_ops = &btrfs_symlink_aops;
2940         inode->i_size = name_len - 1;
2941         btrfs_update_inode(trans, root, inode);
2942         err = 0;
2943
2944 out_unlock:
2945         btrfs_end_transaction(trans, root);
2946         mutex_unlock(&root->fs_info->fs_mutex);
2947
2948         if (drop_inode) {
2949                 inode_dec_link_count(inode);
2950                 iput(inode);
2951         }
2952         btrfs_btree_balance_dirty(root);
2953         return err;
2954 }
2955
/*
 * Filesystem type registered by init_btrfs_fs() under the name "btrfs".
 * Mounts go through btrfs_get_sb(); FS_REQUIRES_DEV is set.
 */
static struct file_system_type btrfs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "btrfs",
        .get_sb         = btrfs_get_sb,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
};
2963
/* Superblock operations: inode lifecycle, write back and statfs hooks. */
static struct super_operations btrfs_super_ops = {
        .delete_inode   = btrfs_delete_inode,
        .put_super      = btrfs_put_super,
        .read_inode     = btrfs_read_locked_inode,
        .write_super    = btrfs_write_super,
        .sync_fs        = btrfs_sync_fs,
        .write_inode    = btrfs_write_inode,
        .dirty_inode    = btrfs_dirty_inode,
        .alloc_inode    = btrfs_alloc_inode,
        .destroy_inode  = btrfs_destroy_inode,
        .statfs         = btrfs_statfs,
};
2976
/*
 * Inode operations for writable directories; create_subvol() installs this
 * table on the root directory of a new subvolume.
 */
static struct inode_operations btrfs_dir_inode_operations = {
        .lookup         = btrfs_lookup,
        .create         = btrfs_create,
        .unlink         = btrfs_unlink,
        .link           = btrfs_link,
        .mkdir          = btrfs_mkdir,
        .rmdir          = btrfs_rmdir,
        .rename         = btrfs_rename,
        .symlink        = btrfs_symlink,
        .setattr        = btrfs_setattr,
};
2988
/* Directory inode operations that provide lookup only. */
static struct inode_operations btrfs_dir_ro_inode_operations = {
        .lookup         = btrfs_lookup,
};
2992
/*
 * File operations for open directories: directory reading plus the btrfs
 * ioctl entry point.
 */
static struct file_operations btrfs_dir_file_operations = {
        .llseek         = generic_file_llseek,
        .read           = generic_read_dir,
        .readdir        = btrfs_readdir,
        .ioctl          = btrfs_ioctl,
};
2999
/* Address space operations for page cache reads and writes of file data. */
static struct address_space_operations btrfs_aops = {
        .readpage       = btrfs_readpage,
        .writepage      = btrfs_writepage,
        .sync_page      = block_sync_page,
        .prepare_write  = btrfs_prepare_write,
        .commit_write   = btrfs_commit_write,
};
3007
/*
 * Address space operations that btrfs_symlink() installs on symlink inodes:
 * page read and write back only, no prepare/commit write hooks.
 */
static struct address_space_operations btrfs_symlink_aops = {
        .readpage       = btrfs_readpage,
        .writepage      = btrfs_writepage,
};
3012
/* Inode operations for files: truncate plus attribute get and set. */
static struct inode_operations btrfs_file_inode_operations = {
        .truncate       = btrfs_truncate,
        .getattr        = btrfs_getattr,
        .setattr        = btrfs_setattr,
};
3018
/*
 * File operations for open files.  Synchronous reads go through
 * do_sync_read(), with btrfs_file_aio_read() installed as the aio_read hook.
 */
static struct file_operations btrfs_file_operations = {
        .llseek         = generic_file_llseek,
        .read           = do_sync_read,
        .aio_read       = btrfs_file_aio_read,
        .write          = btrfs_file_write,
        .mmap           = generic_file_mmap,
        .open           = generic_file_open,
        .ioctl          = btrfs_ioctl,
        .fsync          = btrfs_sync_file,
};
3029
/*
 * Inode operations that btrfs_symlink() installs on symlink inodes; all
 * three hooks are the generic page based helpers.
 */
static struct inode_operations btrfs_symlink_inode_operations = {
        .readlink       = generic_readlink,
        .follow_link    = page_follow_link_light,
        .put_link       = page_put_link,
};
3035
3036 static int __init init_btrfs_fs(void)
3037 {
3038         int err;
3039         printk("btrfs loaded!\n");
3040         err = init_inodecache();
3041         if (err)
3042                 return err;
3043         return register_filesystem(&btrfs_fs_type);
3044         destroy_inodecache();
3045         return err;
3046 }
3047
3048 static void __exit exit_btrfs_fs(void)
3049 {
3050         destroy_inodecache();
3051         unregister_filesystem(&btrfs_fs_type);
3052         printk("btrfs unloaded\n");
3053 }
3054
3055 module_init(init_btrfs_fs)
3056 module_exit(exit_btrfs_fs)
3057
3058 MODULE_LICENSE("GPL");