Btrfs: fix oops after block group lookup
[linux-2.6] / fs / btrfs / super.c
1 #include <linux/module.h>
2 #include <linux/buffer_head.h>
3 #include <linux/fs.h>
4 #include <linux/pagemap.h>
5 #include <linux/highmem.h>
6 #include <linux/time.h>
7 #include <linux/init.h>
8 #include <linux/string.h>
9 #include <linux/smp_lock.h>
10 #include <linux/backing-dev.h>
11 #include <linux/mpage.h>
12 #include <linux/swap.h>
13 #include <linux/writeback.h>
14 #include <linux/statfs.h>
15 #include <linux/compat.h>
16 #include "ctree.h"
17 #include "disk-io.h"
18 #include "transaction.h"
19 #include "btrfs_inode.h"
20 #include "ioctl.h"
21 #include "print-tree.h"
22
23 struct btrfs_iget_args {
24         u64 ino;
25         struct btrfs_root *root;
26 };
27
28 #define BTRFS_SUPER_MAGIC 0x9123682E
29
30 static struct inode_operations btrfs_dir_inode_operations;
31 static struct inode_operations btrfs_symlink_inode_operations;
32 static struct inode_operations btrfs_dir_ro_inode_operations;
33 static struct super_operations btrfs_super_ops;
34 static struct file_operations btrfs_dir_file_operations;
35 static struct inode_operations btrfs_file_inode_operations;
36 static struct address_space_operations btrfs_aops;
37 static struct address_space_operations btrfs_symlink_aops;
38 static struct file_operations btrfs_file_operations;
39
40 static int drop_extents(struct btrfs_trans_handle *trans,
41                           struct btrfs_root *root,
42                           struct inode *inode,
43                           u64 start, u64 end, u64 *hint_block);
44 static int btrfs_get_block(struct inode *inode, sector_t iblock,
45                            struct buffer_head *result, int create);
46
47
48 #define S_SHIFT 12
49 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
50         [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
51         [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
52         [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
53         [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
54         [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
55         [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
56         [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
57 };
58
59 static void btrfs_read_locked_inode(struct inode *inode)
60 {
61         struct btrfs_path *path;
62         struct btrfs_inode_item *inode_item;
63         struct btrfs_root *root = BTRFS_I(inode)->root;
64         struct btrfs_key location;
65         u64 alloc_group_block;
66         int ret;
67
68         path = btrfs_alloc_path();
69         BUG_ON(!path);
70         btrfs_init_path(path);
71         mutex_lock(&root->fs_info->fs_mutex);
72
73         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
74         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
75         if (ret) {
76                 btrfs_free_path(path);
77                 goto make_bad;
78         }
79         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
80                                   path->slots[0],
81                                   struct btrfs_inode_item);
82
83         inode->i_mode = btrfs_inode_mode(inode_item);
84         inode->i_nlink = btrfs_inode_nlink(inode_item);
85         inode->i_uid = btrfs_inode_uid(inode_item);
86         inode->i_gid = btrfs_inode_gid(inode_item);
87         inode->i_size = btrfs_inode_size(inode_item);
88         inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime);
89         inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime);
90         inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime);
91         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime);
92         inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime);
93         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
94         inode->i_blocks = btrfs_inode_nblocks(inode_item);
95         inode->i_generation = btrfs_inode_generation(inode_item);
96         alloc_group_block = btrfs_inode_block_group(inode_item);
97         BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
98                                                        alloc_group_block);
99
100         btrfs_free_path(path);
101         inode_item = NULL;
102
103         mutex_unlock(&root->fs_info->fs_mutex);
104
105         switch (inode->i_mode & S_IFMT) {
106 #if 0
107         default:
108                 init_special_inode(inode, inode->i_mode,
109                                    btrfs_inode_rdev(inode_item));
110                 break;
111 #endif
112         case S_IFREG:
113                 inode->i_mapping->a_ops = &btrfs_aops;
114                 inode->i_fop = &btrfs_file_operations;
115                 inode->i_op = &btrfs_file_inode_operations;
116                 break;
117         case S_IFDIR:
118                 inode->i_fop = &btrfs_dir_file_operations;
119                 if (root == root->fs_info->tree_root)
120                         inode->i_op = &btrfs_dir_ro_inode_operations;
121                 else
122                         inode->i_op = &btrfs_dir_inode_operations;
123                 break;
124         case S_IFLNK:
125                 inode->i_op = &btrfs_symlink_inode_operations;
126                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
127                 break;
128         }
129         return;
130
131 make_bad:
132         btrfs_release_path(root, path);
133         btrfs_free_path(path);
134         mutex_unlock(&root->fs_info->fs_mutex);
135         make_bad_inode(inode);
136 }
137
138 static void fill_inode_item(struct btrfs_inode_item *item,
139                             struct inode *inode)
140 {
141         btrfs_set_inode_uid(item, inode->i_uid);
142         btrfs_set_inode_gid(item, inode->i_gid);
143         btrfs_set_inode_size(item, inode->i_size);
144         btrfs_set_inode_mode(item, inode->i_mode);
145         btrfs_set_inode_nlink(item, inode->i_nlink);
146         btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec);
147         btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec);
148         btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec);
149         btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec);
150         btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec);
151         btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
152         btrfs_set_inode_nblocks(item, inode->i_blocks);
153         btrfs_set_inode_generation(item, inode->i_generation);
154         btrfs_set_inode_block_group(item,
155                                     BTRFS_I(inode)->block_group->key.objectid);
156 }
157
158 static int btrfs_update_inode(struct btrfs_trans_handle *trans,
159                               struct btrfs_root *root,
160                               struct inode *inode)
161 {
162         struct btrfs_inode_item *inode_item;
163         struct btrfs_path *path;
164         int ret;
165
166         path = btrfs_alloc_path();
167         BUG_ON(!path);
168         btrfs_init_path(path);
169         ret = btrfs_lookup_inode(trans, root, path,
170                                  &BTRFS_I(inode)->location, 1);
171         if (ret) {
172                 if (ret > 0)
173                         ret = -ENOENT;
174                 goto failed;
175         }
176
177         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
178                                   path->slots[0],
179                                   struct btrfs_inode_item);
180
181         fill_inode_item(inode_item, inode);
182         btrfs_mark_buffer_dirty(path->nodes[0]);
183         ret = 0;
184 failed:
185         btrfs_release_path(root, path);
186         btrfs_free_path(path);
187         return ret;
188 }
189
190
191 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
192                               struct btrfs_root *root,
193                               struct inode *dir,
194                               struct dentry *dentry)
195 {
196         struct btrfs_path *path;
197         const char *name = dentry->d_name.name;
198         int name_len = dentry->d_name.len;
199         int ret = 0;
200         u64 objectid;
201         struct btrfs_dir_item *di;
202
203         path = btrfs_alloc_path();
204         BUG_ON(!path);
205         btrfs_init_path(path);
206         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
207                                     name, name_len, -1);
208         if (IS_ERR(di)) {
209                 ret = PTR_ERR(di);
210                 goto err;
211         }
212         if (!di) {
213                 ret = -ENOENT;
214                 goto err;
215         }
216         objectid = btrfs_disk_key_objectid(&di->location);
217         ret = btrfs_delete_one_dir_name(trans, root, path, di);
218         BUG_ON(ret);
219         btrfs_release_path(root, path);
220
221         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
222                                          objectid, name, name_len, -1);
223         if (IS_ERR(di)) {
224                 ret = PTR_ERR(di);
225                 goto err;
226         }
227         if (!di) {
228                 ret = -ENOENT;
229                 goto err;
230         }
231         ret = btrfs_delete_one_dir_name(trans, root, path, di);
232         BUG_ON(ret);
233
234         dentry->d_inode->i_ctime = dir->i_ctime;
235 err:
236         btrfs_free_path(path);
237         if (!ret) {
238                 dir->i_size -= name_len * 2;
239                 btrfs_update_inode(trans, root, dir);
240                 drop_nlink(dentry->d_inode);
241                 btrfs_update_inode(trans, root, dentry->d_inode);
242                 dir->i_sb->s_dirt = 1;
243         }
244         return ret;
245 }
246
247 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
248 {
249         struct btrfs_root *root;
250         struct btrfs_trans_handle *trans;
251         int ret;
252
253         root = BTRFS_I(dir)->root;
254         mutex_lock(&root->fs_info->fs_mutex);
255         trans = btrfs_start_transaction(root, 1);
256         btrfs_set_trans_block_group(trans, dir);
257         ret = btrfs_unlink_trans(trans, root, dir, dentry);
258         btrfs_end_transaction(trans, root);
259         mutex_unlock(&root->fs_info->fs_mutex);
260         btrfs_btree_balance_dirty(root);
261         return ret;
262 }
263
264 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
265 {
266         struct inode *inode = dentry->d_inode;
267         int err;
268         int ret;
269         struct btrfs_root *root = BTRFS_I(dir)->root;
270         struct btrfs_path *path;
271         struct btrfs_key key;
272         struct btrfs_trans_handle *trans;
273         struct btrfs_key found_key;
274         int found_type;
275         struct btrfs_leaf *leaf;
276         char *goodnames = "..";
277
278         path = btrfs_alloc_path();
279         BUG_ON(!path);
280         btrfs_init_path(path);
281         mutex_lock(&root->fs_info->fs_mutex);
282         trans = btrfs_start_transaction(root, 1);
283         btrfs_set_trans_block_group(trans, dir);
284         key.objectid = inode->i_ino;
285         key.offset = (u64)-1;
286         key.flags = (u32)-1;
287         while(1) {
288                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
289                 if (ret < 0) {
290                         err = ret;
291                         goto out;
292                 }
293                 BUG_ON(ret == 0);
294                 if (path->slots[0] == 0) {
295                         err = -ENOENT;
296                         goto out;
297                 }
298                 path->slots[0]--;
299                 leaf = btrfs_buffer_leaf(path->nodes[0]);
300                 btrfs_disk_key_to_cpu(&found_key,
301                                       &leaf->items[path->slots[0]].key);
302                 found_type = btrfs_key_type(&found_key);
303                 if (found_key.objectid != inode->i_ino) {
304                         err = -ENOENT;
305                         goto out;
306                 }
307                 if ((found_type != BTRFS_DIR_ITEM_KEY &&
308                      found_type != BTRFS_DIR_INDEX_KEY) ||
309                     (!btrfs_match_dir_item_name(root, path, goodnames, 2) &&
310                     !btrfs_match_dir_item_name(root, path, goodnames, 1))) {
311                         err = -ENOTEMPTY;
312                         goto out;
313                 }
314                 ret = btrfs_del_item(trans, root, path);
315                 BUG_ON(ret);
316
317                 if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1)
318                         break;
319                 btrfs_release_path(root, path);
320         }
321         ret = 0;
322         btrfs_release_path(root, path);
323
324         /* now the directory is empty */
325         err = btrfs_unlink_trans(trans, root, dir, dentry);
326         if (!err) {
327                 inode->i_size = 0;
328         }
329 out:
330         btrfs_release_path(root, path);
331         btrfs_free_path(path);
332         mutex_unlock(&root->fs_info->fs_mutex);
333         ret = btrfs_end_transaction(trans, root);
334         btrfs_btree_balance_dirty(root);
335         if (ret && !err)
336                 err = ret;
337         return err;
338 }
339
340 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
341                             struct btrfs_root *root,
342                             struct inode *inode)
343 {
344         struct btrfs_path *path;
345         int ret;
346
347         clear_inode(inode);
348
349         path = btrfs_alloc_path();
350         BUG_ON(!path);
351         btrfs_init_path(path);
352         ret = btrfs_lookup_inode(trans, root, path,
353                                  &BTRFS_I(inode)->location, -1);
354         BUG_ON(ret);
355         ret = btrfs_del_item(trans, root, path);
356         BUG_ON(ret);
357         btrfs_free_path(path);
358         return ret;
359 }
360
361 static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path,
362                            u64 objectid)
363 {
364         struct btrfs_node *node;
365         int i;
366         int nritems;
367         u64 item_objectid;
368         u64 blocknr;
369         int slot;
370         int ret;
371
372         if (!path->nodes[1])
373                 return;
374         node = btrfs_buffer_node(path->nodes[1]);
375         slot = path->slots[1];
376         if (slot == 0)
377                 return;
378         nritems = btrfs_header_nritems(&node->header);
379         for (i = slot - 1; i >= 0; i--) {
380                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
381                 if (item_objectid != objectid)
382                         break;
383                 blocknr = btrfs_node_blockptr(node, i);
384                 ret = readahead_tree_block(root, blocknr);
385                 if (ret)
386                         break;
387         }
388 }
389
/*
 * Drop the on-disk items of @inode that lie beyond inode->i_size.
 *
 * Repeatedly searches for the last remaining item of the inode (csum, dir
 * item, dir index or extent data).  Items whose key offset is at or past
 * i_size are deleted; a non-inline file extent that straddles i_size is
 * shortened instead.  Disk extents referenced by deleted file extent items
 * are handed to btrfs_free_extent().
 *
 * Returns 0, or the negative value from btrfs_search_slot().  The
 * superblock is marked dirty on every exit.
 */
390 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
391                                    struct btrfs_root *root,
392                                    struct inode *inode)
393 {
394         int ret;
395         struct btrfs_path *path;
396         struct btrfs_key key;
397         struct btrfs_disk_key *found_key;
398         u32 found_type;
399         struct btrfs_leaf *leaf;
400         struct btrfs_file_extent_item *fi;
401         u64 extent_start = 0;
402         u64 extent_num_blocks = 0;
403         u64 item_end = 0;
404         int found_extent;
405         int del_item;
406
407         path = btrfs_alloc_path();
408         BUG_ON(!path);
409         /* FIXME, add redo link to tree so we don't leak on crash */
            /* start from the largest possible key for this inode */
410         key.objectid = inode->i_ino;
411         key.offset = (u64)-1;
412         key.flags = (u32)-1;
413         while(1) {
414                 btrfs_init_path(path);
415                 fi = NULL;
416                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
417                 if (ret < 0) {
418                         goto error;
419                 }
                    /* no exact match: use the item just before the position */
420                 if (ret > 0) {
421                         BUG_ON(path->slots[0] == 0);
422                         path->slots[0]--;
423                 }
424                 reada_truncate(root, path, inode->i_ino);
425                 leaf = btrfs_buffer_leaf(path->nodes[0]);
426                 found_key = &leaf->items[path->slots[0]].key;
427                 found_type = btrfs_disk_key_type(found_key);
                    /* stop once we leave this inode or hit another item type */
428                 if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
429                         break;
430                 if (found_type != BTRFS_CSUM_ITEM_KEY &&
431                     found_type != BTRFS_DIR_ITEM_KEY &&
432                     found_type != BTRFS_DIR_INDEX_KEY &&
433                     found_type != BTRFS_EXTENT_DATA_KEY)
434                         break;
                    /*
                     * item_end is the key offset; for a non-inline file
                     * extent the extent length in bytes is added on top.
                     */
435                 item_end = btrfs_disk_key_offset(found_key);
436                 if (found_type == BTRFS_EXTENT_DATA_KEY) {
437                         fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
438                                             path->slots[0],
439                                             struct btrfs_file_extent_item);
440                         if (btrfs_file_extent_type(fi) !=
441                             BTRFS_FILE_EXTENT_INLINE) {
442                                 item_end += btrfs_file_extent_num_blocks(fi) <<
443                                                 inode->i_blkbits;
444                         }
445                 }
                    /* checksum items are trimmed to the new size first */
446                 if (found_type == BTRFS_CSUM_ITEM_KEY) {
447                         ret = btrfs_csum_truncate(trans, root, path,
448                                                   inode->i_size);
449                         BUG_ON(ret);
450                 }
                    /*
                     * The item ends before the new size: retry the search
                     * with the next lower key type, or stop at type 0.
                     */
451                 if (item_end < inode->i_size) {
452                         if (found_type) {
453                                 btrfs_set_key_type(&key, found_type - 1);
454                                 continue;
455                         }
456                         break;
457                 }
                    /* delete items that start at or after the new size */
458                 if (btrfs_disk_key_offset(found_key) >= inode->i_size)
459                         del_item = 1;
460                 else
461                         del_item = 0;
462                 found_extent = 0;
463
464                 if (found_type == BTRFS_EXTENT_DATA_KEY &&
465                            btrfs_file_extent_type(fi) !=
466                            BTRFS_FILE_EXTENT_INLINE) {
467                         u64 num_dec;
468                         if (!del_item) {
                                    /*
                                     * Extent straddles i_size: shrink it
                                     * to the blocks still needed (rounded
                                     * up) and reduce i_blocks to match.
                                     */
469                                 u64 orig_num_blocks =
470                                         btrfs_file_extent_num_blocks(fi);
471                                 extent_num_blocks = inode->i_size -
472                                         btrfs_disk_key_offset(found_key) +
473                                         root->blocksize - 1;
474                                 extent_num_blocks >>= inode->i_blkbits;
475                                 btrfs_set_file_extent_num_blocks(fi,
476                                                          extent_num_blocks);
477                                 inode->i_blocks -= (orig_num_blocks -
478                                         extent_num_blocks) << 3;
479                                 mark_buffer_dirty(path->nodes[0]);
480                         } else {
                                    /*
                                     * Whole extent goes away: remember the
                                     * disk range so it can be freed after
                                     * the item is deleted.  A disk block
                                     * number of 0 is not freed.
                                     */
481                                 extent_start =
482                                         btrfs_file_extent_disk_blocknr(fi);
483                                 extent_num_blocks =
484                                         btrfs_file_extent_disk_num_blocks(fi);
485                                 /* FIXME blocksize != 4096 */
486                                 num_dec = btrfs_file_extent_num_blocks(fi) << 3;
487                                 if (extent_start != 0) {
488                                         found_extent = 1;
489                                         inode->i_blocks -= num_dec;
490                                 }
491                         }
492                 }
                    /* a trimmed (not deleted) item ends the walk */
493                 if (del_item) {
494                         ret = btrfs_del_item(trans, root, path);
495                         BUG_ON(ret);
496                 } else {
497                         break;
498                 }
499                 btrfs_release_path(root, path);
500                 if (found_extent) {
501                         ret = btrfs_free_extent(trans, root, extent_start,
502                                                 extent_num_blocks, 0);
503                         BUG_ON(ret);
504                 }
505         }
506         ret = 0;
507 error:
508         btrfs_release_path(root, path);
509         btrfs_free_path(path);
510         inode->i_sb->s_dirt = 1;
511         return ret;
512 }
513
514 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
515 {
516         struct inode *inode = mapping->host;
517         unsigned blocksize = 1 << inode->i_blkbits;
518         pgoff_t index = from >> PAGE_CACHE_SHIFT;
519         unsigned offset = from & (PAGE_CACHE_SIZE-1);
520         struct page *page;
521         char *kaddr;
522         int ret = 0;
523         struct btrfs_root *root = BTRFS_I(inode)->root;
524         u64 alloc_hint;
525         struct btrfs_key ins;
526         struct btrfs_trans_handle *trans;
527
528         if ((offset & (blocksize - 1)) == 0)
529                 goto out;
530
531         ret = -ENOMEM;
532         page = grab_cache_page(mapping, index);
533         if (!page)
534                 goto out;
535
536         if (!PageUptodate(page)) {
537                 ret = mpage_readpage(page, btrfs_get_block);
538                 lock_page(page);
539                 if (!PageUptodate(page)) {
540                         ret = -EIO;
541                         goto out;
542                 }
543         }
544         mutex_lock(&root->fs_info->fs_mutex);
545         trans = btrfs_start_transaction(root, 1);
546         btrfs_set_trans_block_group(trans, inode);
547
548         ret = drop_extents(trans, root, inode, page->index << PAGE_CACHE_SHIFT,
549                            (page->index + 1) << PAGE_CACHE_SHIFT, &alloc_hint);
550         BUG_ON(ret);
551         ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1,
552                                  alloc_hint, (u64)-1, &ins, 1);
553         BUG_ON(ret);
554         ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
555                                        page->index << PAGE_CACHE_SHIFT,
556                                        ins.objectid, 1, 1);
557         BUG_ON(ret);
558         SetPageChecked(page);
559         kaddr = kmap(page);
560         memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
561         flush_dcache_page(page);
562         btrfs_csum_file_block(trans, root, inode->i_ino,
563                               page->index << PAGE_CACHE_SHIFT,
564                               kaddr, PAGE_CACHE_SIZE);
565         kunmap(page);
566         btrfs_end_transaction(trans, root);
567         mutex_unlock(&root->fs_info->fs_mutex);
568
569         set_page_dirty(page);
570         unlock_page(page);
571         page_cache_release(page);
572 out:
573         return ret;
574 }
575
576 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
577 {
578         struct inode *inode = dentry->d_inode;
579         int err;
580
581         err = inode_change_ok(inode, attr);
582         if (err)
583                 return err;
584
585         if (S_ISREG(inode->i_mode) &&
586             attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
587                 struct btrfs_trans_handle *trans;
588                 struct btrfs_root *root = BTRFS_I(inode)->root;
589                 u64 mask = root->blocksize - 1;
590                 u64 pos = (inode->i_size + mask) & ~mask;
591                 u64 hole_size;
592
593                 if (attr->ia_size <= pos)
594                         goto out;
595
596                 btrfs_truncate_page(inode->i_mapping, inode->i_size);
597
598                 hole_size = (attr->ia_size - pos + mask) & ~mask;
599                 hole_size >>= inode->i_blkbits;
600
601                 mutex_lock(&root->fs_info->fs_mutex);
602                 trans = btrfs_start_transaction(root, 1);
603                 btrfs_set_trans_block_group(trans, inode);
604                 err = btrfs_insert_file_extent(trans, root, inode->i_ino,
605                                                pos, 0, 0, hole_size);
606                 BUG_ON(err);
607                 btrfs_end_transaction(trans, root);
608                 mutex_unlock(&root->fs_info->fs_mutex);
609         }
610 out:
611         err = inode_setattr(inode, attr);
612
613         return err;
614 }
615 static void btrfs_delete_inode(struct inode *inode)
616 {
617         struct btrfs_trans_handle *trans;
618         struct btrfs_root *root = BTRFS_I(inode)->root;
619         int ret;
620
621         truncate_inode_pages(&inode->i_data, 0);
622         if (is_bad_inode(inode)) {
623                 goto no_delete;
624         }
625         inode->i_size = 0;
626         mutex_lock(&root->fs_info->fs_mutex);
627         trans = btrfs_start_transaction(root, 1);
628         btrfs_set_trans_block_group(trans, inode);
629         ret = btrfs_truncate_in_trans(trans, root, inode);
630         BUG_ON(ret);
631         btrfs_free_inode(trans, root, inode);
632         btrfs_end_transaction(trans, root);
633         mutex_unlock(&root->fs_info->fs_mutex);
634         btrfs_btree_balance_dirty(root);
635         return;
636 no_delete:
637         clear_inode(inode);
638 }
639
640 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
641                                struct btrfs_key *location)
642 {
643         const char *name = dentry->d_name.name;
644         int namelen = dentry->d_name.len;
645         struct btrfs_dir_item *di;
646         struct btrfs_path *path;
647         struct btrfs_root *root = BTRFS_I(dir)->root;
648         int ret;
649
650         path = btrfs_alloc_path();
651         BUG_ON(!path);
652         btrfs_init_path(path);
653         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
654                                     namelen, 0);
655         if (!di || IS_ERR(di)) {
656                 location->objectid = 0;
657                 ret = 0;
658                 goto out;
659         }
660         btrfs_disk_key_to_cpu(location, &di->location);
661 out:
662         btrfs_release_path(root, path);
663         btrfs_free_path(path);
664         return ret;
665 }
666
667 static int fixup_tree_root_location(struct btrfs_root *root,
668                              struct btrfs_key *location,
669                              struct btrfs_root **sub_root)
670 {
671         struct btrfs_path *path;
672         struct btrfs_root_item *ri;
673
674         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
675                 return 0;
676         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
677                 return 0;
678
679         path = btrfs_alloc_path();
680         BUG_ON(!path);
681         mutex_lock(&root->fs_info->fs_mutex);
682
683         *sub_root = btrfs_read_fs_root(root->fs_info, location);
684         if (IS_ERR(*sub_root))
685                 return PTR_ERR(*sub_root);
686
687         ri = &(*sub_root)->root_item;
688         location->objectid = btrfs_root_dirid(ri);
689         location->flags = 0;
690         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
691         location->offset = 0;
692
693         btrfs_free_path(path);
694         mutex_unlock(&root->fs_info->fs_mutex);
695         return 0;
696 }
697
698 static int btrfs_init_locked_inode(struct inode *inode, void *p)
699 {
700         struct btrfs_iget_args *args = p;
701         inode->i_ino = args->ino;
702         BTRFS_I(inode)->root = args->root;
703         return 0;
704 }
705
706 static int btrfs_find_actor(struct inode *inode, void *opaque)
707 {
708         struct btrfs_iget_args *args = opaque;
709         return (args->ino == inode->i_ino &&
710                 args->root == BTRFS_I(inode)->root);
711 }
712
713 static struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
714                                        struct btrfs_root *root)
715 {
716         struct inode *inode;
717         struct btrfs_iget_args args;
718         args.ino = objectid;
719         args.root = root;
720
721         inode = iget5_locked(s, objectid, btrfs_find_actor,
722                              btrfs_init_locked_inode,
723                              (void *)&args);
724         return inode;
725 }
726
727 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
728                                    struct nameidata *nd)
729 {
730         struct inode * inode;
731         struct btrfs_inode *bi = BTRFS_I(dir);
732         struct btrfs_root *root = bi->root;
733         struct btrfs_root *sub_root = root;
734         struct btrfs_key location;
735         int ret;
736
737         if (dentry->d_name.len > BTRFS_NAME_LEN)
738                 return ERR_PTR(-ENAMETOOLONG);
739         mutex_lock(&root->fs_info->fs_mutex);
740         ret = btrfs_inode_by_name(dir, dentry, &location);
741         mutex_unlock(&root->fs_info->fs_mutex);
742         if (ret < 0)
743                 return ERR_PTR(ret);
744         inode = NULL;
745         if (location.objectid) {
746                 ret = fixup_tree_root_location(root, &location, &sub_root);
747                 if (ret < 0)
748                         return ERR_PTR(ret);
749                 if (ret > 0)
750                         return ERR_PTR(-ENOENT);
751                 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
752                                           sub_root);
753                 if (!inode)
754                         return ERR_PTR(-EACCES);
755                 if (inode->i_state & I_NEW) {
756                         if (sub_root != root) {
757 printk("adding new root for inode %lu root %p (found %p)\n", inode->i_ino, sub_root, BTRFS_I(inode)->root);
758                                 igrab(inode);
759                                 sub_root->inode = inode;
760                         }
761                         BTRFS_I(inode)->root = sub_root;
762                         memcpy(&BTRFS_I(inode)->location, &location,
763                                sizeof(location));
764                         btrfs_read_locked_inode(inode);
765                         unlock_new_inode(inode);
766                 }
767         }
768         return d_splice_alias(inode, dentry);
769 }
770
771 static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path,
772                          u64 objectid)
773 {
774         struct btrfs_node *node;
775         int i;
776         u32 nritems;
777         u64 item_objectid;
778         u64 blocknr;
779         int slot;
780         int ret;
781
782         if (!path->nodes[1])
783                 return;
784         node = btrfs_buffer_node(path->nodes[1]);
785         slot = path->slots[1];
786         nritems = btrfs_header_nritems(&node->header);
787         for (i = slot + 1; i < nritems; i++) {
788                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
789                 if (item_objectid != objectid)
790                         break;
791                 blocknr = btrfs_node_blockptr(node, i);
792                 ret = readahead_tree_block(root, blocknr);
793                 if (ret)
794                         break;
795         }
796 }
797 static unsigned char btrfs_filetype_table[] = {
798         DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
799 };
800
801 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
802 {
803         struct inode *inode = filp->f_path.dentry->d_inode;
804         struct btrfs_root *root = BTRFS_I(inode)->root;
805         struct btrfs_item *item;
806         struct btrfs_dir_item *di;
807         struct btrfs_key key;
808         struct btrfs_path *path;
809         int ret;
810         u32 nritems;
811         struct btrfs_leaf *leaf;
812         int slot;
813         int advance;
814         unsigned char d_type;
815         int over = 0;
816         u32 di_cur;
817         u32 di_total;
818         u32 di_len;
819         int key_type = BTRFS_DIR_INDEX_KEY;
820
821         /* FIXME, use a real flag for deciding about the key type */
822         if (root->fs_info->tree_root == root)
823                 key_type = BTRFS_DIR_ITEM_KEY;
824         mutex_lock(&root->fs_info->fs_mutex);
825         key.objectid = inode->i_ino;
826         key.flags = 0;
827         btrfs_set_key_type(&key, key_type);
828         key.offset = filp->f_pos;
829         path = btrfs_alloc_path();
830         btrfs_init_path(path);
831         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
832         if (ret < 0)
833                 goto err;
834         advance = 0;
835         reada_leaves(root, path, inode->i_ino);
836         while(1) {
837                 leaf = btrfs_buffer_leaf(path->nodes[0]);
838                 nritems = btrfs_header_nritems(&leaf->header);
839                 slot = path->slots[0];
840                 if (advance || slot >= nritems) {
841                         if (slot >= nritems -1) {
842                                 reada_leaves(root, path, inode->i_ino);
843                                 ret = btrfs_next_leaf(root, path);
844                                 if (ret)
845                                         break;
846                                 leaf = btrfs_buffer_leaf(path->nodes[0]);
847                                 nritems = btrfs_header_nritems(&leaf->header);
848                                 slot = path->slots[0];
849                         } else {
850                                 slot++;
851                                 path->slots[0]++;
852                         }
853                 }
854                 advance = 1;
855                 item = leaf->items + slot;
856                 if (btrfs_disk_key_objectid(&item->key) != key.objectid)
857                         break;
858                 if (btrfs_disk_key_type(&item->key) != key_type)
859                         break;
860                 if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
861                         continue;
862                 filp->f_pos = btrfs_disk_key_offset(&item->key);
863                 advance = 1;
864                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
865                 di_cur = 0;
866                 di_total = btrfs_item_size(leaf->items + slot);
867                 while(di_cur < di_total) {
868                         d_type = btrfs_filetype_table[btrfs_dir_type(di)];
869                         over = filldir(dirent, (const char *)(di + 1),
870                                        btrfs_dir_name_len(di),
871                                        btrfs_disk_key_offset(&item->key),
872                                        btrfs_disk_key_objectid(&di->location),
873                                        d_type);
874                         if (over)
875                                 goto nopos;
876                         di_len = btrfs_dir_name_len(di) + sizeof(*di);
877                         di_cur += di_len;
878                         di = (struct btrfs_dir_item *)((char *)di + di_len);
879                 }
880         }
881         filp->f_pos++;
882 nopos:
883         ret = 0;
884 err:
885         btrfs_release_path(root, path);
886         btrfs_free_path(path);
887         mutex_unlock(&root->fs_info->fs_mutex);
888         return ret;
889 }
890
891 static void btrfs_put_super (struct super_block * sb)
892 {
893         struct btrfs_root *root = btrfs_sb(sb);
894         int ret;
895
896         ret = close_ctree(root);
897         if (ret) {
898                 printk("close ctree returns %d\n", ret);
899         }
900         sb->s_fs_info = NULL;
901 }
902
903 static int btrfs_fill_super(struct super_block * sb, void * data, int silent)
904 {
905         struct inode * inode;
906         struct dentry * root_dentry;
907         struct btrfs_super_block *disk_super;
908         struct btrfs_root *tree_root;
909         struct btrfs_inode *bi;
910
911         sb->s_maxbytes = MAX_LFS_FILESIZE;
912         sb->s_magic = BTRFS_SUPER_MAGIC;
913         sb->s_op = &btrfs_super_ops;
914         sb->s_time_gran = 1;
915
916         tree_root = open_ctree(sb);
917
918         if (!tree_root) {
919                 printk("btrfs: open_ctree failed\n");
920                 return -EIO;
921         }
922         sb->s_fs_info = tree_root;
923         disk_super = tree_root->fs_info->disk_super;
924         printk("read in super total blocks %Lu root %Lu\n",
925                btrfs_super_total_blocks(disk_super),
926                btrfs_super_root_dir(disk_super));
927
928         inode = btrfs_iget_locked(sb, btrfs_super_root_dir(disk_super),
929                                   tree_root);
930         bi = BTRFS_I(inode);
931         bi->location.objectid = inode->i_ino;
932         bi->location.offset = 0;
933         bi->location.flags = 0;
934         bi->root = tree_root;
935         btrfs_set_key_type(&bi->location, BTRFS_INODE_ITEM_KEY);
936
937         if (!inode)
938                 return -ENOMEM;
939         if (inode->i_state & I_NEW) {
940                 btrfs_read_locked_inode(inode);
941                 unlock_new_inode(inode);
942         }
943
944         root_dentry = d_alloc_root(inode);
945         if (!root_dentry) {
946                 iput(inode);
947                 return -ENOMEM;
948         }
949         sb->s_root = root_dentry;
950         btrfs_transaction_queue_work(tree_root, HZ * 30);
951         return 0;
952 }
953
954 static int btrfs_write_inode(struct inode *inode, int wait)
955 {
956         struct btrfs_root *root = BTRFS_I(inode)->root;
957         struct btrfs_trans_handle *trans;
958         int ret = 0;
959
960         if (wait) {
961                 mutex_lock(&root->fs_info->fs_mutex);
962                 trans = btrfs_start_transaction(root, 1);
963                 btrfs_set_trans_block_group(trans, inode);
964                 ret = btrfs_commit_transaction(trans, root);
965                 mutex_unlock(&root->fs_info->fs_mutex);
966         }
967         return ret;
968 }
969
970 static void btrfs_dirty_inode(struct inode *inode)
971 {
972         struct btrfs_root *root = BTRFS_I(inode)->root;
973         struct btrfs_trans_handle *trans;
974
975         mutex_lock(&root->fs_info->fs_mutex);
976         trans = btrfs_start_transaction(root, 1);
977         btrfs_set_trans_block_group(trans, inode);
978         btrfs_update_inode(trans, root, inode);
979         btrfs_end_transaction(trans, root);
980         mutex_unlock(&root->fs_info->fs_mutex);
981         btrfs_btree_balance_dirty(root);
982 }
983
984 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
985                                      struct btrfs_root *root,
986                                      u64 objectid,
987                                      struct btrfs_block_group_cache *group,
988                                      int mode)
989 {
990         struct inode *inode;
991         struct btrfs_inode_item inode_item;
992         struct btrfs_key *location;
993         int ret;
994         int owner;
995
996         inode = new_inode(root->fs_info->sb);
997         if (!inode)
998                 return ERR_PTR(-ENOMEM);
999
1000         BTRFS_I(inode)->root = root;
1001         if (mode & S_IFDIR)
1002                 owner = 0;
1003         else
1004                 owner = 1;
1005         group = btrfs_find_block_group(root, group, 0, 0, owner);
1006         BTRFS_I(inode)->block_group = group;
1007
1008         inode->i_uid = current->fsuid;
1009         inode->i_gid = current->fsgid;
1010         inode->i_mode = mode;
1011         inode->i_ino = objectid;
1012         inode->i_blocks = 0;
1013         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1014         fill_inode_item(&inode_item, inode);
1015         location = &BTRFS_I(inode)->location;
1016         location->objectid = objectid;
1017         location->flags = 0;
1018         location->offset = 0;
1019         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1020
1021         ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
1022         BUG_ON(ret);
1023
1024         insert_inode_hash(inode);
1025         return inode;
1026 }
1027
1028 static inline u8 btrfs_inode_type(struct inode *inode)
1029 {
1030         return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
1031 }
1032
1033 static int btrfs_add_link(struct btrfs_trans_handle *trans,
1034                             struct dentry *dentry, struct inode *inode)
1035 {
1036         int ret;
1037         struct btrfs_key key;
1038         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
1039         key.objectid = inode->i_ino;
1040         key.flags = 0;
1041         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1042         key.offset = 0;
1043
1044         ret = btrfs_insert_dir_item(trans, root,
1045                                     dentry->d_name.name, dentry->d_name.len,
1046                                     dentry->d_parent->d_inode->i_ino,
1047                                     &key, btrfs_inode_type(inode));
1048         if (ret == 0) {
1049                 dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2;
1050                 ret = btrfs_update_inode(trans, root,
1051                                          dentry->d_parent->d_inode);
1052         }
1053         return ret;
1054 }
1055
1056 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
1057                             struct dentry *dentry, struct inode *inode)
1058 {
1059         int err = btrfs_add_link(trans, dentry, inode);
1060         if (!err) {
1061                 d_instantiate(dentry, inode);
1062                 return 0;
1063         }
1064         if (err > 0)
1065                 err = -EEXIST;
1066         return err;
1067 }
1068
1069 static int btrfs_create(struct inode *dir, struct dentry *dentry,
1070                         int mode, struct nameidata *nd)
1071 {
1072         struct btrfs_trans_handle *trans;
1073         struct btrfs_root *root = BTRFS_I(dir)->root;
1074         struct inode *inode;
1075         int err;
1076         int drop_inode = 0;
1077         u64 objectid;
1078
1079         mutex_lock(&root->fs_info->fs_mutex);
1080         trans = btrfs_start_transaction(root, 1);
1081         btrfs_set_trans_block_group(trans, dir);
1082
1083         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1084         if (err) {
1085                 err = -ENOSPC;
1086                 goto out_unlock;
1087         }
1088
1089         inode = btrfs_new_inode(trans, root, objectid,
1090                                 BTRFS_I(dir)->block_group, mode);
1091         err = PTR_ERR(inode);
1092         if (IS_ERR(inode))
1093                 goto out_unlock;
1094
1095         btrfs_set_trans_block_group(trans, inode);
1096         err = btrfs_add_nondir(trans, dentry, inode);
1097         if (err)
1098                 drop_inode = 1;
1099         else {
1100                 inode->i_mapping->a_ops = &btrfs_aops;
1101                 inode->i_fop = &btrfs_file_operations;
1102                 inode->i_op = &btrfs_file_inode_operations;
1103         }
1104         dir->i_sb->s_dirt = 1;
1105         btrfs_update_inode_block_group(trans, inode);
1106         btrfs_update_inode_block_group(trans, dir);
1107 out_unlock:
1108         btrfs_end_transaction(trans, root);
1109         mutex_unlock(&root->fs_info->fs_mutex);
1110
1111         if (drop_inode) {
1112                 inode_dec_link_count(inode);
1113                 iput(inode);
1114         }
1115         btrfs_btree_balance_dirty(root);
1116         return err;
1117 }
1118
1119 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
1120                       struct dentry *dentry)
1121 {
1122         struct btrfs_trans_handle *trans;
1123         struct btrfs_root *root = BTRFS_I(dir)->root;
1124         struct inode *inode = old_dentry->d_inode;
1125         int err;
1126         int drop_inode = 0;
1127
1128         if (inode->i_nlink == 0)
1129                 return -ENOENT;
1130
1131         inc_nlink(inode);
1132         mutex_lock(&root->fs_info->fs_mutex);
1133         trans = btrfs_start_transaction(root, 1);
1134         btrfs_set_trans_block_group(trans, dir);
1135         atomic_inc(&inode->i_count);
1136         err = btrfs_add_nondir(trans, dentry, inode);
1137         if (err)
1138                 drop_inode = 1;
1139         dir->i_sb->s_dirt = 1;
1140         btrfs_update_inode_block_group(trans, dir);
1141         btrfs_update_inode(trans, root, inode);
1142
1143         btrfs_end_transaction(trans, root);
1144         mutex_unlock(&root->fs_info->fs_mutex);
1145
1146         if (drop_inode) {
1147                 inode_dec_link_count(inode);
1148                 iput(inode);
1149         }
1150         btrfs_btree_balance_dirty(root);
1151         return err;
1152 }
1153
1154 static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans,
1155                                 struct btrfs_root *root,
1156                                 u64 objectid, u64 dirid)
1157 {
1158         int ret;
1159         char buf[2];
1160         struct btrfs_key key;
1161
1162         buf[0] = '.';
1163         buf[1] = '.';
1164
1165         key.objectid = objectid;
1166         key.offset = 0;
1167         key.flags = 0;
1168         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1169
1170         ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid,
1171                                     &key, BTRFS_FT_DIR);
1172         if (ret)
1173                 goto error;
1174         key.objectid = dirid;
1175         ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid,
1176                                     &key, BTRFS_FT_DIR);
1177         if (ret)
1178                 goto error;
1179 error:
1180         return ret;
1181 }
1182
1183 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1184 {
1185         struct inode *inode;
1186         struct btrfs_trans_handle *trans;
1187         struct btrfs_root *root = BTRFS_I(dir)->root;
1188         int err = 0;
1189         int drop_on_err = 0;
1190         u64 objectid;
1191
1192         mutex_lock(&root->fs_info->fs_mutex);
1193         trans = btrfs_start_transaction(root, 1);
1194         btrfs_set_trans_block_group(trans, dir);
1195         if (IS_ERR(trans)) {
1196                 err = PTR_ERR(trans);
1197                 goto out_unlock;
1198         }
1199
1200         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1201         if (err) {
1202                 err = -ENOSPC;
1203                 goto out_unlock;
1204         }
1205
1206         inode = btrfs_new_inode(trans, root, objectid,
1207                                 BTRFS_I(dir)->block_group, S_IFDIR | mode);
1208         if (IS_ERR(inode)) {
1209                 err = PTR_ERR(inode);
1210                 goto out_fail;
1211         }
1212         drop_on_err = 1;
1213         inode->i_op = &btrfs_dir_inode_operations;
1214         inode->i_fop = &btrfs_dir_file_operations;
1215         btrfs_set_trans_block_group(trans, inode);
1216
1217         err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino);
1218         if (err)
1219                 goto out_fail;
1220
1221         inode->i_size = 6;
1222         err = btrfs_update_inode(trans, root, inode);
1223         if (err)
1224                 goto out_fail;
1225         err = btrfs_add_link(trans, dentry, inode);
1226         if (err)
1227                 goto out_fail;
1228         d_instantiate(dentry, inode);
1229         drop_on_err = 0;
1230         dir->i_sb->s_dirt = 1;
1231         btrfs_update_inode_block_group(trans, inode);
1232         btrfs_update_inode_block_group(trans, dir);
1233
1234 out_fail:
1235         btrfs_end_transaction(trans, root);
1236 out_unlock:
1237         mutex_unlock(&root->fs_info->fs_mutex);
1238         if (drop_on_err)
1239                 iput(inode);
1240         btrfs_btree_balance_dirty(root);
1241         return err;
1242 }
1243
1244 static int btrfs_sync_file(struct file *file,
1245                            struct dentry *dentry, int datasync)
1246 {
1247         struct inode *inode = dentry->d_inode;
1248         struct btrfs_root *root = BTRFS_I(inode)->root;
1249         int ret;
1250         struct btrfs_trans_handle *trans;
1251
1252         mutex_lock(&root->fs_info->fs_mutex);
1253         trans = btrfs_start_transaction(root, 1);
1254         if (!trans) {
1255                 ret = -ENOMEM;
1256                 goto out;
1257         }
1258         ret = btrfs_commit_transaction(trans, root);
1259         mutex_unlock(&root->fs_info->fs_mutex);
1260 out:
1261         return ret > 0 ? EIO : ret;
1262 }
1263
1264 static int btrfs_sync_fs(struct super_block *sb, int wait)
1265 {
1266         struct btrfs_trans_handle *trans;
1267         struct btrfs_root *root;
1268         int ret;
1269         root = btrfs_sb(sb);
1270
1271         sb->s_dirt = 0;
1272         if (!wait) {
1273                 filemap_flush(root->fs_info->btree_inode->i_mapping);
1274                 return 0;
1275         }
1276         mutex_lock(&root->fs_info->fs_mutex);
1277         trans = btrfs_start_transaction(root, 1);
1278         ret = btrfs_commit_transaction(trans, root);
1279         sb->s_dirt = 0;
1280         BUG_ON(ret);
1281 printk("btrfs sync_fs\n");
1282         mutex_unlock(&root->fs_info->fs_mutex);
1283         return 0;
1284 }
1285
/*
 * Values for the 'create' argument of btrfs_get_block_lock(), which tests
 * them with bitwise AND:
 *   BTRFS_GET_BLOCK_CREATE    - drop and allocate an extent for the block
 *   BTRFS_GET_BLOCK_NO_DIRECT - fail with -EINVAL on inline extents;
 *                               passed by btrfs_get_block_bmap()
 * BTRFS_GET_BLOCK_NO_CREATE is zero, i.e. neither bit set.
 */
1286 #define BTRFS_GET_BLOCK_NO_CREATE 0
1287 #define BTRFS_GET_BLOCK_CREATE 1
1288 #define BTRFS_GET_BLOCK_NO_DIRECT 2
1289
/*
 * btrfs_get_block_lock - map file block @iblock of @inode onto @result
 * @inode:  file being mapped
 * @iblock: block index within the file (in units of 1 << i_blkbits)
 * @result: buffer head to fill in
 * @create: BTRFS_GET_BLOCK_* flags
 *
 * Both callers in this file (btrfs_get_block and btrfs_get_block_bmap)
 * take fs_info->fs_mutex around the call.
 *
 * The file extent item covering the block is looked up.  For a regular
 * extent that contains @iblock and has a non-zero disk block, @result is
 * mapped with btrfs_map_bh_to_logical().  For an inline extent the data is
 * copied into the page of @result, the rest of the page is zeroed, and the
 * buffer is marked uptodate and mapped to 0.  With BTRFS_GET_BLOCK_CREATE
 * set, the range is first dropped with drop_extents() and a one-block
 * extent is allocated and inserted when control reaches not_found.
 *
 * Returns 0 when nothing failed (the buffer may still be unmapped),
 * -ENOMEM, -EINVAL (inline extent with BTRFS_GET_BLOCK_NO_DIRECT), or the
 * negative result of the extent lookup.  When a transaction was started,
 * the return value is whatever btrfs_end_transaction() returns.
 */
1290 static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
1291                            struct buffer_head *result, int create)
1292 {
1293         int ret;
1294         int err = 0;
1295         u64 blocknr;
1296         u64 extent_start = 0;
1297         u64 extent_end = 0;
1298         u64 objectid = inode->i_ino;
1299         u32 found_type;
1300         u64 alloc_hint = 0;
1301         struct btrfs_path *path;
1302         struct btrfs_root *root = BTRFS_I(inode)->root;
1303         struct btrfs_file_extent_item *item;
1304         struct btrfs_leaf *leaf;
1305         struct btrfs_disk_key *found_key;
1306         struct btrfs_trans_handle *trans = NULL;
1307
1308         path = btrfs_alloc_path();
1309         BUG_ON(!path);
1310         btrfs_init_path(path);
1311         if (create & BTRFS_GET_BLOCK_CREATE) {
1312                 WARN_ON(1);
1313                 /* this almost but not quite works */
1314                 trans = btrfs_start_transaction(root, 1);
1315                 if (!trans) {
1316                         err = -ENOMEM;
1317                         goto out;
1318                 }
                     /* drop whatever covers the byte range of this block */
1319                 ret = drop_extents(trans, root, inode,
1320                                    iblock << inode->i_blkbits,
1321                                    (iblock + 1) << inode->i_blkbits,
1322                                    &alloc_hint);
1323                 BUG_ON(ret);
1324         }
1325
1326         ret = btrfs_lookup_file_extent(NULL, root, path,
1327                                        inode->i_ino,
1328                                        iblock << inode->i_blkbits, 0);
1329         if (ret < 0) {
1330                 err = ret;
1331                 goto out;
1332         }
1333
             /*
              * Non-zero: no item at exactly this offset.  Step back to the
              * previous slot, which may hold an extent starting earlier.
              */
1334         if (ret != 0) {
1335                 if (path->slots[0] == 0) {
1336                         btrfs_release_path(root, path);
1337                         goto not_found;
1338                 }
1339                 path->slots[0]--;
1340         }
1341
1342         item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
1343                               struct btrfs_file_extent_item);
1344         leaf = btrfs_buffer_leaf(path->nodes[0]);
             /* computed before the key check below; only used for REG extents */
1345         blocknr = btrfs_file_extent_disk_blocknr(item);
1346         blocknr += btrfs_file_extent_offset(item);
1347
1348         /* are we inside the extent that was found? */
1349         found_key = &leaf->items[path->slots[0]].key;
1350         found_type = btrfs_disk_key_type(found_key);
1351         if (btrfs_disk_key_objectid(found_key) != objectid ||
1352             found_type != BTRFS_EXTENT_DATA_KEY) {
1353                 extent_end = 0;
1354                 extent_start = 0;
1355                 goto not_found;
1356         }
             /* found_type is reused: from here on it is the extent type */
1357         found_type = btrfs_file_extent_type(item);
1358         extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
1359         if (found_type == BTRFS_FILE_EXTENT_REG) {
                     /* convert the byte offset from the key into blocks */
1360                 extent_start = extent_start >> inode->i_blkbits;
1361                 extent_end = extent_start + btrfs_file_extent_num_blocks(item);
1362                 err = 0;
                     /*
                      * Disk block 0: return with the buffer unmapped.
                      * NOTE(review): presumably this marks a hole - confirm.
                      */
1363                 if (btrfs_file_extent_disk_blocknr(item) == 0)
1364                         goto out;
1365                 if (iblock >= extent_start && iblock < extent_end) {
1366                         btrfs_map_bh_to_logical(root, result, blocknr +
1367                                                 iblock - extent_start);
1368                         goto out;
1369                 }
                     /* outside the extent: falls through to not_found */
1370         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1371                 char *ptr;
1372                 char *map;
1373                 u32 size;
1374
1375                 if (create & BTRFS_GET_BLOCK_NO_DIRECT) {
1376                         err = -EINVAL;
1377                         goto out;
1378                 }
1379                 size = btrfs_file_extent_inline_len(leaf->items +
1380                                                     path->slots[0]);
1381                 extent_end = (extent_start + size) >> inode->i_blkbits;
1382                 extent_start >>= inode->i_blkbits;
1383                 if (iblock < extent_start || iblock > extent_end) {
1384                         goto not_found;
1385                 }
                     /* copy the inline data into the page, zero the remainder */
1386                 ptr = btrfs_file_extent_inline_start(item);
1387                 map = kmap(result->b_page);
1388                 memcpy(map, ptr, size);
1389                 memset(map + size, 0, PAGE_CACHE_SIZE - size);
1390                 flush_dcache_page(result->b_page);
1391                 kunmap(result->b_page);
1392                 set_buffer_uptodate(result);
1393                 SetPageChecked(result->b_page);
1394                 btrfs_map_bh_to_logical(root, result, 0);
                     /* no goto here: control continues into not_found */
1395         }
1396 not_found:
1397         if (create & BTRFS_GET_BLOCK_CREATE) {
1398                 struct btrfs_key ins;
                     /* allocate one block and record it as a file extent */
1399                 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
1400                                          1, alloc_hint, (u64)-1,
1401                                          &ins, 1);
1402                 BUG_ON(ret);
1403                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
1404                                                iblock << inode->i_blkbits,
1405                                                ins.objectid, ins.offset,
1406                                                ins.offset);
1407                 BUG_ON(ret);
1408                 SetPageChecked(result->b_page);
1409                 btrfs_map_bh_to_logical(root, result, ins.objectid);
1410         }
1411 out:
             /* note: this replaces any error recorded in err above */
1412         if (trans)
1413                 err = btrfs_end_transaction(trans, root);
1414         btrfs_free_path(path);
1415         return err;
1416 }
1417
1418 static int btrfs_get_block(struct inode *inode, sector_t iblock,
1419                            struct buffer_head *result, int create)
1420 {
1421         int err;
1422         struct btrfs_root *root = BTRFS_I(inode)->root;
1423         mutex_lock(&root->fs_info->fs_mutex);
1424         err = btrfs_get_block_lock(inode, iblock, result, create);
1425         mutex_unlock(&root->fs_info->fs_mutex);
1426         return err;
1427 }
1428
1429 static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock,
1430                            struct buffer_head *result, int create)
1431 {
1432         struct btrfs_root *root = BTRFS_I(inode)->root;
1433         mutex_lock(&root->fs_info->fs_mutex);
1434         btrfs_get_block_lock(inode, iblock, result, BTRFS_GET_BLOCK_NO_DIRECT);
1435         mutex_unlock(&root->fs_info->fs_mutex);
1436         return 0;
1437 }
1438
1439 static sector_t btrfs_bmap(struct address_space *as, sector_t block)
1440 {
1441         return generic_block_bmap(as, block, btrfs_get_block_bmap);
1442 }
1443
1444 static int btrfs_prepare_write(struct file *file, struct page *page,
1445                                unsigned from, unsigned to)
1446 {
1447         return block_prepare_write(page, from, to, btrfs_get_block);
1448 }
1449
1450 static void btrfs_write_super(struct super_block *sb)
1451 {
1452         sb->s_dirt = 0;
1453 }
1454
1455 static int btrfs_readpage(struct file *file, struct page *page)
1456 {
1457         return mpage_readpage(page, btrfs_get_block);
1458 }
1459
/*
 * __btrfs_write_full_page - write the dirty buffers of a locked page
 * @inode: owner of the page
 * @page:  locked page to write; unlocked before returning
 * @wbc:   writeback control; sync_mode and nonblocking decide whether a
 *         busy buffer is waited for or the page is redirtied
 *
 * Steps:
 *  1. attach dirty+uptodate buffers if the page has none;
 *  2. clean buffers beyond i_size and map dirty unmapped buffers with
 *     btrfs_get_block(); a mapping error jumps to the recover path;
 *  3. lock each mapped buffer and mark it for async write if it was dirty
 *     and its block number is not 0;
 *  4. set page writeback, submit the marked buffers and unlock the page;
 *  5. if nothing was submitted, end writeback immediately.
 *
 * Returns 0, or the error from btrfs_get_block() after the recover path
 * has submitted the buffers that were already mapped.
 */
1460 /*
1461  * While block_write_full_page is writing back the dirty buffers under
1462  * the page lock, whoever dirtied the buffers may decide to clean them
1463  * again at any time.  We handle that by only looking at the buffer
1464  * state inside lock_buffer().
1465  *
1466  * If block_write_full_page() is called for regular writeback
1467  * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
1468  * locked buffer.   This only can happen if someone has written the buffer
1469  * directly, with submit_bh().  At the address_space level PageWriteback
1470  * prevents this contention from occurring.
1471  */
1472 static int __btrfs_write_full_page(struct inode *inode, struct page *page,
1473                                    struct writeback_control *wbc)
1474 {
1475         int err;
1476         sector_t block;
1477         sector_t last_block;
1478         struct buffer_head *bh, *head;
1479         const unsigned blocksize = 1 << inode->i_blkbits;
1480         int nr_underway = 0;
1481
1482         BUG_ON(!PageLocked(page));
1483
             /* index of the last block that lies inside i_size */
1484         last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1485
1486         if (!page_has_buffers(page)) {
1487                 create_empty_buffers(page, blocksize,
1488                                         (1 << BH_Dirty)|(1 << BH_Uptodate));
1489         }
1490
1491         /*
1492          * Be very careful.  We have no exclusion from __set_page_dirty_buffers
1493          * here, and the (potentially unmapped) buffers may become dirty at
1494          * any time.  If a buffer becomes dirty here after we've inspected it
1495          * then we just miss that fact, and the page stays dirty.
1496          *
1497          * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
1498          * handle that here by just cleaning them.
1499          */
1500
1501         block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1502         head = page_buffers(page);
1503         bh = head;
1504
1505         /*
1506          * Get all the dirty buffers mapped to disk addresses and
1507          * handle any aliases from the underlying blockdev's mapping.
1508          */
1509         do {
1510                 if (block > last_block) {
1511                         /*
1512                          * mapped buffers outside i_size will occur, because
1513                          * this page can be outside i_size when there is a
1514                          * truncate in progress.
1515                          */
1516                         /*
1517                          * The buffer was zeroed by block_write_full_page()
1518                          */
1519                         clear_buffer_dirty(bh);
1520                         set_buffer_uptodate(bh);
1521                 } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
1522                         WARN_ON(bh->b_size != blocksize);
                             /* create == 0: map only, no allocation requested */
1523                         err = btrfs_get_block(inode, block, bh, 0);
1524                         if (err) {
1525 printk("writepage going to recovery err %d\n", err);
1526                                 goto recover;
1527                         }
1528                         if (buffer_new(bh)) {
1529                                 /* blockdev mappings never come here */
1530                                 clear_buffer_new(bh);
1531                         }
1532                 }
1533                 bh = bh->b_this_page;
1534                 block++;
1535         } while (bh != head);
1536
             /*
              * Second pass.  'continue' inside a do-while jumps to the loop
              * condition, so bh still advances for skipped buffers.
              */
1537         do {
1538                 if (!buffer_mapped(bh))
1539                         continue;
1540                 /*
1541                  * If it's a fully non-blocking write attempt and we cannot
1542                  * lock the buffer then redirty the page.  Note that this can
1543                  * potentially cause a busy-wait loop from pdflush and kswapd
1544                  * activity, but those code paths have their own higher-level
1545                  * throttling.
1546                  */
1547                 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1548                         lock_buffer(bh);
1549                 } else if (test_set_buffer_locked(bh)) {
1550                         redirty_page_for_writepage(wbc, page);
1551                         continue;
1552                 }
                     /*
                      * Buffers whose block number is 0 are never submitted.
                      * NOTE(review): presumably 0 marks inline data or holes
                      * (btrfs_get_block_lock maps inline extents to 0) -
                      * confirm.
                      */
1553                 if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
1554                         mark_buffer_async_write(bh);
1555                 } else {
1556                         unlock_buffer(bh);
1557                 }
1558         } while ((bh = bh->b_this_page) != head);
1559
1560         /*
1561          * The page and its buffers are protected by PageWriteback(), so we can
1562          * drop the bh refcounts early.
1563          */
1564         BUG_ON(PageWriteback(page));
1565         set_page_writeback(page);
1566
1567         do {
                     /* read the link first, before the buffer is submitted */
1568                 struct buffer_head *next = bh->b_this_page;
1569                 if (buffer_async_write(bh)) {
1570                         submit_bh(WRITE, bh);
1571                         nr_underway++;
1572                 }
1573                 bh = next;
1574         } while (bh != head);
1575         unlock_page(page);
1576
1577         err = 0;
1578 done:
1579         if (nr_underway == 0) {
1580                 /*
1581                  * The page was marked dirty, but the buffers were
1582                  * clean.  Someone wrote them back by hand with
1583                  * ll_rw_block/submit_bh.  A rare case.
1584                  */
1585                 int uptodate = 1;
1586                 do {
1587                         if (!buffer_uptodate(bh)) {
1588                                 uptodate = 0;
1589                                 break;
1590                         }
1591                         bh = bh->b_this_page;
1592                 } while (bh != head);
1593                 if (uptodate)
1594                         SetPageUptodate(page);
1595                 end_page_writeback(page);
1596         }
1597         return err;
1598
1599 recover:
1600         /*
1601          * ENOSPC, or some other error.  We may already have added some
1602          * blocks to the file, so we need to write these out to avoid
1603          * exposing stale data.
1604          * The page is currently locked and not marked for writeback
1605          */
1606         bh = head;
1607         /* Recovery: lock and submit the mapped buffers */
1608         do {
1609                 if (buffer_mapped(bh) && buffer_dirty(bh)) {
1610                         lock_buffer(bh);
1611                         mark_buffer_async_write(bh);
1612                 } else {
1613                         /*
1614                          * The buffer may have been set dirty during
1615                          * attachment to a dirty page.
1616                          */
1617                         clear_buffer_dirty(bh);
1618                 }
1619         } while ((bh = bh->b_this_page) != head);
1620         SetPageError(page);
1621         BUG_ON(PageWriteback(page));
1622         set_page_writeback(page);
1623         do {
1624                 struct buffer_head *next = bh->b_this_page;
1625                 if (buffer_async_write(bh)) {
1626                         clear_buffer_dirty(bh);
1627                         submit_bh(WRITE, bh);
1628                         nr_underway++;
1629                 }
1630                 bh = next;
1631         } while (bh != head);
1632         unlock_page(page);
             /* err still holds the btrfs_get_block() failure; done returns it */
1633         goto done;
1634 }
1635
1636 /*
1637  * The generic ->writepage function for buffer-backed address_spaces
1638  */
1639 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1640 {
1641         struct inode * const inode = page->mapping->host;
1642         loff_t i_size = i_size_read(inode);
1643         const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
1644         unsigned offset;
1645         void *kaddr;
1646
1647         /* Is the page fully inside i_size? */
1648         if (page->index < end_index)
1649                 return __btrfs_write_full_page(inode, page, wbc);
1650
1651         /* Is the page fully outside i_size? (truncate in progress) */
1652         offset = i_size & (PAGE_CACHE_SIZE-1);
1653         if (page->index >= end_index+1 || !offset) {
1654                 /*
1655                  * The page may have dirty, unmapped buffers.  For example,
1656                  * they may have been added in ext3_writepage().  Make them
1657                  * freeable here, so the page does not leak.
1658                  */
1659                 block_invalidatepage(page, 0);
1660                 unlock_page(page);
1661                 return 0; /* don't care */
1662         }
1663
1664         /*
1665          * The page straddles i_size.  It must be zeroed out on each and every
1666          * writepage invokation because it may be mmapped.  "A file is mapped
1667          * in multiples of the page size.  For a file that is not a multiple of
1668          * the  page size, the remaining memory is zeroed when mapped, and
1669          * writes to that region are not written out to the file."
1670          */
1671         kaddr = kmap_atomic(page, KM_USER0);
1672         memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1673         flush_dcache_page(page);
1674         kunmap_atomic(kaddr, KM_USER0);
1675         return __btrfs_write_full_page(inode, page, wbc);
1676 }
1677
1678 static void btrfs_truncate(struct inode *inode)
1679 {
1680         struct btrfs_root *root = BTRFS_I(inode)->root;
1681         int ret;
1682         struct btrfs_trans_handle *trans;
1683
1684         if (!S_ISREG(inode->i_mode))
1685                 return;
1686         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1687                 return;
1688
1689         btrfs_truncate_page(inode->i_mapping, inode->i_size);
1690
1691         mutex_lock(&root->fs_info->fs_mutex);
1692         trans = btrfs_start_transaction(root, 1);
1693         btrfs_set_trans_block_group(trans, inode);
1694
1695         /* FIXME, add redo link to tree so we don't leak on crash */
1696         ret = btrfs_truncate_in_trans(trans, root, inode);
1697         BUG_ON(ret);
1698         btrfs_update_inode(trans, root, inode);
1699         ret = btrfs_end_transaction(trans, root);
1700         BUG_ON(ret);
1701         mutex_unlock(&root->fs_info->fs_mutex);
1702         btrfs_btree_balance_dirty(root);
1703 }
1704
1705 static int btrfs_commit_write(struct file *file, struct page *page,
1706                               unsigned from, unsigned to)
1707 {
1708         struct inode *inode = page->mapping->host;
1709         struct buffer_head *bh;
1710         loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1711
1712         SetPageUptodate(page);
1713         bh = page_buffers(page);
1714         set_buffer_uptodate(bh);
1715         if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1716                 set_page_dirty(page);
1717         }
1718         if (pos > inode->i_size) {
1719                 i_size_write(inode, pos);
1720                 mark_inode_dirty(inode);
1721         }
1722         return 0;
1723 }
1724
1725 static int btrfs_copy_from_user(loff_t pos, int num_pages, int write_bytes,
1726                                 struct page **prepared_pages,
1727                                 const char __user * buf)
1728 {
1729         long page_fault = 0;
1730         int i;
1731         int offset = pos & (PAGE_CACHE_SIZE - 1);
1732
1733         for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) {
1734                 size_t count = min_t(size_t,
1735                                      PAGE_CACHE_SIZE - offset, write_bytes);
1736                 struct page *page = prepared_pages[i];
1737                 fault_in_pages_readable(buf, count);
1738
1739                 /* Copy data from userspace to the current page */
1740                 kmap(page);
1741                 page_fault = __copy_from_user(page_address(page) + offset,
1742                                               buf, count);
1743                 /* Flush processor's dcache for this page */
1744                 flush_dcache_page(page);
1745                 kunmap(page);
1746                 buf += count;
1747                 write_bytes -= count;
1748
1749                 if (page_fault)
1750                         break;
1751         }
1752         return page_fault ? -EFAULT : 0;
1753 }
1754
/*
 * Unlock, mark accessed and drop the reference on each page in @pages.
 * Stops at the first NULL slot or after @num_pages entries, whichever
 * comes first.
 */
static void btrfs_drop_pages(struct page **pages, size_t num_pages)
{
	size_t idx;

	for (idx = 0; idx < num_pages && pages[idx]; idx++) {
		struct page *page = pages[idx];

		unlock_page(page);
		mark_page_accessed(page);
		page_cache_release(page);
	}
}
1766 static int dirty_and_release_pages(struct btrfs_trans_handle *trans,
1767                                    struct btrfs_root *root,
1768                                    struct file *file,
1769                                    struct page **pages,
1770                                    size_t num_pages,
1771                                    loff_t pos,
1772                                    size_t write_bytes)
1773 {
1774         int i;
1775         int offset;
1776         int err = 0;
1777         int ret;
1778         int this_write;
1779         struct inode *inode = file->f_path.dentry->d_inode;
1780         struct buffer_head *bh;
1781         struct btrfs_file_extent_item *ei;
1782
1783         for (i = 0; i < num_pages; i++) {
1784                 offset = pos & (PAGE_CACHE_SIZE -1);
1785                 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
1786                 /* FIXME, one block at a time */
1787
1788                 mutex_lock(&root->fs_info->fs_mutex);
1789                 trans = btrfs_start_transaction(root, 1);
1790                 btrfs_set_trans_block_group(trans, inode);
1791
1792                 bh = page_buffers(pages[i]);
1793                 if (buffer_mapped(bh) && bh->b_blocknr == 0) {
1794                         struct btrfs_key key;
1795                         struct btrfs_path *path;
1796                         char *ptr;
1797                         u32 datasize;
1798
1799                         path = btrfs_alloc_path();
1800                         BUG_ON(!path);
1801                         key.objectid = inode->i_ino;
1802                         key.offset = pages[i]->index << PAGE_CACHE_SHIFT;
1803                         key.flags = 0;
1804                         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
1805                         BUG_ON(write_bytes >= PAGE_CACHE_SIZE);
1806                         datasize = offset +
1807                                 btrfs_file_extent_calc_inline_size(write_bytes);
1808                         ret = btrfs_insert_empty_item(trans, root, path, &key,
1809                                                       datasize);
1810                         BUG_ON(ret);
1811                         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
1812                                path->slots[0], struct btrfs_file_extent_item);
1813                         btrfs_set_file_extent_generation(ei, trans->transid);
1814                         btrfs_set_file_extent_type(ei,
1815                                                    BTRFS_FILE_EXTENT_INLINE);
1816                         ptr = btrfs_file_extent_inline_start(ei);
1817                         btrfs_memcpy(root, path->nodes[0]->b_data,
1818                                      ptr, bh->b_data, offset + write_bytes);
1819                         mark_buffer_dirty(path->nodes[0]);
1820                         btrfs_free_path(path);
1821                 } else if (buffer_mapped(bh)) {
1822                         btrfs_csum_file_block(trans, root, inode->i_ino,
1823                                       pages[i]->index << PAGE_CACHE_SHIFT,
1824                                       kmap(pages[i]), PAGE_CACHE_SIZE);
1825                         kunmap(pages[i]);
1826                 }
1827                 SetPageChecked(pages[i]);
1828                 // btrfs_update_inode_block_group(trans, inode);
1829                 ret = btrfs_end_transaction(trans, root);
1830                 BUG_ON(ret);
1831                 mutex_unlock(&root->fs_info->fs_mutex);
1832
1833                 ret = btrfs_commit_write(file, pages[i], offset,
1834                                          offset + this_write);
1835                 pos += this_write;
1836                 if (ret) {
1837                         err = ret;
1838                         goto failed;
1839                 }
1840                 WARN_ON(this_write > write_bytes);
1841                 write_bytes -= this_write;
1842         }
1843 failed:
1844         return err;
1845 }
1846
/*
 * Drop the file extent items of @inode that overlap the byte range
 * [@start, @end).
 *
 * The loop repeatedly looks up the extent item at search_start and then:
 *   - if the extent begins before @start, shrinks it so it ends at @start
 *     (regular extents only; an inline extent here triggers WARN_ON);
 *   - otherwise deletes the item and, for a regular extent with a non-zero
 *     disk block number, frees the disk extent;
 *   - if the extent runs past @end, takes an extra reference on its disk
 *     blocks and inserts a new "bookend" item keyed at @end that describes
 *     the part from @end to the old extent end.
 * inode->i_blocks is adjusted for extents with a non-zero disk block
 * number, and *@hint_block is set to the disk block number of the regular
 * extents that are shrunk or deleted.
 *
 * Returns 0 when done, -ENOMEM if no path could be allocated, or the
 * negative value returned by btrfs_lookup_file_extent().  Failures of the
 * tree modification helpers hit BUG_ON.
 */
1847 static int drop_extents(struct btrfs_trans_handle *trans,
1848                           struct btrfs_root *root,
1849                           struct inode *inode,
1850                           u64 start, u64 end, u64 *hint_block)
1851 {
1852         int ret;
1853         struct btrfs_key key;
1854         struct btrfs_leaf *leaf;
1855         int slot;
1856         struct btrfs_file_extent_item *extent;
1857         u64 extent_end = 0;
1858         int keep;
1859         struct btrfs_file_extent_item old;
1860         struct btrfs_path *path;
1861         u64 search_start = start;
1862         int bookend;
1863         int found_type;
1864         int found_extent;
1865         int found_inline;
1866
1867         path = btrfs_alloc_path();
1868         if (!path)
1869                 return -ENOMEM;
1870         while(1) {
1871                 btrfs_release_path(root, path);
1872                 ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
1873                                                search_start, -1);
1874                 if (ret < 0)
1875                         goto out;
             /*
              * NOTE(review): presumably ret > 0 means no item with exactly
              * this key exists, so step back to the preceding item, which
              * may still cover search_start -- confirm against
              * btrfs_lookup_file_extent().
              */
1876                 if (ret > 0) {
1877                         if (path->slots[0] == 0) {
1878                                 ret = 0;
1879                                 goto out;
1880                         }
1881                         path->slots[0]--;
1882                 }
1883                 keep = 0;
1884                 bookend = 0;
1885                 found_extent = 0;
1886                 found_inline = 0;
1887                 extent = NULL;
1888                 leaf = btrfs_buffer_leaf(path->nodes[0]);
1889                 slot = path->slots[0];
1890                 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
             /* stop once past the range, the inode, or the extent items */
1891                 if (key.offset >= end || key.objectid != inode->i_ino) {
1892                         ret = 0;
1893                         goto out;
1894                 }
1895                 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) {
1896                         ret = 0;
1897                         goto out;
1898                 }
1899                 extent = btrfs_item_ptr(leaf, slot,
1900                                         struct btrfs_file_extent_item);
1901                 found_type = btrfs_file_extent_type(extent);
1902                 if (found_type == BTRFS_FILE_EXTENT_REG) {
1903                         extent_end = key.offset +
1904                                 (btrfs_file_extent_num_blocks(extent) <<
1905                                  inode->i_blkbits);
1906                         found_extent = 1;
1907                 } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1908                         found_inline = 1;
1909                         extent_end = key.offset +
1910                              btrfs_file_extent_inline_len(leaf->items + slot);
1911                 }
1912
1913                 if (!found_extent && !found_inline) {
1914                         ret = 0;
1915                         goto out;
1916                 }
1917
             /* the extent found ends at or before search_start: nothing left */
1918                 if (search_start >= extent_end) {
1919                         ret = 0;
1920                         goto out;
1921                 }
1922
             /*
              * Next iteration resumes after this extent; the end of an
              * inline extent is rounded up to a block boundary.
              */
1923                 if (found_inline) {
1924                         u64 mask = root->blocksize - 1;
1925                         search_start = (extent_end + mask) & ~mask;
1926                 } else
1927                         search_start = extent_end;
1928
             /*
              * The extent runs past @end: save a copy of the item in 'old'
              * and take an extra reference on its disk blocks so the tail
              * can be re-inserted as a bookend below.
              */
1929                 if (end < extent_end && end >= key.offset) {
1930                         if (found_extent) {
1931                                 u64 disk_blocknr =
1932                                         btrfs_file_extent_disk_blocknr(extent);
1933                                 u64 disk_num_blocks =
1934                                       btrfs_file_extent_disk_num_blocks(extent);
1935                                 memcpy(&old, extent, sizeof(old));
1936                                 if (disk_blocknr != 0) {
1937                                         ret = btrfs_inc_extent_ref(trans, root,
1938                                                  disk_blocknr, disk_num_blocks);
1939                                         BUG_ON(ret);
1940                                 }
1941                         }
1942                         WARN_ON(found_inline);
1943                         bookend = 1;
1944                 }
1945
1946                 if (start > key.offset) {
1947                         u64 new_num;
1948                         u64 old_num;
1949                         /* truncate existing extent */
1950                         keep = 1;
1951                         WARN_ON(start & (root->blocksize - 1));
1952                         if (found_extent) {
1953                                 new_num = (start - key.offset) >>
1954                                         inode->i_blkbits;
1955                                 old_num = btrfs_file_extent_num_blocks(extent);
1956                                 *hint_block =
1957                                         btrfs_file_extent_disk_blocknr(extent);
                             /*
                              * NOTE(review): "<< 3" presumably converts 4K
                              * blocks to 512-byte i_blocks units -- confirm.
                              */
1958                                 if (btrfs_file_extent_disk_blocknr(extent)) {
1959                                         inode->i_blocks -=
1960                                                 (old_num - new_num) << 3;
1961                                 }
1962                                 btrfs_set_file_extent_num_blocks(extent,
1963                                                                  new_num);
1964                                 mark_buffer_dirty(path->nodes[0]);
1965                         } else {
1966                                 WARN_ON(1);
1967                         }
1968                 }
             /* the extent starts inside the range: delete the whole item */
1969                 if (!keep) {
1970                         u64 disk_blocknr = 0;
1971                         u64 disk_num_blocks = 0;
1972                         u64 extent_num_blocks = 0;
1973                         if (found_extent) {
1974                                 disk_blocknr =
1975                                       btrfs_file_extent_disk_blocknr(extent);
1976                                 disk_num_blocks =
1977                                       btrfs_file_extent_disk_num_blocks(extent);
1978                                 extent_num_blocks =
1979                                       btrfs_file_extent_num_blocks(extent);
1980                                 *hint_block =
1981                                         btrfs_file_extent_disk_blocknr(extent);
1982                         }
1983                         ret = btrfs_del_item(trans, root, path);
1984                         BUG_ON(ret);
1985                         btrfs_release_path(root, path);
1986                         extent = NULL;
1987                         if (found_extent && disk_blocknr != 0) {
1988                                 inode->i_blocks -= extent_num_blocks << 3;
1989                                 ret = btrfs_free_extent(trans, root,
1990                                                         disk_blocknr,
1991                                                         disk_num_blocks, 0);
1992                         }
1993
1994                         BUG_ON(ret);
1995                         if (!bookend && search_start >= end) {
1996                                 ret = 0;
1997                                 goto out;
1998                         }
1999                         if (!bookend)
2000                                 continue;
2001                 }
2002                 if (bookend && found_extent) {
2003                         /* create bookend */
2004                         struct btrfs_key ins;
2005                         ins.objectid = inode->i_ino;
2006                         ins.offset = end;
2007                         ins.flags = 0;
2008                         btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
2009
2010                         btrfs_release_path(root, path);
2011                         ret = btrfs_insert_empty_item(trans, root, path, &ins,
2012                                                       sizeof(*extent));
2013                         BUG_ON(ret);
2014                         extent = btrfs_item_ptr(
2015                                     btrfs_buffer_leaf(path->nodes[0]),
2016                                     path->slots[0],
2017                                     struct btrfs_file_extent_item);
                     /* same disk extent as the old item, shifted to start at @end */
2018                         btrfs_set_file_extent_disk_blocknr(extent,
2019                                     btrfs_file_extent_disk_blocknr(&old));
2020                         btrfs_set_file_extent_disk_num_blocks(extent,
2021                                     btrfs_file_extent_disk_num_blocks(&old));
2022
2023                         btrfs_set_file_extent_offset(extent,
2024                                     btrfs_file_extent_offset(&old) +
2025                                     ((end - key.offset) >> inode->i_blkbits));
2026                         WARN_ON(btrfs_file_extent_num_blocks(&old) <
2027                                 (extent_end - end) >> inode->i_blkbits);
2028                         btrfs_set_file_extent_num_blocks(extent,
2029                                     (extent_end - end) >> inode->i_blkbits);
2030
2031                         btrfs_set_file_extent_type(extent,
2032                                                    BTRFS_FILE_EXTENT_REG);
2033                         btrfs_set_file_extent_generation(extent,
2034                                     btrfs_file_extent_generation(&old));
2035                         btrfs_mark_buffer_dirty(path->nodes[0]);
2036                         if (btrfs_file_extent_disk_blocknr(&old) != 0) {
2037                                 inode->i_blocks +=
2038                                       btrfs_file_extent_num_blocks(extent) << 3;
2039                         }
2040                         ret = 0;
2041                         goto out;
2042                 }
2043         }
2044 out:
2045         btrfs_free_path(path);
2046         return ret;
2047 }
2048
2049 static int prepare_pages(struct btrfs_root *root,
2050                          struct file *file,
2051                          struct page **pages,
2052                          size_t num_pages,
2053                          loff_t pos,
2054                          unsigned long first_index,
2055                          unsigned long last_index,
2056                          size_t write_bytes,
2057                          u64 alloc_extent_start)
2058 {
2059         int i;
2060         unsigned long index = pos >> PAGE_CACHE_SHIFT;
2061         struct inode *inode = file->f_path.dentry->d_inode;
2062         int offset;
2063         int err = 0;
2064         int this_write;
2065         struct buffer_head *bh;
2066         struct buffer_head *head;
2067         loff_t isize = i_size_read(inode);
2068
2069         memset(pages, 0, num_pages * sizeof(struct page *));
2070
2071         for (i = 0; i < num_pages; i++) {
2072                 pages[i] = grab_cache_page(inode->i_mapping, index + i);
2073                 if (!pages[i]) {
2074                         err = -ENOMEM;
2075                         goto failed_release;
2076                 }
2077                 cancel_dirty_page(pages[i], PAGE_CACHE_SIZE);
2078                 wait_on_page_writeback(pages[i]);
2079                 offset = pos & (PAGE_CACHE_SIZE -1);
2080                 this_write = min(PAGE_CACHE_SIZE - offset, write_bytes);
2081                 if (!page_has_buffers(pages[i])) {
2082                         create_empty_buffers(pages[i],
2083                                              root->fs_info->sb->s_blocksize,
2084                                              (1 << BH_Uptodate));
2085                 }
2086                 head = page_buffers(pages[i]);
2087                 bh = head;
2088                 do {
2089                         err = btrfs_map_bh_to_logical(root, bh,
2090                                                       alloc_extent_start);
2091                         BUG_ON(err);
2092                         if (err)
2093                                 goto failed_truncate;
2094                         bh = bh->b_this_page;
2095                         if (alloc_extent_start)
2096                                 alloc_extent_start++;
2097                 } while (bh != head);
2098                 pos += this_write;
2099                 WARN_ON(this_write > write_bytes);
2100                 write_bytes -= this_write;
2101         }
2102         return 0;
2103
2104 failed_release:
2105         btrfs_drop_pages(pages, num_pages);
2106         return err;
2107
2108 failed_truncate:
2109         btrfs_drop_pages(pages, num_pages);
2110         if (pos > isize)
2111                 vmtruncate(inode, isize);
2112         return err;
2113 }
2114
2115 static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
2116                                 size_t count, loff_t *ppos)
2117 {
2118         loff_t pos;
2119         size_t num_written = 0;
2120         int err = 0;
2121         int ret = 0;
2122         struct inode *inode = file->f_path.dentry->d_inode;
2123         struct btrfs_root *root = BTRFS_I(inode)->root;
2124         struct page *pages[8];
2125         struct page *pinned[2];
2126         unsigned long first_index;
2127         unsigned long last_index;
2128         u64 start_pos;
2129         u64 num_blocks;
2130         u64 alloc_extent_start;
2131         u64 hint_block;
2132         struct btrfs_trans_handle *trans;
2133         struct btrfs_key ins;
2134         pinned[0] = NULL;
2135         pinned[1] = NULL;
2136         if (file->f_flags & O_DIRECT)
2137                 return -EINVAL;
2138         pos = *ppos;
2139         vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
2140         current->backing_dev_info = inode->i_mapping->backing_dev_info;
2141         err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
2142         if (err)
2143                 goto out;
2144         if (count == 0)
2145                 goto out;
2146         err = remove_suid(file->f_path.dentry);
2147         if (err)
2148                 goto out;
2149         file_update_time(file);
2150
2151         start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
2152         num_blocks = (count + pos - start_pos + root->blocksize - 1) >>
2153                         inode->i_blkbits;
2154
2155         mutex_lock(&inode->i_mutex);
2156         first_index = pos >> PAGE_CACHE_SHIFT;
2157         last_index = (pos + count) >> PAGE_CACHE_SHIFT;
2158
2159         if ((pos & (PAGE_CACHE_SIZE - 1))) {
2160                 pinned[0] = grab_cache_page(inode->i_mapping, first_index);
2161                 if (!PageUptodate(pinned[0])) {
2162                         ret = mpage_readpage(pinned[0], btrfs_get_block);
2163                         BUG_ON(ret);
2164                         wait_on_page_locked(pinned[0]);
2165                 } else {
2166                         unlock_page(pinned[0]);
2167                 }
2168         }
2169         if ((pos + count) & (PAGE_CACHE_SIZE - 1)) {
2170                 pinned[1] = grab_cache_page(inode->i_mapping, last_index);
2171                 if (!PageUptodate(pinned[1])) {
2172                         ret = mpage_readpage(pinned[1], btrfs_get_block);
2173                         BUG_ON(ret);
2174                         wait_on_page_locked(pinned[1]);
2175                 } else {
2176                         unlock_page(pinned[1]);
2177                 }
2178         }
2179
2180         mutex_lock(&root->fs_info->fs_mutex);
2181         trans = btrfs_start_transaction(root, 1);
2182         if (!trans) {
2183                 err = -ENOMEM;
2184                 mutex_unlock(&root->fs_info->fs_mutex);
2185                 goto out_unlock;
2186         }
2187         btrfs_set_trans_block_group(trans, inode);
2188         /* FIXME blocksize != 4096 */
2189         inode->i_blocks += num_blocks << 3;
2190         hint_block = 0;
2191         if (start_pos < inode->i_size) {
2192                 /* FIXME blocksize != pagesize */
2193                 ret = drop_extents(trans, root, inode,
2194                                    start_pos,
2195                                    (pos + count + root->blocksize -1) &
2196                                    ~((u64)root->blocksize - 1), &hint_block);
2197                 BUG_ON(ret);
2198         }
2199         if (inode->i_size < start_pos) {
2200                 u64 last_pos_in_file;
2201                 u64 hole_size;
2202                 u64 mask = root->blocksize - 1;
2203                 last_pos_in_file = (inode->i_size + mask) & ~mask;
2204                 hole_size = (start_pos - last_pos_in_file + mask) & ~mask;
2205                 hole_size >>= inode->i_blkbits;
2206                 if (last_pos_in_file < start_pos) {
2207                         ret = btrfs_insert_file_extent(trans, root,
2208                                                        inode->i_ino,
2209                                                        last_pos_in_file,
2210                                                        0, 0, hole_size);
2211                 }
2212                 BUG_ON(ret);
2213         }
2214         if (inode->i_size >= PAGE_CACHE_SIZE || pos + count < inode->i_size ||
2215             pos + count - start_pos > BTRFS_MAX_INLINE_DATA_SIZE(root)) {
2216                 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
2217                                          num_blocks, hint_block, (u64)-1,
2218                                          &ins, 1);
2219                 BUG_ON(ret);
2220                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
2221                                        start_pos, ins.objectid, ins.offset,
2222                                        ins.offset);
2223                 BUG_ON(ret);
2224         } else {
2225                 ins.offset = 0;
2226                 ins.objectid = 0;
2227         }
2228         BUG_ON(ret);
2229         alloc_extent_start = ins.objectid;
2230         // btrfs_update_inode_block_group(trans, inode);
2231         ret = btrfs_end_transaction(trans, root);
2232         mutex_unlock(&root->fs_info->fs_mutex);
2233
2234         while(count > 0) {
2235                 size_t offset = pos & (PAGE_CACHE_SIZE - 1);
2236                 size_t write_bytes = min(count, PAGE_CACHE_SIZE - offset);
2237                 size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >>
2238                                         PAGE_CACHE_SHIFT;
2239
2240                 memset(pages, 0, sizeof(pages));
2241                 ret = prepare_pages(root, file, pages, num_pages,
2242                                     pos, first_index, last_index,
2243                                     write_bytes, alloc_extent_start);
2244                 BUG_ON(ret);
2245
2246                 /* FIXME blocks != pagesize */
2247                 if (alloc_extent_start)
2248                         alloc_extent_start += num_pages;
2249                 ret = btrfs_copy_from_user(pos, num_pages,
2250                                            write_bytes, pages, buf);
2251                 BUG_ON(ret);
2252
2253                 ret = dirty_and_release_pages(NULL, root, file, pages,
2254                                               num_pages, pos, write_bytes);
2255                 BUG_ON(ret);
2256                 btrfs_drop_pages(pages, num_pages);
2257
2258                 buf += write_bytes;
2259                 count -= write_bytes;
2260                 pos += write_bytes;
2261                 num_written += write_bytes;
2262
2263                 balance_dirty_pages_ratelimited(inode->i_mapping);
2264                 btrfs_btree_balance_dirty(root);
2265                 cond_resched();
2266         }
2267 out_unlock:
2268         mutex_unlock(&inode->i_mutex);
2269 out:
2270         if (pinned[0])
2271                 page_cache_release(pinned[0]);
2272         if (pinned[1])
2273                 page_cache_release(pinned[1]);
2274         *ppos = pos;
2275         current->backing_dev_info = NULL;
2276         mark_inode_dirty(inode);
2277         return num_written ? num_written : err;
2278 }
2279
2280 static int btrfs_read_actor(read_descriptor_t *desc, struct page *page,
2281                         unsigned long offset, unsigned long size)
2282 {
2283         char *kaddr;
2284         unsigned long left, count = desc->count;
2285         struct inode *inode = page->mapping->host;
2286
2287         if (size > count)
2288                 size = count;
2289
2290         if (!PageChecked(page)) {
2291                 /* FIXME, do it per block */
2292                 struct btrfs_root *root = BTRFS_I(inode)->root;
2293                 int ret;
2294                 struct buffer_head *bh;
2295
2296                 if (page_has_buffers(page)) {
2297                         bh = page_buffers(page);
2298                         if (!buffer_mapped(bh)) {
2299                                 SetPageChecked(page);
2300                                 goto checked;
2301                         }
2302                 }
2303
2304                 ret = btrfs_csum_verify_file_block(root,
2305                                   page->mapping->host->i_ino,
2306                                   page->index << PAGE_CACHE_SHIFT,
2307                                   kmap(page), PAGE_CACHE_SIZE);
2308                 if (ret) {
2309                         if (ret != -ENOENT) {
2310                                 printk("failed to verify ino %lu page %lu ret %d\n",
2311                                        page->mapping->host->i_ino,
2312                                        page->index, ret);
2313                                 memset(page_address(page), 1, PAGE_CACHE_SIZE);
2314                                 flush_dcache_page(page);
2315                         }
2316                 }
2317                 SetPageChecked(page);
2318                 kunmap(page);
2319         }
2320 checked:
2321         /*
2322          * Faults on the destination of a read are common, so do it before
2323          * taking the kmap.
2324          */
2325         if (!fault_in_pages_writeable(desc->arg.buf, size)) {
2326                 kaddr = kmap_atomic(page, KM_USER0);
2327                 left = __copy_to_user_inatomic(desc->arg.buf,
2328                                                 kaddr + offset, size);
2329                 kunmap_atomic(kaddr, KM_USER0);
2330                 if (left == 0)
2331                         goto success;
2332         }
2333
2334         /* Do it the slow way */
2335         kaddr = kmap(page);
2336         left = __copy_to_user(desc->arg.buf, kaddr + offset, size);
2337         kunmap(page);
2338
2339         if (left) {
2340                 size -= left;
2341                 desc->error = -EFAULT;
2342         }
2343 success:
2344         desc->count = count - size;
2345         desc->written += size;
2346         desc->arg.buf += size;
2347         return size;
2348 }
2349
2350 /**
2351  * btrfs_file_aio_read - filesystem read routine
2352  * @iocb:       kernel I/O control block
2353  * @iov:        io vector request
2354  * @nr_segs:    number of segments in the iovec
2355  * @pos:        current file position
2356  */
2357 static ssize_t btrfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
2358                                    unsigned long nr_segs, loff_t pos)
2359 {
2360         struct file *filp = iocb->ki_filp;
2361         ssize_t retval;
2362         unsigned long seg;
2363         size_t count;
2364         loff_t *ppos = &iocb->ki_pos;
2365
2366         count = 0;
2367         for (seg = 0; seg < nr_segs; seg++) {
2368                 const struct iovec *iv = &iov[seg];
2369
2370                 /*
2371                  * If any segment has a negative length, or the cumulative
2372                  * length ever wraps negative then return -EINVAL.
2373                  */
2374                 count += iv->iov_len;
2375                 if (unlikely((ssize_t)(count|iv->iov_len) < 0))
2376                         return -EINVAL;
2377                 if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
2378                         continue;
2379                 if (seg == 0)
2380                         return -EFAULT;
2381                 nr_segs = seg;
2382                 count -= iv->iov_len;   /* This segment is no good */
2383                 break;
2384         }
2385         retval = 0;
2386         if (count) {
2387                 for (seg = 0; seg < nr_segs; seg++) {
2388                         read_descriptor_t desc;
2389
2390                         desc.written = 0;
2391                         desc.arg.buf = iov[seg].iov_base;
2392                         desc.count = iov[seg].iov_len;
2393                         if (desc.count == 0)
2394                                 continue;
2395                         desc.error = 0;
2396                         do_generic_file_read(filp, ppos, &desc,
2397                                              btrfs_read_actor);
2398                         retval += desc.written;
2399                         if (desc.error) {
2400                                 retval = retval ?: desc.error;
2401                                 break;
2402                         }
2403                 }
2404         }
2405         return retval;
2406 }
2407
2408 static int create_subvol(struct btrfs_root *root, char *name, int namelen)
2409 {
2410         struct btrfs_trans_handle *trans;
2411         struct btrfs_key key;
2412         struct btrfs_root_item root_item;
2413         struct btrfs_inode_item *inode_item;
2414         struct buffer_head *subvol;
2415         struct btrfs_leaf *leaf;
2416         struct btrfs_root *new_root;
2417         struct inode *inode;
2418         struct inode *dir;
2419         int ret;
2420         u64 objectid;
2421         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2422
2423         mutex_lock(&root->fs_info->fs_mutex);
2424         trans = btrfs_start_transaction(root, 1);
2425         BUG_ON(!trans);
2426
2427         subvol = btrfs_alloc_free_block(trans, root, 0);
2428         if (subvol == NULL)
2429                 return -ENOSPC;
2430         leaf = btrfs_buffer_leaf(subvol);
2431         btrfs_set_header_nritems(&leaf->header, 0);
2432         btrfs_set_header_level(&leaf->header, 0);
2433         btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol));
2434         btrfs_set_header_generation(&leaf->header, trans->transid);
2435         btrfs_set_header_owner(&leaf->header, root->root_key.objectid);
2436         memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
2437                sizeof(leaf->header.fsid));
2438         mark_buffer_dirty(subvol);
2439
2440         inode_item = &root_item.inode;
2441         memset(inode_item, 0, sizeof(*inode_item));
2442         btrfs_set_inode_generation(inode_item, 1);
2443         btrfs_set_inode_size(inode_item, 3);
2444         btrfs_set_inode_nlink(inode_item, 1);
2445         btrfs_set_inode_nblocks(inode_item, 1);
2446         btrfs_set_inode_mode(inode_item, S_IFDIR | 0755);
2447
2448         btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
2449         btrfs_set_root_refs(&root_item, 1);
2450         brelse(subvol);
2451         subvol = NULL;
2452
2453         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2454                                        0, &objectid);
2455         BUG_ON(ret);
2456
2457         btrfs_set_root_dirid(&root_item, new_dirid);
2458
2459         key.objectid = objectid;
2460         key.offset = 1;
2461         key.flags = 0;
2462         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2463         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2464                                 &root_item);
2465         BUG_ON(ret);
2466
2467         /*
2468          * insert the directory item
2469          */
2470         key.offset = (u64)-1;
2471         dir = root->fs_info->sb->s_root->d_inode;
2472         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2473                                     name, namelen, dir->i_ino, &key,
2474                                     BTRFS_FT_DIR);
2475         BUG_ON(ret);
2476
2477         ret = btrfs_commit_transaction(trans, root);
2478         BUG_ON(ret);
2479
2480         new_root = btrfs_read_fs_root(root->fs_info, &key);
2481         BUG_ON(!new_root);
2482
2483         trans = btrfs_start_transaction(new_root, 1);
2484         BUG_ON(!trans);
2485
2486         inode = btrfs_new_inode(trans, new_root, new_dirid,
2487                                 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2488         inode->i_op = &btrfs_dir_inode_operations;
2489         inode->i_fop = &btrfs_dir_file_operations;
2490
2491         ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid);
2492         BUG_ON(ret);
2493
2494         inode->i_nlink = 1;
2495         inode->i_size = 6;
2496         ret = btrfs_update_inode(trans, new_root, inode);
2497         BUG_ON(ret);
2498
2499         ret = btrfs_commit_transaction(trans, new_root);
2500         BUG_ON(ret);
2501
2502         iput(inode);
2503
2504         mutex_unlock(&root->fs_info->fs_mutex);
2505         btrfs_btree_balance_dirty(root);
2506         return 0;
2507 }
2508
2509 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2510 {
2511         struct btrfs_trans_handle *trans;
2512         struct btrfs_key key;
2513         struct btrfs_root_item new_root_item;
2514         int ret;
2515         u64 objectid;
2516
2517         if (!root->ref_cows)
2518                 return -EINVAL;
2519
2520         mutex_lock(&root->fs_info->fs_mutex);
2521         trans = btrfs_start_transaction(root, 1);
2522         BUG_ON(!trans);
2523
2524         ret = btrfs_update_inode(trans, root, root->inode);
2525         BUG_ON(ret);
2526
2527         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2528                                        0, &objectid);
2529         BUG_ON(ret);
2530
2531         memcpy(&new_root_item, &root->root_item,
2532                sizeof(new_root_item));
2533
2534         key.objectid = objectid;
2535         key.offset = 1;
2536         key.flags = 0;
2537         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2538         btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node));
2539
2540         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2541                                 &new_root_item);
2542         BUG_ON(ret);
2543
2544         /*
2545          * insert the directory item
2546          */
2547         key.offset = (u64)-1;
2548         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2549                                     name, namelen,
2550                                     root->fs_info->sb->s_root->d_inode->i_ino,
2551                                     &key, BTRFS_FT_DIR);
2552
2553         BUG_ON(ret);
2554
2555         ret = btrfs_inc_root_ref(trans, root);
2556         BUG_ON(ret);
2557
2558         ret = btrfs_commit_transaction(trans, root);
2559         BUG_ON(ret);
2560         mutex_unlock(&root->fs_info->fs_mutex);
2561         btrfs_btree_balance_dirty(root);
2562         return 0;
2563 }
2564
2565 static int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
2566                        cmd, unsigned long arg)
2567 {
2568         struct btrfs_root *root = BTRFS_I(inode)->root;
2569         struct btrfs_ioctl_vol_args vol_args;
2570         int ret = 0;
2571         struct btrfs_dir_item *di;
2572         int namelen;
2573         struct btrfs_path *path;
2574         u64 root_dirid;
2575
2576         switch (cmd) {
2577         case BTRFS_IOC_SNAP_CREATE:
2578                 if (copy_from_user(&vol_args,
2579                                    (struct btrfs_ioctl_vol_args __user *)arg,
2580                                    sizeof(vol_args)))
2581                         return -EFAULT;
2582                 namelen = strlen(vol_args.name);
2583                 if (namelen > BTRFS_VOL_NAME_MAX)
2584                         return -EINVAL;
2585                 path = btrfs_alloc_path();
2586                 if (!path)
2587                         return -ENOMEM;
2588                 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2589                 mutex_lock(&root->fs_info->fs_mutex);
2590                 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2591                                     path, root_dirid,
2592                                     vol_args.name, namelen, 0);
2593                 mutex_unlock(&root->fs_info->fs_mutex);
2594                 btrfs_free_path(path);
2595                 if (di && !IS_ERR(di))
2596                         return -EEXIST;
2597
2598                 if (root == root->fs_info->tree_root)
2599                         ret = create_subvol(root, vol_args.name, namelen);
2600                 else
2601                         ret = create_snapshot(root, vol_args.name, namelen);
2602                 WARN_ON(ret);
2603                 break;
2604         default:
2605                 return -ENOTTY;
2606         }
2607         return ret;
2608 }
2609
2610 #ifdef CONFIG_COMPAT
2611 static long btrfs_compat_ioctl(struct file *file, unsigned int cmd,
2612                                unsigned long arg)
2613 {
2614         struct inode *inode = file->f_path.dentry->d_inode;
2615         int ret;
2616         lock_kernel();
2617         ret = btrfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
2618         unlock_kernel();
2619         return ret;
2620
2621 }
2622 #endif
2623
/*
 * Slab caches used by btrfs.  All five are created in init_inodecache()
 * and torn down in destroy_inodecache().  Only the inode cache is private
 * to this file; the others have external linkage.
 */
static struct kmem_cache *btrfs_inode_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_transaction_cachep;
struct kmem_cache *btrfs_bit_radix_cachep;
struct kmem_cache *btrfs_path_cachep;
2629
2630 /*
2631  * Called inside transaction, so use GFP_NOFS
2632  */
2633 static struct inode *btrfs_alloc_inode(struct super_block *sb)
2634 {
2635         struct btrfs_inode *ei;
2636
2637         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2638         if (!ei)
2639                 return NULL;
2640         return &ei->vfs_inode;
2641 }
2642
2643 static void btrfs_destroy_inode(struct inode *inode)
2644 {
2645         WARN_ON(!list_empty(&inode->i_dentry));
2646         WARN_ON(inode->i_data.nrpages);
2647
2648         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2649 }
2650
2651 static void init_once(void * foo, struct kmem_cache * cachep,
2652                       unsigned long flags)
2653 {
2654         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2655
2656         if ((flags & (SLAB_CTOR_CONSTRUCTOR)) ==
2657             SLAB_CTOR_CONSTRUCTOR) {
2658                 inode_init_once(&ei->vfs_inode);
2659         }
2660 }
2661
2662 static int init_inodecache(void)
2663 {
2664         btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
2665                                              sizeof(struct btrfs_inode),
2666                                              0, (SLAB_RECLAIM_ACCOUNT|
2667                                                 SLAB_MEM_SPREAD),
2668                                              init_once, NULL);
2669         btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
2670                                              sizeof(struct btrfs_trans_handle),
2671                                              0, (SLAB_RECLAIM_ACCOUNT|
2672                                                 SLAB_MEM_SPREAD),
2673                                              NULL, NULL);
2674         btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
2675                                              sizeof(struct btrfs_transaction),
2676                                              0, (SLAB_RECLAIM_ACCOUNT|
2677                                                 SLAB_MEM_SPREAD),
2678                                              NULL, NULL);
2679         btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
2680                                              sizeof(struct btrfs_transaction),
2681                                              0, (SLAB_RECLAIM_ACCOUNT|
2682                                                 SLAB_MEM_SPREAD),
2683                                              NULL, NULL);
2684         btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix",
2685                                              256,
2686                                              0, (SLAB_RECLAIM_ACCOUNT|
2687                                                 SLAB_MEM_SPREAD |
2688                                                 SLAB_DESTROY_BY_RCU),
2689                                              NULL, NULL);
2690         if (btrfs_inode_cachep == NULL || btrfs_trans_handle_cachep == NULL ||
2691             btrfs_transaction_cachep == NULL || btrfs_bit_radix_cachep == NULL)
2692                 return -ENOMEM;
2693         return 0;
2694 }
2695
2696 static void destroy_inodecache(void)
2697 {
2698         kmem_cache_destroy(btrfs_inode_cachep);
2699         kmem_cache_destroy(btrfs_trans_handle_cachep);
2700         kmem_cache_destroy(btrfs_transaction_cachep);
2701         kmem_cache_destroy(btrfs_bit_radix_cachep);
2702         kmem_cache_destroy(btrfs_path_cachep);
2703 }
2704
2705 static int btrfs_get_sb(struct file_system_type *fs_type,
2706         int flags, const char *dev_name, void *data, struct vfsmount *mnt)
2707 {
2708         return get_sb_bdev(fs_type, flags, dev_name, data,
2709                            btrfs_fill_super, mnt);
2710 }
2711
2712 static int btrfs_getattr(struct vfsmount *mnt,
2713                          struct dentry *dentry, struct kstat *stat)
2714 {
2715         struct inode *inode = dentry->d_inode;
2716         generic_fillattr(inode, stat);
2717         stat->blksize = 256 * 1024;
2718         return 0;
2719 }
2720
2721 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
2722 {
2723         struct btrfs_root *root = btrfs_sb(dentry->d_sb);
2724         struct btrfs_super_block *disk_super = root->fs_info->disk_super;
2725
2726         buf->f_namelen = BTRFS_NAME_LEN;
2727         buf->f_blocks = btrfs_super_total_blocks(disk_super);
2728         buf->f_bfree = buf->f_blocks - btrfs_super_blocks_used(disk_super);
2729         buf->f_bavail = buf->f_bfree;
2730         buf->f_bsize = dentry->d_sb->s_blocksize;
2731         buf->f_type = BTRFS_SUPER_MAGIC;
2732         return 0;
2733 }
2734
2735 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
2736                            struct inode * new_dir,struct dentry *new_dentry)
2737 {
2738         struct btrfs_trans_handle *trans;
2739         struct btrfs_root *root = BTRFS_I(old_dir)->root;
2740         struct inode *new_inode = new_dentry->d_inode;
2741         struct inode *old_inode = old_dentry->d_inode;
2742         struct timespec ctime = CURRENT_TIME;
2743         struct btrfs_path *path;
2744         struct btrfs_dir_item *di;
2745         int ret;
2746
2747         if (S_ISDIR(old_inode->i_mode) && new_inode &&
2748             new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
2749                 return -ENOTEMPTY;
2750         }
2751         mutex_lock(&root->fs_info->fs_mutex);
2752         trans = btrfs_start_transaction(root, 1);
2753         btrfs_set_trans_block_group(trans, new_dir);
2754         path = btrfs_alloc_path();
2755         if (!path) {
2756                 ret = -ENOMEM;
2757                 goto out_fail;
2758         }
2759
2760         old_dentry->d_inode->i_nlink++;
2761         old_dir->i_ctime = old_dir->i_mtime = ctime;
2762         new_dir->i_ctime = new_dir->i_mtime = ctime;
2763         old_inode->i_ctime = ctime;
2764         if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) {
2765                 struct btrfs_key *location = &BTRFS_I(new_dir)->location;
2766                 u64 old_parent_oid;
2767                 di = btrfs_lookup_dir_item(trans, root, path, old_inode->i_ino,
2768                                            "..", 2, -1);
2769                 if (IS_ERR(di)) {
2770                         ret = PTR_ERR(di);
2771                         goto out_fail;
2772                 }
2773                 if (!di) {
2774                         ret = -ENOENT;
2775                         goto out_fail;
2776                 }
2777                 old_parent_oid = btrfs_disk_key_objectid(&di->location);
2778                 ret = btrfs_del_item(trans, root, path);
2779                 if (ret) {
2780                         ret = -EIO;
2781                         goto out_fail;
2782                 }
2783                 btrfs_release_path(root, path);
2784
2785                 di = btrfs_lookup_dir_index_item(trans, root, path,
2786                                                  old_inode->i_ino,
2787                                                  old_parent_oid,
2788                                                  "..", 2, -1);
2789                 if (IS_ERR(di)) {
2790                         ret = PTR_ERR(di);
2791                         goto out_fail;
2792                 }
2793                 if (!di) {
2794                         ret = -ENOENT;
2795                         goto out_fail;
2796                 }
2797                 ret = btrfs_del_item(trans, root, path);
2798                 if (ret) {
2799                         ret = -EIO;
2800                         goto out_fail;
2801                 }
2802                 btrfs_release_path(root, path);
2803
2804                 ret = btrfs_insert_dir_item(trans, root, "..", 2,
2805                                             old_inode->i_ino, location,
2806                                             BTRFS_FT_DIR);
2807                 if (ret)
2808                         goto out_fail;
2809         }
2810
2811
2812         ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
2813         if (ret)
2814                 goto out_fail;
2815
2816         if (new_inode) {
2817                 new_inode->i_ctime = CURRENT_TIME;
2818                 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
2819                 if (ret)
2820                         goto out_fail;
2821                 if (S_ISDIR(new_inode->i_mode))
2822                         clear_nlink(new_inode);
2823                 else
2824                         drop_nlink(new_inode);
2825                 btrfs_update_inode(trans, root, new_inode);
2826         }
2827         ret = btrfs_add_link(trans, new_dentry, old_inode);
2828         if (ret)
2829                 goto out_fail;
2830
2831 out_fail:
2832         btrfs_free_path(path);
2833         btrfs_end_transaction(trans, root);
2834         mutex_unlock(&root->fs_info->fs_mutex);
2835         return ret;
2836 }
2837
2838 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
2839                          const char *symname)
2840 {
2841         struct btrfs_trans_handle *trans;
2842         struct btrfs_root *root = BTRFS_I(dir)->root;
2843         struct btrfs_path *path;
2844         struct btrfs_key key;
2845         struct inode *inode;
2846         int err;
2847         int drop_inode = 0;
2848         u64 objectid;
2849         int name_len;
2850         int datasize;
2851         char *ptr;
2852         struct btrfs_file_extent_item *ei;
2853
2854         name_len = strlen(symname) + 1;
2855         if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
2856                 return -ENAMETOOLONG;
2857         mutex_lock(&root->fs_info->fs_mutex);
2858         trans = btrfs_start_transaction(root, 1);
2859         btrfs_set_trans_block_group(trans, dir);
2860
2861         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2862         if (err) {
2863                 err = -ENOSPC;
2864                 goto out_unlock;
2865         }
2866
2867         inode = btrfs_new_inode(trans, root, objectid,
2868                                 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
2869         err = PTR_ERR(inode);
2870         if (IS_ERR(inode))
2871                 goto out_unlock;
2872
2873         btrfs_set_trans_block_group(trans, inode);
2874         err = btrfs_add_nondir(trans, dentry, inode);
2875         if (err)
2876                 drop_inode = 1;
2877         else {
2878                 inode->i_mapping->a_ops = &btrfs_aops;
2879                 inode->i_fop = &btrfs_file_operations;
2880                 inode->i_op = &btrfs_file_inode_operations;
2881         }
2882         dir->i_sb->s_dirt = 1;
2883         btrfs_update_inode_block_group(trans, inode);
2884         btrfs_update_inode_block_group(trans, dir);
2885         if (drop_inode)
2886                 goto out_unlock;
2887
2888         path = btrfs_alloc_path();
2889         BUG_ON(!path);
2890         key.objectid = inode->i_ino;
2891         key.offset = 0;
2892         key.flags = 0;
2893         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2894         datasize = btrfs_file_extent_calc_inline_size(name_len);
2895         err = btrfs_insert_empty_item(trans, root, path, &key,
2896                                       datasize);
2897         BUG_ON(err);
2898         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2899                path->slots[0], struct btrfs_file_extent_item);
2900         btrfs_set_file_extent_generation(ei, trans->transid);
2901         btrfs_set_file_extent_type(ei,
2902                                    BTRFS_FILE_EXTENT_INLINE);
2903         ptr = btrfs_file_extent_inline_start(ei);
2904         btrfs_memcpy(root, path->nodes[0]->b_data,
2905                      ptr, symname, name_len);
2906         mark_buffer_dirty(path->nodes[0]);
2907         btrfs_free_path(path);
2908         inode->i_op = &btrfs_symlink_inode_operations;
2909         inode->i_mapping->a_ops = &btrfs_symlink_aops;
2910         inode->i_size = name_len - 1;
2911         btrfs_update_inode(trans, root, inode);
2912         err = 0;
2913
2914 out_unlock:
2915         btrfs_end_transaction(trans, root);
2916         mutex_unlock(&root->fs_info->fs_mutex);
2917
2918         if (drop_inode) {
2919                 inode_dec_link_count(inode);
2920                 iput(inode);
2921         }
2922         btrfs_btree_balance_dirty(root);
2923         return err;
2924 }
2925
/*
 * Filesystem type registered under the name "btrfs" by init_btrfs_fs().
 * Super blocks are obtained through btrfs_get_sb() and released with the
 * generic kill_block_super(); FS_REQUIRES_DEV is set in fs_flags.
 */
static struct file_system_type btrfs_fs_type = {
        .owner          = THIS_MODULE,
        .name           = "btrfs",
        .get_sb         = btrfs_get_sb,
        .kill_sb        = kill_block_super,
        .fs_flags       = FS_REQUIRES_DEV,
};
2933
/*
 * Super block operations: inode allocation, read, write-back and deletion,
 * plus super block write/sync, put_super and statfs.
 */
static struct super_operations btrfs_super_ops = {
        .delete_inode   = btrfs_delete_inode,
        .put_super      = btrfs_put_super,
        .read_inode     = btrfs_read_locked_inode,
        .write_super    = btrfs_write_super,
        .sync_fs        = btrfs_sync_fs,
        .write_inode    = btrfs_write_inode,
        .dirty_inode    = btrfs_dirty_inode,
        .alloc_inode    = btrfs_alloc_inode,
        .destroy_inode  = btrfs_destroy_inode,
        .statfs         = btrfs_statfs,
};
2946
/*
 * Inode operations for writable directories: name lookup plus the full set
 * of create/link/unlink/mkdir/rmdir/rename/symlink and setattr.
 */
static struct inode_operations btrfs_dir_inode_operations = {
        .lookup         = btrfs_lookup,
        .create         = btrfs_create,
        .unlink         = btrfs_unlink,
        .link           = btrfs_link,
        .mkdir          = btrfs_mkdir,
        .rmdir          = btrfs_rmdir,
        .rename         = btrfs_rename,
        .symlink        = btrfs_symlink,
        .setattr        = btrfs_setattr,
};
2958
/*
 * Inode operations for directories that only support lookup; no modifying
 * operation is provided.
 */
static struct inode_operations btrfs_dir_ro_inode_operations = {
        .lookup         = btrfs_lookup,
};
2962
/*
 * File operations for open directories: generic seek and read_dir
 * handlers, btrfs_readdir for listing, and the btrfs ioctl entry points
 * (the compat variant only when CONFIG_COMPAT is enabled).
 */
static struct file_operations btrfs_dir_file_operations = {
        .llseek         = generic_file_llseek,
        .read           = generic_read_dir,
        .readdir        = btrfs_readdir,
        .ioctl          = btrfs_ioctl,
#ifdef CONFIG_COMPAT
        .compat_ioctl   = btrfs_compat_ioctl,
#endif
};
2972
/*
 * Address space operations installed on inodes in btrfs_symlink() before
 * the symlink-specific table replaces them; other users are outside this
 * part of the file.
 */
static struct address_space_operations btrfs_aops = {
        .readpage       = btrfs_readpage,
        .writepage      = btrfs_writepage,
        .sync_page      = block_sync_page,
        .prepare_write  = btrfs_prepare_write,
        .commit_write   = btrfs_commit_write,
        .bmap           = btrfs_bmap,
};
2981
/*
 * Address space operations for symlink inodes (installed by
 * btrfs_symlink()): page read and write-back only.
 */
static struct address_space_operations btrfs_symlink_aops = {
        .readpage       = btrfs_readpage,
        .writepage      = btrfs_writepage,
};
2986
/*
 * Inode operations for files: truncate, getattr (which overrides the
 * reported block size, see btrfs_getattr()) and setattr.
 */
static struct inode_operations btrfs_file_inode_operations = {
        .truncate       = btrfs_truncate,
        .getattr        = btrfs_getattr,
        .setattr        = btrfs_setattr,
};
2992
/*
 * File operations for files.  Reads use the generic do_sync_read with
 * btrfs_file_aio_read as the aio_read hook; writes, fsync and ioctl are
 * btrfs-specific; seek, mmap and open use the generic helpers.
 */
static struct file_operations btrfs_file_operations = {
        .llseek         = generic_file_llseek,
        .read           = do_sync_read,
        .aio_read       = btrfs_file_aio_read,
        .write          = btrfs_file_write,
        .mmap           = generic_file_mmap,
        .open           = generic_file_open,
        .ioctl          = btrfs_ioctl,
        .fsync          = btrfs_sync_file,
#ifdef CONFIG_COMPAT
        .compat_ioctl   = btrfs_compat_ioctl,
#endif
};
3006
/*
 * Inode operations for symlinks (installed by btrfs_symlink()); all three
 * hooks are the generic page-based VFS helpers.
 */
static struct inode_operations btrfs_symlink_inode_operations = {
        .readlink       = generic_readlink,
        .follow_link    = page_follow_link_light,
        .put_link       = page_put_link,
};
3012
3013 static int __init init_btrfs_fs(void)
3014 {
3015         int err;
3016         printk("btrfs loaded!\n");
3017         btrfs_init_transaction_sys();
3018         err = init_inodecache();
3019         if (err)
3020                 return err;
3021         return register_filesystem(&btrfs_fs_type);
3022         destroy_inodecache();
3023         return err;
3024 }
3025
/*
 * Module exit: shuts down the transaction subsystem, destroys the slab
 * caches and unregisters the filesystem type.
 *
 * NOTE(review): the filesystem is unregistered after the caches are
 * destroyed, which is not the reverse of the init order - confirm this
 * ordering is intentional.
 */
static void __exit exit_btrfs_fs(void)
{
        btrfs_exit_transaction_sys();
        destroy_inodecache();
        unregister_filesystem(&btrfs_fs_type);
        printk("btrfs unloaded\n");
}
3033
/* Module entry and exit points, and the license declaration. */
module_init(init_btrfs_fs)
module_exit(exit_btrfs_fs)

MODULE_LICENSE("GPL");