Btrfs: Add some simple throttling to wait for data=ordered and snapshot deletion
[linux-2.6] / fs / btrfs / inode.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/buffer_head.h>
20 #include <linux/fs.h>
21 #include <linux/pagemap.h>
22 #include <linux/highmem.h>
23 #include <linux/time.h>
24 #include <linux/init.h>
25 #include <linux/string.h>
26 #include <linux/smp_lock.h>
27 #include <linux/backing-dev.h>
28 #include <linux/mpage.h>
29 #include <linux/swap.h>
30 #include <linux/writeback.h>
31 #include <linux/statfs.h>
32 #include <linux/compat.h>
33 #include <linux/bit_spinlock.h>
34 #include <linux/version.h>
35 #include <linux/xattr.h>
36 #include "ctree.h"
37 #include "disk-io.h"
38 #include "transaction.h"
39 #include "btrfs_inode.h"
40 #include "ioctl.h"
41 #include "print-tree.h"
42
/*
 * Pairs an inode number with a btrfs root.
 * NOTE(review): presumably the lookup key handed to the iget helpers
 * further down in this file -- confirm against the users of this struct.
 */
43 struct btrfs_iget_args {
44         u64 ino;
45         struct btrfs_root *root;
46 };
47
/*
 * File-local operation tables, declared up front so that functions such as
 * btrfs_read_locked_inode() can take their addresses.
 * NOTE(review): their initializers are presumably further down in this
 * file -- not visible from here.
 */
48 static struct inode_operations btrfs_dir_inode_operations;
49 static struct inode_operations btrfs_symlink_inode_operations;
50 static struct inode_operations btrfs_dir_ro_inode_operations;
51 static struct inode_operations btrfs_special_inode_operations;
52 static struct inode_operations btrfs_file_inode_operations;
53 static struct address_space_operations btrfs_aops;
54 static struct address_space_operations btrfs_symlink_aops;
55 static struct file_operations btrfs_dir_file_operations;
56 static struct extent_map_ops btrfs_extent_map_ops;
57
/*
 * Slab cache pointers.  Only btrfs_inode_cachep is private to this file;
 * the remaining ones have external linkage.
 */
58 static struct kmem_cache *btrfs_inode_cachep;
59 struct kmem_cache *btrfs_trans_handle_cachep;
60 struct kmem_cache *btrfs_transaction_cachep;
61 struct kmem_cache *btrfs_bit_radix_cachep;
62 struct kmem_cache *btrfs_path_cachep;
63
64 #define S_SHIFT 12
65 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
66         [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
67         [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
68         [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
69         [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
70         [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
71         [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
72         [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
73 };
74
75 int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
76                            int for_del)
77 {
78         u64 total = btrfs_super_total_bytes(&root->fs_info->super_copy);
79         u64 used = btrfs_super_bytes_used(&root->fs_info->super_copy);
80         u64 thresh;
81         int ret = 0;
82
83         if (for_del)
84                 thresh = total * 90;
85         else
86                 thresh = total * 85;
87
88         do_div(thresh, 100);
89
90         spin_lock(&root->fs_info->delalloc_lock);
91         if (used + root->fs_info->delalloc_bytes + num_required > thresh)
92                 ret = -ENOSPC;
93         spin_unlock(&root->fs_info->delalloc_lock);
94         return ret;
95 }
96
97 static int cow_file_range(struct inode *inode, u64 start, u64 end)
98 {
99         struct btrfs_root *root = BTRFS_I(inode)->root;
100         struct btrfs_trans_handle *trans;
101         u64 alloc_hint = 0;
102         u64 num_bytes;
103         u64 cur_alloc_size;
104         u64 blocksize = root->sectorsize;
105         struct btrfs_key ins;
106         int ret;
107
108         trans = btrfs_start_transaction(root, 1);
109         BUG_ON(!trans);
110         btrfs_set_trans_block_group(trans, inode);
111
112         num_bytes = (end - start + blocksize) & ~(blocksize - 1);
113         num_bytes = max(blocksize,  num_bytes);
114         ret = btrfs_drop_extents(trans, root, inode,
115                                  start, start + num_bytes, start, &alloc_hint);
116
117         if (alloc_hint == EXTENT_MAP_INLINE)
118                 goto out;
119
120         while(num_bytes > 0) {
121                 cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
122                 ret = btrfs_alloc_extent(trans, root, cur_alloc_size,
123                                          root->root_key.objectid,
124                                          trans->transid,
125                                          inode->i_ino, start, 0,
126                                          alloc_hint, (u64)-1, &ins, 1);
127                 if (ret) {
128                         WARN_ON(1);
129                         goto out;
130                 }
131                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
132                                                start, ins.objectid, ins.offset,
133                                                ins.offset);
134                 num_bytes -= cur_alloc_size;
135                 alloc_hint = ins.objectid + ins.offset;
136                 start += cur_alloc_size;
137         }
138         btrfs_add_ordered_inode(inode);
139 out:
140         btrfs_end_transaction(trans, root);
141         return ret;
142 }
143
144 static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end)
145 {
146         u64 extent_start;
147         u64 extent_end;
148         u64 bytenr;
149         u64 cow_end;
150         u64 loops = 0;
151         struct btrfs_root *root = BTRFS_I(inode)->root;
152         struct extent_buffer *leaf;
153         int found_type;
154         struct btrfs_path *path;
155         struct btrfs_file_extent_item *item;
156         int ret;
157         int err;
158         struct btrfs_key found_key;
159
160         path = btrfs_alloc_path();
161         BUG_ON(!path);
162 again:
163         ret = btrfs_lookup_file_extent(NULL, root, path,
164                                        inode->i_ino, start, 0);
165         if (ret < 0) {
166                 btrfs_free_path(path);
167                 return ret;
168         }
169
170         cow_end = end;
171         if (ret != 0) {
172                 if (path->slots[0] == 0)
173                         goto not_found;
174                 path->slots[0]--;
175         }
176
177         leaf = path->nodes[0];
178         item = btrfs_item_ptr(leaf, path->slots[0],
179                               struct btrfs_file_extent_item);
180
181         /* are we inside the extent that was found? */
182         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
183         found_type = btrfs_key_type(&found_key);
184         if (found_key.objectid != inode->i_ino ||
185             found_type != BTRFS_EXTENT_DATA_KEY) {
186                 goto not_found;
187         }
188
189         found_type = btrfs_file_extent_type(leaf, item);
190         extent_start = found_key.offset;
191         if (found_type == BTRFS_FILE_EXTENT_REG) {
192                 extent_end = extent_start +
193                        btrfs_file_extent_num_bytes(leaf, item);
194                 err = 0;
195
196                 if (loops && start != extent_start)
197                         goto not_found;
198
199                 if (start < extent_start || start >= extent_end)
200                         goto not_found;
201
202                 cow_end = min(end, extent_end - 1);
203                 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
204                 if (bytenr == 0)
205                         goto not_found;
206
207                 if (btrfs_count_snapshots_in_path(root, path, bytenr) != 1) {
208                         goto not_found;
209                 }
210
211                 start = extent_end;
212         } else {
213                 goto not_found;
214         }
215 loop:
216         if (start > end) {
217                 btrfs_free_path(path);
218                 return 0;
219         }
220         btrfs_release_path(root, path);
221         loops++;
222         goto again;
223
224 not_found:
225         cow_file_range(inode, start, cow_end);
226         start = cow_end + 1;
227         goto loop;
228 }
229
230 static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
231 {
232         struct btrfs_root *root = BTRFS_I(inode)->root;
233         u64 num_bytes;
234         int ret;
235
236         mutex_lock(&root->fs_info->fs_mutex);
237         if (btrfs_test_opt(root, NODATACOW))
238                 ret = run_delalloc_nocow(inode, start, end);
239         else
240                 ret = cow_file_range(inode, start, end);
241
242         spin_lock(&root->fs_info->delalloc_lock);
243         num_bytes = end + 1 - start;
244         if (root->fs_info->delalloc_bytes < num_bytes) {
245                 printk("delalloc accounting error total %llu sub %llu\n",
246                        root->fs_info->delalloc_bytes, num_bytes);
247         } else {
248                 root->fs_info->delalloc_bytes -= num_bytes;
249         }
250         spin_unlock(&root->fs_info->delalloc_lock);
251
252         mutex_unlock(&root->fs_info->fs_mutex);
253         return ret;
254 }
255
256 int btrfs_writepage_io_hook(struct page *page, u64 start, u64 end)
257 {
258         struct inode *inode = page->mapping->host;
259         struct btrfs_root *root = BTRFS_I(inode)->root;
260         struct btrfs_trans_handle *trans;
261         char *kaddr;
262         int ret = 0;
263         u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
264         size_t offset = start - page_start;
265
266         if (btrfs_test_opt(root, NODATASUM))
267                 return 0;
268
269         mutex_lock(&root->fs_info->fs_mutex);
270         trans = btrfs_start_transaction(root, 1);
271         btrfs_set_trans_block_group(trans, inode);
272         kaddr = kmap(page);
273         btrfs_csum_file_block(trans, root, inode, inode->i_ino,
274                               start, kaddr + offset, end - start + 1);
275         kunmap(page);
276         ret = btrfs_end_transaction(trans, root);
277         BUG_ON(ret);
278         mutex_unlock(&root->fs_info->fs_mutex);
279         return ret;
280 }
281
282 int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
283 {
284         int ret = 0;
285         struct inode *inode = page->mapping->host;
286         struct btrfs_root *root = BTRFS_I(inode)->root;
287         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
288         struct btrfs_csum_item *item;
289         struct btrfs_path *path = NULL;
290         u32 csum;
291
292         if (btrfs_test_opt(root, NODATASUM))
293                 return 0;
294
295         mutex_lock(&root->fs_info->fs_mutex);
296         path = btrfs_alloc_path();
297         item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0);
298         if (IS_ERR(item)) {
299                 ret = PTR_ERR(item);
300                 /* a csum that isn't present is a preallocated region. */
301                 if (ret == -ENOENT || ret == -EFBIG)
302                         ret = 0;
303                 csum = 0;
304                 goto out;
305         }
306         read_extent_buffer(path->nodes[0], &csum, (unsigned long)item,
307                            BTRFS_CRC32_SIZE);
308         set_state_private(em_tree, start, csum);
309 out:
310         if (path)
311                 btrfs_free_path(path);
312         mutex_unlock(&root->fs_info->fs_mutex);
313         return ret;
314 }
315
316 int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end)
317 {
318         size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
319         struct inode *inode = page->mapping->host;
320         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
321         char *kaddr;
322         u64 private;
323         int ret;
324         struct btrfs_root *root = BTRFS_I(inode)->root;
325         u32 csum = ~(u32)0;
326         unsigned long flags;
327
328         if (btrfs_test_opt(root, NODATASUM))
329                 return 0;
330
331         ret = get_state_private(em_tree, start, &private);
332         local_irq_save(flags);
333         kaddr = kmap_atomic(page, KM_IRQ0);
334         if (ret) {
335                 goto zeroit;
336         }
337         csum = btrfs_csum_data(root, kaddr + offset, csum,  end - start + 1);
338         btrfs_csum_final(csum, (char *)&csum);
339         if (csum != private) {
340                 goto zeroit;
341         }
342         kunmap_atomic(kaddr, KM_IRQ0);
343         local_irq_restore(flags);
344         return 0;
345
346 zeroit:
347         printk("btrfs csum failed ino %lu off %llu\n",
348                page->mapping->host->i_ino, (unsigned long long)start);
349         memset(kaddr + offset, 1, end - start + 1);
350         flush_dcache_page(page);
351         kunmap_atomic(kaddr, KM_IRQ0);
352         local_irq_restore(flags);
353         return 0;
354 }
355
356 void btrfs_read_locked_inode(struct inode *inode)
357 {
358         struct btrfs_path *path;
359         struct extent_buffer *leaf;
360         struct btrfs_inode_item *inode_item;
361         struct btrfs_inode_timespec *tspec;
362         struct btrfs_root *root = BTRFS_I(inode)->root;
363         struct btrfs_key location;
364         u64 alloc_group_block;
365         u32 rdev;
366         int ret;
367
368         path = btrfs_alloc_path();
369         BUG_ON(!path);
370         mutex_lock(&root->fs_info->fs_mutex);
371         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
372
373         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
374         if (ret)
375                 goto make_bad;
376
377         leaf = path->nodes[0];
378         inode_item = btrfs_item_ptr(leaf, path->slots[0],
379                                     struct btrfs_inode_item);
380
381         inode->i_mode = btrfs_inode_mode(leaf, inode_item);
382         inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
383         inode->i_uid = btrfs_inode_uid(leaf, inode_item);
384         inode->i_gid = btrfs_inode_gid(leaf, inode_item);
385         inode->i_size = btrfs_inode_size(leaf, inode_item);
386
387         tspec = btrfs_inode_atime(inode_item);
388         inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
389         inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
390
391         tspec = btrfs_inode_mtime(inode_item);
392         inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
393         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
394
395         tspec = btrfs_inode_ctime(inode_item);
396         inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
397         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
398
399         inode->i_blocks = btrfs_inode_nblocks(leaf, inode_item);
400         inode->i_generation = btrfs_inode_generation(leaf, inode_item);
401         inode->i_rdev = 0;
402         rdev = btrfs_inode_rdev(leaf, inode_item);
403
404         alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
405         BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
406                                                        alloc_group_block);
407
408         if (!BTRFS_I(inode)->block_group) {
409                 BTRFS_I(inode)->block_group = btrfs_find_block_group(root,
410                                                          NULL, 0, 0, 0);
411         }
412         btrfs_free_path(path);
413         inode_item = NULL;
414
415         mutex_unlock(&root->fs_info->fs_mutex);
416
417         switch (inode->i_mode & S_IFMT) {
418         case S_IFREG:
419                 inode->i_mapping->a_ops = &btrfs_aops;
420                 BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops;
421                 inode->i_fop = &btrfs_file_operations;
422                 inode->i_op = &btrfs_file_inode_operations;
423                 break;
424         case S_IFDIR:
425                 inode->i_fop = &btrfs_dir_file_operations;
426                 if (root == root->fs_info->tree_root)
427                         inode->i_op = &btrfs_dir_ro_inode_operations;
428                 else
429                         inode->i_op = &btrfs_dir_inode_operations;
430                 break;
431         case S_IFLNK:
432                 inode->i_op = &btrfs_symlink_inode_operations;
433                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
434                 break;
435         default:
436                 init_special_inode(inode, inode->i_mode, rdev);
437                 break;
438         }
439         return;
440
441 make_bad:
442         btrfs_release_path(root, path);
443         btrfs_free_path(path);
444         mutex_unlock(&root->fs_info->fs_mutex);
445         make_bad_inode(inode);
446 }
447
448 static void fill_inode_item(struct extent_buffer *leaf,
449                             struct btrfs_inode_item *item,
450                             struct inode *inode)
451 {
452         btrfs_set_inode_uid(leaf, item, inode->i_uid);
453         btrfs_set_inode_gid(leaf, item, inode->i_gid);
454         btrfs_set_inode_size(leaf, item, inode->i_size);
455         btrfs_set_inode_mode(leaf, item, inode->i_mode);
456         btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
457
458         btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
459                                inode->i_atime.tv_sec);
460         btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
461                                 inode->i_atime.tv_nsec);
462
463         btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
464                                inode->i_mtime.tv_sec);
465         btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
466                                 inode->i_mtime.tv_nsec);
467
468         btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
469                                inode->i_ctime.tv_sec);
470         btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
471                                 inode->i_ctime.tv_nsec);
472
473         btrfs_set_inode_nblocks(leaf, item, inode->i_blocks);
474         btrfs_set_inode_generation(leaf, item, inode->i_generation);
475         btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
476         btrfs_set_inode_block_group(leaf, item,
477                                     BTRFS_I(inode)->block_group->key.objectid);
478 }
479
480 int btrfs_update_inode(struct btrfs_trans_handle *trans,
481                               struct btrfs_root *root,
482                               struct inode *inode)
483 {
484         struct btrfs_inode_item *inode_item;
485         struct btrfs_path *path;
486         struct extent_buffer *leaf;
487         int ret;
488
489         path = btrfs_alloc_path();
490         BUG_ON(!path);
491         ret = btrfs_lookup_inode(trans, root, path,
492                                  &BTRFS_I(inode)->location, 1);
493         if (ret) {
494                 if (ret > 0)
495                         ret = -ENOENT;
496                 goto failed;
497         }
498
499         leaf = path->nodes[0];
500         inode_item = btrfs_item_ptr(leaf, path->slots[0],
501                                   struct btrfs_inode_item);
502
503         fill_inode_item(leaf, inode_item, inode);
504         btrfs_mark_buffer_dirty(leaf);
505         btrfs_set_inode_last_trans(trans, inode);
506         ret = 0;
507 failed:
508         btrfs_release_path(root, path);
509         btrfs_free_path(path);
510         return ret;
511 }
512
513
514 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
515                               struct btrfs_root *root,
516                               struct inode *dir,
517                               struct dentry *dentry)
518 {
519         struct btrfs_path *path;
520         const char *name = dentry->d_name.name;
521         int name_len = dentry->d_name.len;
522         int ret = 0;
523         struct extent_buffer *leaf;
524         struct btrfs_dir_item *di;
525         struct btrfs_key key;
526
527         path = btrfs_alloc_path();
528         if (!path) {
529                 ret = -ENOMEM;
530                 goto err;
531         }
532
533         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
534                                     name, name_len, -1);
535         if (IS_ERR(di)) {
536                 ret = PTR_ERR(di);
537                 goto err;
538         }
539         if (!di) {
540                 ret = -ENOENT;
541                 goto err;
542         }
543         leaf = path->nodes[0];
544         btrfs_dir_item_key_to_cpu(leaf, di, &key);
545         ret = btrfs_delete_one_dir_name(trans, root, path, di);
546         if (ret)
547                 goto err;
548         btrfs_release_path(root, path);
549
550         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
551                                          key.objectid, name, name_len, -1);
552         if (IS_ERR(di)) {
553                 ret = PTR_ERR(di);
554                 goto err;
555         }
556         if (!di) {
557                 ret = -ENOENT;
558                 goto err;
559         }
560         ret = btrfs_delete_one_dir_name(trans, root, path, di);
561
562         dentry->d_inode->i_ctime = dir->i_ctime;
563         ret = btrfs_del_inode_ref(trans, root, name, name_len,
564                                   dentry->d_inode->i_ino,
565                                   dentry->d_parent->d_inode->i_ino);
566         if (ret) {
567                 printk("failed to delete reference to %.*s, "
568                        "inode %lu parent %lu\n", name_len, name,
569                        dentry->d_inode->i_ino,
570                        dentry->d_parent->d_inode->i_ino);
571         }
572 err:
573         btrfs_free_path(path);
574         if (!ret) {
575                 dir->i_size -= name_len * 2;
576                 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
577                 btrfs_update_inode(trans, root, dir);
578 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
579                 dentry->d_inode->i_nlink--;
580 #else
581                 drop_nlink(dentry->d_inode);
582 #endif
583                 ret = btrfs_update_inode(trans, root, dentry->d_inode);
584                 dir->i_sb->s_dirt = 1;
585         }
586         return ret;
587 }
588
589 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
590 {
591         struct btrfs_root *root;
592         struct btrfs_trans_handle *trans;
593         int ret;
594         unsigned long nr = 0;
595
596         root = BTRFS_I(dir)->root;
597         mutex_lock(&root->fs_info->fs_mutex);
598
599         ret = btrfs_check_free_space(root, 1, 1);
600         if (ret)
601                 goto fail;
602
603         trans = btrfs_start_transaction(root, 1);
604
605         btrfs_set_trans_block_group(trans, dir);
606         ret = btrfs_unlink_trans(trans, root, dir, dentry);
607         nr = trans->blocks_used;
608
609         btrfs_end_transaction(trans, root);
610 fail:
611         mutex_unlock(&root->fs_info->fs_mutex);
612         btrfs_btree_balance_dirty(root, nr);
613         btrfs_throttle(root);
614         return ret;
615 }
616
617 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
618 {
619         struct inode *inode = dentry->d_inode;
620         int err = 0;
621         int ret;
622         struct btrfs_root *root = BTRFS_I(dir)->root;
623         struct btrfs_trans_handle *trans;
624         unsigned long nr = 0;
625
626         if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
627                 return -ENOTEMPTY;
628
629         mutex_lock(&root->fs_info->fs_mutex);
630         ret = btrfs_check_free_space(root, 1, 1);
631         if (ret)
632                 goto fail;
633
634         trans = btrfs_start_transaction(root, 1);
635         btrfs_set_trans_block_group(trans, dir);
636
637         /* now the directory is empty */
638         err = btrfs_unlink_trans(trans, root, dir, dentry);
639         if (!err) {
640                 inode->i_size = 0;
641         }
642
643         nr = trans->blocks_used;
644         ret = btrfs_end_transaction(trans, root);
645 fail:
646         mutex_unlock(&root->fs_info->fs_mutex);
647         btrfs_btree_balance_dirty(root, nr);
648         btrfs_throttle(root);
649
650         if (ret && !err)
651                 err = ret;
652         return err;
653 }
654
655 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
656                             struct btrfs_root *root,
657                             struct inode *inode)
658 {
659         struct btrfs_path *path;
660         int ret;
661
662         clear_inode(inode);
663
664         path = btrfs_alloc_path();
665         BUG_ON(!path);
666         ret = btrfs_lookup_inode(trans, root, path,
667                                  &BTRFS_I(inode)->location, -1);
668         if (ret > 0)
669                 ret = -ENOENT;
670         if (!ret)
671                 ret = btrfs_del_item(trans, root, path);
672         btrfs_free_path(path);
673         return ret;
674 }
675
676 /*
677  * this can truncate away extent items, csum items and directory items.
678  * It starts at a high offset and removes keys until it can't find
679  * any higher than i_size.
680  *
681  * csum items that cross the new i_size are truncated to the new size
682  * as well.
683  */
684 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
685                                    struct btrfs_root *root,
686                                    struct inode *inode)
687 {
688         int ret;
689         struct btrfs_path *path;
690         struct btrfs_key key;
691         struct btrfs_key found_key;
692         u32 found_type;
693         struct extent_buffer *leaf;
694         struct btrfs_file_extent_item *fi;
695         u64 extent_start = 0;
696         u64 extent_num_bytes = 0;
697         u64 item_end = 0;
698         u64 root_gen = 0;
699         u64 root_owner = 0;
700         int found_extent;
701         int del_item;
702         int extent_type = -1;
703
704         btrfs_drop_extent_cache(inode, inode->i_size, (u64)-1);
705         path = btrfs_alloc_path();
706         path->reada = -1;
707         BUG_ON(!path);
708
709         /* FIXME, add redo link to tree so we don't leak on crash */
710         key.objectid = inode->i_ino;
711         key.offset = (u64)-1;
712         key.type = (u8)-1;
713
714         while(1) {
715                 btrfs_init_path(path);
716                 fi = NULL;
717                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
718                 if (ret < 0) {
719                         goto error;
720                 }
721                 if (ret > 0) {
722                         BUG_ON(path->slots[0] == 0);
723                         path->slots[0]--;
724                 }
725                 leaf = path->nodes[0];
726                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
727                 found_type = btrfs_key_type(&found_key);
728
729                 if (found_key.objectid != inode->i_ino)
730                         break;
731
732                 if (found_type != BTRFS_CSUM_ITEM_KEY &&
733                     found_type != BTRFS_DIR_ITEM_KEY &&
734                     found_type != BTRFS_DIR_INDEX_KEY &&
735                     found_type != BTRFS_EXTENT_DATA_KEY)
736                         break;
737
738                 item_end = found_key.offset;
739                 if (found_type == BTRFS_EXTENT_DATA_KEY) {
740                         fi = btrfs_item_ptr(leaf, path->slots[0],
741                                             struct btrfs_file_extent_item);
742                         extent_type = btrfs_file_extent_type(leaf, fi);
743                         if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
744                                 item_end +=
745                                     btrfs_file_extent_num_bytes(leaf, fi);
746                         } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
747                                 struct btrfs_item *item = btrfs_item_nr(leaf,
748                                                                 path->slots[0]);
749                                 item_end += btrfs_file_extent_inline_len(leaf,
750                                                                          item);
751                         }
752                         item_end--;
753                 }
754                 if (found_type == BTRFS_CSUM_ITEM_KEY) {
755                         ret = btrfs_csum_truncate(trans, root, path,
756                                                   inode->i_size);
757                         BUG_ON(ret);
758                 }
759                 if (item_end < inode->i_size) {
760                         if (found_type == BTRFS_DIR_ITEM_KEY) {
761                                 found_type = BTRFS_INODE_ITEM_KEY;
762                         } else if (found_type == BTRFS_EXTENT_ITEM_KEY) {
763                                 found_type = BTRFS_CSUM_ITEM_KEY;
764                         } else if (found_type) {
765                                 found_type--;
766                         } else {
767                                 break;
768                         }
769                         btrfs_set_key_type(&key, found_type);
770                         btrfs_release_path(root, path);
771                         continue;
772                 }
773                 if (found_key.offset >= inode->i_size)
774                         del_item = 1;
775                 else
776                         del_item = 0;
777                 found_extent = 0;
778
779                 /* FIXME, shrink the extent if the ref count is only 1 */
780                 if (found_type != BTRFS_EXTENT_DATA_KEY)
781                         goto delete;
782
783                 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
784                         u64 num_dec;
785                         extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
786                         if (!del_item) {
787                                 u64 orig_num_bytes =
788                                         btrfs_file_extent_num_bytes(leaf, fi);
789                                 extent_num_bytes = inode->i_size -
790                                         found_key.offset + root->sectorsize - 1;
791                                 btrfs_set_file_extent_num_bytes(leaf, fi,
792                                                          extent_num_bytes);
793                                 num_dec = (orig_num_bytes -
794                                            extent_num_bytes) >> 9;
795                                 if (extent_start != 0) {
796                                         inode->i_blocks -= num_dec;
797                                 }
798                                 btrfs_mark_buffer_dirty(leaf);
799                         } else {
800                                 extent_num_bytes =
801                                         btrfs_file_extent_disk_num_bytes(leaf,
802                                                                          fi);
803                                 /* FIXME blocksize != 4096 */
804                                 num_dec = btrfs_file_extent_num_bytes(leaf,
805                                                                        fi) >> 9;
806                                 if (extent_start != 0) {
807                                         found_extent = 1;
808                                         inode->i_blocks -= num_dec;
809                                 }
810                                 root_gen = btrfs_header_generation(leaf);
811                                 root_owner = btrfs_header_owner(leaf);
812                         }
813                 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE &&
814                            !del_item) {
815                         u32 newsize = inode->i_size - found_key.offset;
816                         newsize = btrfs_file_extent_calc_inline_size(newsize);
817                         ret = btrfs_truncate_item(trans, root, path,
818                                                   newsize, 1);
819                         BUG_ON(ret);
820                 }
821 delete:
822                 if (del_item) {
823                         ret = btrfs_del_item(trans, root, path);
824                         if (ret)
825                                 goto error;
826                 } else {
827                         break;
828                 }
829                 btrfs_release_path(root, path);
830                 if (found_extent) {
831                         ret = btrfs_free_extent(trans, root, extent_start,
832                                                 extent_num_bytes,
833                                                 root_owner,
834                                                 root_gen, inode->i_ino,
835                                                 found_key.offset, 0);
836                         BUG_ON(ret);
837                 }
838         }
839         ret = 0;
840 error:
841         btrfs_release_path(root, path);
842         btrfs_free_path(path);
843         inode->i_sb->s_dirt = 1;
844         return ret;
845 }
846
847 static int btrfs_cow_one_page(struct inode *inode, struct page *page,
848                               size_t zero_start)
849 {
850         char *kaddr;
851         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
852         struct btrfs_root *root = BTRFS_I(inode)->root;
853         u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
854         u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
855         u64 existing_delalloc;
856         u64 delalloc_start;
857         int ret = 0;
858
859         WARN_ON(!PageLocked(page));
860         set_page_extent_mapped(page);
861
862         lock_extent(em_tree, page_start, page_end, GFP_NOFS);
863         delalloc_start = page_start;
864         existing_delalloc = count_range_bits(&BTRFS_I(inode)->extent_tree,
865                                              &delalloc_start, page_end,
866                                              PAGE_CACHE_SIZE, EXTENT_DELALLOC);
867         set_extent_delalloc(&BTRFS_I(inode)->extent_tree, page_start,
868                             page_end, GFP_NOFS);
869
870         spin_lock(&root->fs_info->delalloc_lock);
871         root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE - existing_delalloc;
872         spin_unlock(&root->fs_info->delalloc_lock);
873
874         if (zero_start != PAGE_CACHE_SIZE) {
875                 kaddr = kmap(page);
876                 memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
877                 flush_dcache_page(page);
878                 kunmap(page);
879         }
880         set_page_dirty(page);
881         unlock_extent(em_tree, page_start, page_end, GFP_NOFS);
882
883         return ret;
884 }
885
886 /*
887  * taken from block_truncate_page, but does cow as it zeros out
888  * any bytes left in the last page in the file.
889  */
890 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
891 {
892         struct inode *inode = mapping->host;
893         struct btrfs_root *root = BTRFS_I(inode)->root;
894         u32 blocksize = root->sectorsize;
895         pgoff_t index = from >> PAGE_CACHE_SHIFT;
896         unsigned offset = from & (PAGE_CACHE_SIZE-1);
897         struct page *page;
898         int ret = 0;
899         u64 page_start;
900
901         if ((offset & (blocksize - 1)) == 0)
902                 goto out;
903
904         ret = -ENOMEM;
905         page = grab_cache_page(mapping, index);
906         if (!page)
907                 goto out;
908         if (!PageUptodate(page)) {
909                 ret = btrfs_readpage(NULL, page);
910                 lock_page(page);
911                 if (!PageUptodate(page)) {
912                         ret = -EIO;
913                         goto out;
914                 }
915         }
916         page_start = (u64)page->index << PAGE_CACHE_SHIFT;
917
918         ret = btrfs_cow_one_page(inode, page, offset);
919
920         unlock_page(page);
921         page_cache_release(page);
922 out:
923         return ret;
924 }
925
926 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
927 {
928         struct inode *inode = dentry->d_inode;
929         int err;
930
931         err = inode_change_ok(inode, attr);
932         if (err)
933                 return err;
934
935         if (S_ISREG(inode->i_mode) &&
936             attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
937                 struct btrfs_trans_handle *trans;
938                 struct btrfs_root *root = BTRFS_I(inode)->root;
939                 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
940
941                 u64 mask = root->sectorsize - 1;
942                 u64 pos = (inode->i_size + mask) & ~mask;
943                 u64 block_end = attr->ia_size | mask;
944                 u64 hole_size;
945                 u64 alloc_hint = 0;
946
947                 if (attr->ia_size <= pos)
948                         goto out;
949
950                 mutex_lock(&root->fs_info->fs_mutex);
951                 err = btrfs_check_free_space(root, 1, 0);
952                 mutex_unlock(&root->fs_info->fs_mutex);
953                 if (err)
954                         goto fail;
955
956                 btrfs_truncate_page(inode->i_mapping, inode->i_size);
957
958                 lock_extent(em_tree, pos, block_end, GFP_NOFS);
959                 hole_size = (attr->ia_size - pos + mask) & ~mask;
960
961                 mutex_lock(&root->fs_info->fs_mutex);
962                 trans = btrfs_start_transaction(root, 1);
963                 btrfs_set_trans_block_group(trans, inode);
964                 err = btrfs_drop_extents(trans, root, inode,
965                                          pos, pos + hole_size, pos,
966                                          &alloc_hint);
967
968                 if (alloc_hint != EXTENT_MAP_INLINE) {
969                         err = btrfs_insert_file_extent(trans, root,
970                                                        inode->i_ino,
971                                                        pos, 0, 0, hole_size);
972                 }
973                 btrfs_end_transaction(trans, root);
974                 mutex_unlock(&root->fs_info->fs_mutex);
975                 unlock_extent(em_tree, pos, block_end, GFP_NOFS);
976                 if (err)
977                         return err;
978         }
979 out:
980         err = inode_setattr(inode, attr);
981 fail:
982         return err;
983 }
984 void btrfs_delete_inode(struct inode *inode)
985 {
986         struct btrfs_trans_handle *trans;
987         struct btrfs_root *root = BTRFS_I(inode)->root;
988         unsigned long nr;
989         int ret;
990
991         truncate_inode_pages(&inode->i_data, 0);
992         if (is_bad_inode(inode)) {
993                 goto no_delete;
994         }
995
996         inode->i_size = 0;
997         mutex_lock(&root->fs_info->fs_mutex);
998         trans = btrfs_start_transaction(root, 1);
999
1000         btrfs_set_trans_block_group(trans, inode);
1001         ret = btrfs_truncate_in_trans(trans, root, inode);
1002         if (ret)
1003                 goto no_delete_lock;
1004         ret = btrfs_delete_xattrs(trans, root, inode);
1005         if (ret)
1006                 goto no_delete_lock;
1007         ret = btrfs_free_inode(trans, root, inode);
1008         if (ret)
1009                 goto no_delete_lock;
1010         nr = trans->blocks_used;
1011
1012         btrfs_end_transaction(trans, root);
1013         mutex_unlock(&root->fs_info->fs_mutex);
1014         btrfs_btree_balance_dirty(root, nr);
1015         btrfs_throttle(root);
1016         return;
1017
1018 no_delete_lock:
1019         nr = trans->blocks_used;
1020         btrfs_end_transaction(trans, root);
1021         mutex_unlock(&root->fs_info->fs_mutex);
1022         btrfs_btree_balance_dirty(root, nr);
1023         btrfs_throttle(root);
1024 no_delete:
1025         clear_inode(inode);
1026 }
1027
1028 /*
1029  * this returns the key found in the dir entry in the location pointer.
1030  * If no dir entries were found, location->objectid is 0.
1031  */
1032 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
1033                                struct btrfs_key *location)
1034 {
1035         const char *name = dentry->d_name.name;
1036         int namelen = dentry->d_name.len;
1037         struct btrfs_dir_item *di;
1038         struct btrfs_path *path;
1039         struct btrfs_root *root = BTRFS_I(dir)->root;
1040         int ret = 0;
1041
1042         if (namelen == 1 && strcmp(name, ".") == 0) {
1043                 location->objectid = dir->i_ino;
1044                 location->type = BTRFS_INODE_ITEM_KEY;
1045                 location->offset = 0;
1046                 return 0;
1047         }
1048         path = btrfs_alloc_path();
1049         BUG_ON(!path);
1050
1051         if (namelen == 2 && strcmp(name, "..") == 0) {
1052                 struct btrfs_key key;
1053                 struct extent_buffer *leaf;
1054                 u32 nritems;
1055                 int slot;
1056
1057                 key.objectid = dir->i_ino;
1058                 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1059                 key.offset = 0;
1060                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1061                 BUG_ON(ret == 0);
1062                 ret = 0;
1063
1064                 leaf = path->nodes[0];
1065                 slot = path->slots[0];
1066                 nritems = btrfs_header_nritems(leaf);
1067                 if (slot >= nritems)
1068                         goto out_err;
1069
1070                 btrfs_item_key_to_cpu(leaf, &key, slot);
1071                 if (key.objectid != dir->i_ino ||
1072                     key.type != BTRFS_INODE_REF_KEY) {
1073                         goto out_err;
1074                 }
1075                 location->objectid = key.offset;
1076                 location->type = BTRFS_INODE_ITEM_KEY;
1077                 location->offset = 0;
1078                 goto out;
1079         }
1080
1081         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
1082                                     namelen, 0);
1083         if (IS_ERR(di))
1084                 ret = PTR_ERR(di);
1085         if (!di || IS_ERR(di)) {
1086                 goto out_err;
1087         }
1088         btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
1089 out:
1090         btrfs_free_path(path);
1091         return ret;
1092 out_err:
1093         location->objectid = 0;
1094         goto out;
1095 }
1096
1097 /*
1098  * when we hit a tree root in a directory, the btrfs part of the inode
1099  * needs to be changed to reflect the root directory of the tree root.  This
1100  * is kind of like crossing a mount point.
1101  */
1102 static int fixup_tree_root_location(struct btrfs_root *root,
1103                              struct btrfs_key *location,
1104                              struct btrfs_root **sub_root,
1105                              struct dentry *dentry)
1106 {
1107         struct btrfs_path *path;
1108         struct btrfs_root_item *ri;
1109
1110         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
1111                 return 0;
1112         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
1113                 return 0;
1114
1115         path = btrfs_alloc_path();
1116         BUG_ON(!path);
1117         mutex_lock(&root->fs_info->fs_mutex);
1118
1119         *sub_root = btrfs_read_fs_root(root->fs_info, location,
1120                                         dentry->d_name.name,
1121                                         dentry->d_name.len);
1122         if (IS_ERR(*sub_root))
1123                 return PTR_ERR(*sub_root);
1124
1125         ri = &(*sub_root)->root_item;
1126         location->objectid = btrfs_root_dirid(ri);
1127         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1128         location->offset = 0;
1129
1130         btrfs_free_path(path);
1131         mutex_unlock(&root->fs_info->fs_mutex);
1132         return 0;
1133 }
1134
1135 static int btrfs_init_locked_inode(struct inode *inode, void *p)
1136 {
1137         struct btrfs_iget_args *args = p;
1138         inode->i_ino = args->ino;
1139         BTRFS_I(inode)->root = args->root;
1140         extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
1141                              inode->i_mapping, GFP_NOFS);
1142         return 0;
1143 }
1144
1145 static int btrfs_find_actor(struct inode *inode, void *opaque)
1146 {
1147         struct btrfs_iget_args *args = opaque;
1148         return (args->ino == inode->i_ino &&
1149                 args->root == BTRFS_I(inode)->root);
1150 }
1151
1152 struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
1153                             u64 root_objectid)
1154 {
1155         struct btrfs_iget_args args;
1156         args.ino = objectid;
1157         args.root = btrfs_lookup_fs_root(btrfs_sb(s)->fs_info, root_objectid);
1158
1159         if (!args.root)
1160                 return NULL;
1161
1162         return ilookup5(s, objectid, btrfs_find_actor, (void *)&args);
1163 }
1164
1165 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
1166                                 struct btrfs_root *root)
1167 {
1168         struct inode *inode;
1169         struct btrfs_iget_args args;
1170         args.ino = objectid;
1171         args.root = root;
1172
1173         inode = iget5_locked(s, objectid, btrfs_find_actor,
1174                              btrfs_init_locked_inode,
1175                              (void *)&args);
1176         return inode;
1177 }
1178
1179 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
1180                                    struct nameidata *nd)
1181 {
1182         struct inode * inode;
1183         struct btrfs_inode *bi = BTRFS_I(dir);
1184         struct btrfs_root *root = bi->root;
1185         struct btrfs_root *sub_root = root;
1186         struct btrfs_key location;
1187         int ret;
1188
1189         if (dentry->d_name.len > BTRFS_NAME_LEN)
1190                 return ERR_PTR(-ENAMETOOLONG);
1191
1192         mutex_lock(&root->fs_info->fs_mutex);
1193         ret = btrfs_inode_by_name(dir, dentry, &location);
1194         mutex_unlock(&root->fs_info->fs_mutex);
1195
1196         if (ret < 0)
1197                 return ERR_PTR(ret);
1198
1199         inode = NULL;
1200         if (location.objectid) {
1201                 ret = fixup_tree_root_location(root, &location, &sub_root,
1202                                                 dentry);
1203                 if (ret < 0)
1204                         return ERR_PTR(ret);
1205                 if (ret > 0)
1206                         return ERR_PTR(-ENOENT);
1207                 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
1208                                           sub_root);
1209                 if (!inode)
1210                         return ERR_PTR(-EACCES);
1211                 if (inode->i_state & I_NEW) {
1212                         /* the inode and parent dir are two different roots */
1213                         if (sub_root != root) {
1214                                 igrab(inode);
1215                                 sub_root->inode = inode;
1216                         }
1217                         BTRFS_I(inode)->root = sub_root;
1218                         memcpy(&BTRFS_I(inode)->location, &location,
1219                                sizeof(location));
1220                         btrfs_read_locked_inode(inode);
1221                         unlock_new_inode(inode);
1222                 }
1223         }
1224         return d_splice_alias(inode, dentry);
1225 }
1226
/*
 * Maps the directory entry type stored in a btrfs dir item (used as the
 * index by btrfs_readdir) to the DT_* value passed to filldir.
 */
static unsigned char btrfs_filetype_table[] = {
	DT_UNKNOWN,
	DT_REG,
	DT_DIR,
	DT_CHR,
	DT_BLK,
	DT_FIFO,
	DT_SOCK,
	DT_LNK
};
1230
1231 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1232 {
1233         struct inode *inode = filp->f_dentry->d_inode;
1234         struct btrfs_root *root = BTRFS_I(inode)->root;
1235         struct btrfs_item *item;
1236         struct btrfs_dir_item *di;
1237         struct btrfs_key key;
1238         struct btrfs_key found_key;
1239         struct btrfs_path *path;
1240         int ret;
1241         u32 nritems;
1242         struct extent_buffer *leaf;
1243         int slot;
1244         int advance;
1245         unsigned char d_type;
1246         int over = 0;
1247         u32 di_cur;
1248         u32 di_total;
1249         u32 di_len;
1250         int key_type = BTRFS_DIR_INDEX_KEY;
1251         char tmp_name[32];
1252         char *name_ptr;
1253         int name_len;
1254
1255         /* FIXME, use a real flag for deciding about the key type */
1256         if (root->fs_info->tree_root == root)
1257                 key_type = BTRFS_DIR_ITEM_KEY;
1258
1259         /* special case for "." */
1260         if (filp->f_pos == 0) {
1261                 over = filldir(dirent, ".", 1,
1262                                1, inode->i_ino,
1263                                DT_DIR);
1264                 if (over)
1265                         return 0;
1266                 filp->f_pos = 1;
1267         }
1268
1269         mutex_lock(&root->fs_info->fs_mutex);
1270         key.objectid = inode->i_ino;
1271         path = btrfs_alloc_path();
1272         path->reada = 2;
1273
1274         /* special case for .., just use the back ref */
1275         if (filp->f_pos == 1) {
1276                 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1277                 key.offset = 0;
1278                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1279                 BUG_ON(ret == 0);
1280                 leaf = path->nodes[0];
1281                 slot = path->slots[0];
1282                 nritems = btrfs_header_nritems(leaf);
1283                 if (slot >= nritems) {
1284                         btrfs_release_path(root, path);
1285                         goto read_dir_items;
1286                 }
1287                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1288                 btrfs_release_path(root, path);
1289                 if (found_key.objectid != key.objectid ||
1290                     found_key.type != BTRFS_INODE_REF_KEY)
1291                         goto read_dir_items;
1292                 over = filldir(dirent, "..", 2,
1293                                2, found_key.offset, DT_DIR);
1294                 if (over)
1295                         goto nopos;
1296                 filp->f_pos = 2;
1297         }
1298
1299 read_dir_items:
1300         btrfs_set_key_type(&key, key_type);
1301         key.offset = filp->f_pos;
1302
1303         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1304         if (ret < 0)
1305                 goto err;
1306         advance = 0;
1307         while(1) {
1308                 leaf = path->nodes[0];
1309                 nritems = btrfs_header_nritems(leaf);
1310                 slot = path->slots[0];
1311                 if (advance || slot >= nritems) {
1312                         if (slot >= nritems -1) {
1313                                 ret = btrfs_next_leaf(root, path);
1314                                 if (ret)
1315                                         break;
1316                                 leaf = path->nodes[0];
1317                                 nritems = btrfs_header_nritems(leaf);
1318                                 slot = path->slots[0];
1319                         } else {
1320                                 slot++;
1321                                 path->slots[0]++;
1322                         }
1323                 }
1324                 advance = 1;
1325                 item = btrfs_item_nr(leaf, slot);
1326                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1327
1328                 if (found_key.objectid != key.objectid)
1329                         break;
1330                 if (btrfs_key_type(&found_key) != key_type)
1331                         break;
1332                 if (found_key.offset < filp->f_pos)
1333                         continue;
1334
1335                 filp->f_pos = found_key.offset;
1336                 advance = 1;
1337                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
1338                 di_cur = 0;
1339                 di_total = btrfs_item_size(leaf, item);
1340                 while(di_cur < di_total) {
1341                         struct btrfs_key location;
1342
1343                         name_len = btrfs_dir_name_len(leaf, di);
1344                         if (name_len < 32) {
1345                                 name_ptr = tmp_name;
1346                         } else {
1347                                 name_ptr = kmalloc(name_len, GFP_NOFS);
1348                                 BUG_ON(!name_ptr);
1349                         }
1350                         read_extent_buffer(leaf, name_ptr,
1351                                            (unsigned long)(di + 1), name_len);
1352
1353                         d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
1354                         btrfs_dir_item_key_to_cpu(leaf, di, &location);
1355                         over = filldir(dirent, name_ptr, name_len,
1356                                        found_key.offset,
1357                                        location.objectid,
1358                                        d_type);
1359
1360                         if (name_ptr != tmp_name)
1361                                 kfree(name_ptr);
1362
1363                         if (over)
1364                                 goto nopos;
1365                         di_len = btrfs_dir_name_len(leaf, di) +
1366                                 btrfs_dir_data_len(leaf, di) +sizeof(*di);
1367                         di_cur += di_len;
1368                         di = (struct btrfs_dir_item *)((char *)di + di_len);
1369                 }
1370         }
1371         filp->f_pos++;
1372 nopos:
1373         ret = 0;
1374 err:
1375         btrfs_release_path(root, path);
1376         btrfs_free_path(path);
1377         mutex_unlock(&root->fs_info->fs_mutex);
1378         return ret;
1379 }
1380
1381 int btrfs_write_inode(struct inode *inode, int wait)
1382 {
1383         struct btrfs_root *root = BTRFS_I(inode)->root;
1384         struct btrfs_trans_handle *trans;
1385         int ret = 0;
1386
1387         if (wait) {
1388                 mutex_lock(&root->fs_info->fs_mutex);
1389                 trans = btrfs_start_transaction(root, 1);
1390                 btrfs_set_trans_block_group(trans, inode);
1391                 ret = btrfs_commit_transaction(trans, root);
1392                 mutex_unlock(&root->fs_info->fs_mutex);
1393         }
1394         return ret;
1395 }
1396
1397 /*
1398  * This is somewhat expensive, updating the tree every time the
1399  * inode changes.  But, it is most likely to find the inode in cache.
1400  * FIXME, needs more benchmarking...there are no reasons other than performance
1401  * to keep or drop this code.
1402  */
1403 void btrfs_dirty_inode(struct inode *inode)
1404 {
1405         struct btrfs_root *root = BTRFS_I(inode)->root;
1406         struct btrfs_trans_handle *trans;
1407
1408         mutex_lock(&root->fs_info->fs_mutex);
1409         trans = btrfs_start_transaction(root, 1);
1410         btrfs_set_trans_block_group(trans, inode);
1411         btrfs_update_inode(trans, root, inode);
1412         btrfs_end_transaction(trans, root);
1413         mutex_unlock(&root->fs_info->fs_mutex);
1414 }
1415
1416 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
1417                                      struct btrfs_root *root,
1418                                      u64 objectid,
1419                                      struct btrfs_block_group_cache *group,
1420                                      int mode)
1421 {
1422         struct inode *inode;
1423         struct btrfs_inode_item *inode_item;
1424         struct btrfs_key *location;
1425         struct btrfs_path *path;
1426         int ret;
1427         int owner;
1428
1429         path = btrfs_alloc_path();
1430         BUG_ON(!path);
1431
1432         inode = new_inode(root->fs_info->sb);
1433         if (!inode)
1434                 return ERR_PTR(-ENOMEM);
1435
1436         extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
1437                              inode->i_mapping, GFP_NOFS);
1438         BTRFS_I(inode)->root = root;
1439
1440         if (mode & S_IFDIR)
1441                 owner = 0;
1442         else
1443                 owner = 1;
1444         group = btrfs_find_block_group(root, group, 0, 0, owner);
1445         BTRFS_I(inode)->block_group = group;
1446
1447         ret = btrfs_insert_empty_inode(trans, root, path, objectid);
1448         if (ret)
1449                 goto fail;
1450
1451         inode->i_uid = current->fsuid;
1452         inode->i_gid = current->fsgid;
1453         inode->i_mode = mode;
1454         inode->i_ino = objectid;
1455         inode->i_blocks = 0;
1456         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1457         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1458                                   struct btrfs_inode_item);
1459         fill_inode_item(path->nodes[0], inode_item, inode);
1460         btrfs_mark_buffer_dirty(path->nodes[0]);
1461         btrfs_free_path(path);
1462
1463         location = &BTRFS_I(inode)->location;
1464         location->objectid = objectid;
1465         location->offset = 0;
1466         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1467
1468         insert_inode_hash(inode);
1469         return inode;
1470 fail:
1471         btrfs_free_path(path);
1472         return ERR_PTR(ret);
1473 }
1474
1475 static inline u8 btrfs_inode_type(struct inode *inode)
1476 {
1477         return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
1478 }
1479
1480 static int btrfs_add_link(struct btrfs_trans_handle *trans,
1481                             struct dentry *dentry, struct inode *inode)
1482 {
1483         int ret;
1484         struct btrfs_key key;
1485         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
1486         struct inode *parent_inode;
1487
1488         key.objectid = inode->i_ino;
1489         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1490         key.offset = 0;
1491
1492         ret = btrfs_insert_dir_item(trans, root,
1493                                     dentry->d_name.name, dentry->d_name.len,
1494                                     dentry->d_parent->d_inode->i_ino,
1495                                     &key, btrfs_inode_type(inode));
1496         if (ret == 0) {
1497                 ret = btrfs_insert_inode_ref(trans, root,
1498                                      dentry->d_name.name,
1499                                      dentry->d_name.len,
1500                                      inode->i_ino,
1501                                      dentry->d_parent->d_inode->i_ino);
1502                 parent_inode = dentry->d_parent->d_inode;
1503                 parent_inode->i_size += dentry->d_name.len * 2;
1504                 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
1505                 ret = btrfs_update_inode(trans, root,
1506                                          dentry->d_parent->d_inode);
1507         }
1508         return ret;
1509 }
1510
1511 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
1512                             struct dentry *dentry, struct inode *inode)
1513 {
1514         int err = btrfs_add_link(trans, dentry, inode);
1515         if (!err) {
1516                 d_instantiate(dentry, inode);
1517                 return 0;
1518         }
1519         if (err > 0)
1520                 err = -EEXIST;
1521         return err;
1522 }
1523
1524 static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
1525                         int mode, dev_t rdev)
1526 {
1527         struct btrfs_trans_handle *trans;
1528         struct btrfs_root *root = BTRFS_I(dir)->root;
1529         struct inode *inode = NULL;
1530         int err;
1531         int drop_inode = 0;
1532         u64 objectid;
1533         unsigned long nr = 0;
1534
1535         if (!new_valid_dev(rdev))
1536                 return -EINVAL;
1537
1538         mutex_lock(&root->fs_info->fs_mutex);
1539         err = btrfs_check_free_space(root, 1, 0);
1540         if (err)
1541                 goto fail;
1542
1543         trans = btrfs_start_transaction(root, 1);
1544         btrfs_set_trans_block_group(trans, dir);
1545
1546         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1547         if (err) {
1548                 err = -ENOSPC;
1549                 goto out_unlock;
1550         }
1551
1552         inode = btrfs_new_inode(trans, root, objectid,
1553                                 BTRFS_I(dir)->block_group, mode);
1554         err = PTR_ERR(inode);
1555         if (IS_ERR(inode))
1556                 goto out_unlock;
1557
1558         btrfs_set_trans_block_group(trans, inode);
1559         err = btrfs_add_nondir(trans, dentry, inode);
1560         if (err)
1561                 drop_inode = 1;
1562         else {
1563                 inode->i_op = &btrfs_special_inode_operations;
1564                 init_special_inode(inode, inode->i_mode, rdev);
1565                 btrfs_update_inode(trans, root, inode);
1566         }
1567         dir->i_sb->s_dirt = 1;
1568         btrfs_update_inode_block_group(trans, inode);
1569         btrfs_update_inode_block_group(trans, dir);
1570 out_unlock:
1571         nr = trans->blocks_used;
1572         btrfs_end_transaction(trans, root);
1573 fail:
1574         mutex_unlock(&root->fs_info->fs_mutex);
1575
1576         if (drop_inode) {
1577                 inode_dec_link_count(inode);
1578                 iput(inode);
1579         }
1580         btrfs_btree_balance_dirty(root, nr);
1581         btrfs_throttle(root);
1582         return err;
1583 }
1584
1585 static int btrfs_create(struct inode *dir, struct dentry *dentry,
1586                         int mode, struct nameidata *nd)
1587 {
1588         struct btrfs_trans_handle *trans;
1589         struct btrfs_root *root = BTRFS_I(dir)->root;
1590         struct inode *inode = NULL;
1591         int err;
1592         int drop_inode = 0;
1593         unsigned long nr = 0;
1594         u64 objectid;
1595
1596         mutex_lock(&root->fs_info->fs_mutex);
1597         err = btrfs_check_free_space(root, 1, 0);
1598         if (err)
1599                 goto fail;
1600         trans = btrfs_start_transaction(root, 1);
1601         btrfs_set_trans_block_group(trans, dir);
1602
1603         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1604         if (err) {
1605                 err = -ENOSPC;
1606                 goto out_unlock;
1607         }
1608
1609         inode = btrfs_new_inode(trans, root, objectid,
1610                                 BTRFS_I(dir)->block_group, mode);
1611         err = PTR_ERR(inode);
1612         if (IS_ERR(inode))
1613                 goto out_unlock;
1614
1615         btrfs_set_trans_block_group(trans, inode);
1616         err = btrfs_add_nondir(trans, dentry, inode);
1617         if (err)
1618                 drop_inode = 1;
1619         else {
1620                 inode->i_mapping->a_ops = &btrfs_aops;
1621                 inode->i_fop = &btrfs_file_operations;
1622                 inode->i_op = &btrfs_file_inode_operations;
1623                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
1624                                      inode->i_mapping, GFP_NOFS);
1625                 BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops;
1626         }
1627         dir->i_sb->s_dirt = 1;
1628         btrfs_update_inode_block_group(trans, inode);
1629         btrfs_update_inode_block_group(trans, dir);
1630 out_unlock:
1631         nr = trans->blocks_used;
1632         btrfs_end_transaction(trans, root);
1633 fail:
1634         mutex_unlock(&root->fs_info->fs_mutex);
1635
1636         if (drop_inode) {
1637                 inode_dec_link_count(inode);
1638                 iput(inode);
1639         }
1640         btrfs_btree_balance_dirty(root, nr);
1641         btrfs_throttle(root);
1642         return err;
1643 }
1644
1645 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
1646                       struct dentry *dentry)
1647 {
1648         struct btrfs_trans_handle *trans;
1649         struct btrfs_root *root = BTRFS_I(dir)->root;
1650         struct inode *inode = old_dentry->d_inode;
1651         unsigned long nr = 0;
1652         int err;
1653         int drop_inode = 0;
1654
1655         if (inode->i_nlink == 0)
1656                 return -ENOENT;
1657
1658 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
1659         inode->i_nlink++;
1660 #else
1661         inc_nlink(inode);
1662 #endif
1663         mutex_lock(&root->fs_info->fs_mutex);
1664         err = btrfs_check_free_space(root, 1, 0);
1665         if (err)
1666                 goto fail;
1667         trans = btrfs_start_transaction(root, 1);
1668
1669         btrfs_set_trans_block_group(trans, dir);
1670         atomic_inc(&inode->i_count);
1671         err = btrfs_add_nondir(trans, dentry, inode);
1672
1673         if (err)
1674                 drop_inode = 1;
1675
1676         dir->i_sb->s_dirt = 1;
1677         btrfs_update_inode_block_group(trans, dir);
1678         err = btrfs_update_inode(trans, root, inode);
1679
1680         if (err)
1681                 drop_inode = 1;
1682
1683         nr = trans->blocks_used;
1684         btrfs_end_transaction(trans, root);
1685 fail:
1686         mutex_unlock(&root->fs_info->fs_mutex);
1687
1688         if (drop_inode) {
1689                 inode_dec_link_count(inode);
1690                 iput(inode);
1691         }
1692         btrfs_btree_balance_dirty(root, nr);
1693         btrfs_throttle(root);
1694         return err;
1695 }
1696
1697 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1698 {
1699         struct inode *inode;
1700         struct btrfs_trans_handle *trans;
1701         struct btrfs_root *root = BTRFS_I(dir)->root;
1702         int err = 0;
1703         int drop_on_err = 0;
1704         u64 objectid;
1705         unsigned long nr = 1;
1706
1707         mutex_lock(&root->fs_info->fs_mutex);
1708         err = btrfs_check_free_space(root, 1, 0);
1709         if (err)
1710                 goto out_unlock;
1711
1712         trans = btrfs_start_transaction(root, 1);
1713         btrfs_set_trans_block_group(trans, dir);
1714
1715         if (IS_ERR(trans)) {
1716                 err = PTR_ERR(trans);
1717                 goto out_unlock;
1718         }
1719
1720         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1721         if (err) {
1722                 err = -ENOSPC;
1723                 goto out_unlock;
1724         }
1725
1726         inode = btrfs_new_inode(trans, root, objectid,
1727                                 BTRFS_I(dir)->block_group, S_IFDIR | mode);
1728         if (IS_ERR(inode)) {
1729                 err = PTR_ERR(inode);
1730                 goto out_fail;
1731         }
1732
1733         drop_on_err = 1;
1734         inode->i_op = &btrfs_dir_inode_operations;
1735         inode->i_fop = &btrfs_dir_file_operations;
1736         btrfs_set_trans_block_group(trans, inode);
1737
1738         inode->i_size = 0;
1739         err = btrfs_update_inode(trans, root, inode);
1740         if (err)
1741                 goto out_fail;
1742
1743         err = btrfs_add_link(trans, dentry, inode);
1744         if (err)
1745                 goto out_fail;
1746
1747         d_instantiate(dentry, inode);
1748         drop_on_err = 0;
1749         dir->i_sb->s_dirt = 1;
1750         btrfs_update_inode_block_group(trans, inode);
1751         btrfs_update_inode_block_group(trans, dir);
1752
1753 out_fail:
1754         nr = trans->blocks_used;
1755         btrfs_end_transaction(trans, root);
1756
1757 out_unlock:
1758         mutex_unlock(&root->fs_info->fs_mutex);
1759         if (drop_on_err)
1760                 iput(inode);
1761         btrfs_btree_balance_dirty(root, nr);
1762         btrfs_throttle(root);
1763         return err;
1764 }
1765
/*
 * Return an extent_map describing the file range that contains @start for
 * @inode.
 *
 * The in-memory extent tree is consulted first.  On a miss, a fresh map is
 * allocated and filled from the file extent item found in the btree: a
 * regular extent, a hole (no item, an item outside the range, or a regular
 * item whose disk bytenr is 0), or an inline extent.  For inline extents
 * with a @page supplied, data is copied between the leaf and the page:
 * leaf -> page when @create is 0 and the page is not uptodate, page -> leaf
 * when @create is set and the page is uptodate (this needs a transaction,
 * so the whole lookup is restarted with one).
 *
 * @page may be NULL; in that case an inline extent is described but not
 * copied and the map is returned without being inserted into the tree.
 *
 * Runs under fs_mutex.  Returns the extent_map on success or ERR_PTR() with
 * a negative errno on failure.
 */
1766 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
1767                                     size_t page_offset, u64 start, u64 end,
1768                                     int create)
1769 {
1770         int ret;
1771         int err = 0;
1772         u64 bytenr;
1773         u64 extent_start = 0;
1774         u64 extent_end = 0;
1775         u64 objectid = inode->i_ino;
1776         u32 found_type;
1777         int failed_insert = 0;
1778         struct btrfs_path *path;
1779         struct btrfs_root *root = BTRFS_I(inode)->root;
1780         struct btrfs_file_extent_item *item;
1781         struct extent_buffer *leaf;
1782         struct btrfs_key found_key;
1783         struct extent_map *em = NULL;
1784         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
1785         struct btrfs_trans_handle *trans = NULL;
1786
1787         path = btrfs_alloc_path();
1788         BUG_ON(!path);
1789         mutex_lock(&root->fs_info->fs_mutex);
1790
             /*
              * Restart point: reached again after a transaction was started
              * for an inline write-back, and after add_extent_mapping()
              * reported -EEXIST.
              */
1791 again:
             /* fast path: a mapping is already cached for this range */
1792         em = lookup_extent_mapping(em_tree, start, end);
1793         if (em) {
1794                 if (em->start > start) {
1795                         printk("get_extent start %Lu em start %Lu\n",
1796                                start, em->start);
1797                         WARN_ON(1);
1798                 }
1799                 goto out;
1800         }
             /* always true here: the cached case jumped to out above */
1801         if (!em) {
1802                 em = alloc_extent_map(GFP_NOFS);
1803                 if (!em) {
1804                         err = -ENOMEM;
1805                         goto out;
1806                 }
1807                 em->start = EXTENT_MAP_HOLE;
1808                 em->end = EXTENT_MAP_HOLE;
1809         }
1810         em->bdev = inode->i_sb->s_bdev;
             /* the last argument asks for a modifying lookup only when a
              * transaction is held */
1811         ret = btrfs_lookup_file_extent(trans, root, path,
1812                                        objectid, start, trans != NULL);
1813         if (ret < 0) {
1814                 err = ret;
1815                 goto out;
1816         }
1817
             /*
              * Non-zero, non-negative result: step back to the preceding
              * item, or treat the range as not found when already at slot 0.
              * NOTE(review): presumably ret > 0 means "no exact key match" --
              * confirm against btrfs_lookup_file_extent().
              */
1818         if (ret != 0) {
1819                 if (path->slots[0] == 0)
1820                         goto not_found;
1821                 path->slots[0]--;
1822         }
1823
1824         leaf = path->nodes[0];
1825         item = btrfs_item_ptr(leaf, path->slots[0],
1826                               struct btrfs_file_extent_item);
1827         /* are we inside the extent that was found? */
1828         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1829         found_type = btrfs_key_type(&found_key);
1830         if (found_key.objectid != objectid ||
1831             found_type != BTRFS_EXTENT_DATA_KEY) {
1832                 goto not_found;
1833         }
1834
             /* found_type is reused: from here on it is the file extent type */
1835         found_type = btrfs_file_extent_type(leaf, item);
1836         extent_start = found_key.offset;
1837         if (found_type == BTRFS_FILE_EXTENT_REG) {
1838                 extent_end = extent_start +
1839                        btrfs_file_extent_num_bytes(leaf, item);
1840                 err = 0;
                     /* @start lies outside the item: describe a hole instead */
1841                 if (start < extent_start || start >= extent_end) {
1842                         em->start = start;
1843                         if (start < extent_start) {
1844                                 if (end < extent_start)
1845                                         goto not_found;
1846                                 em->end = extent_end - 1;
1847                         } else {
1848                                 em->end = end;
1849                         }
1850                         goto not_found_em;
1851                 }
1852                 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
                     /* a disk bytenr of 0 is mapped as a hole over the item */
1853                 if (bytenr == 0) {
1854                         em->start = extent_start;
1855                         em->end = extent_end - 1;
1856                         em->block_start = EXTENT_MAP_HOLE;
1857                         em->block_end = EXTENT_MAP_HOLE;
1858                         goto insert;
1859                 }
1860                 bytenr += btrfs_file_extent_offset(leaf, item);
1861                 em->block_start = bytenr;
1862                 em->block_end = em->block_start +
1863                         btrfs_file_extent_num_bytes(leaf, item) - 1;
1864                 em->start = extent_start;
1865                 em->end = extent_end - 1;
1866                 goto insert;
1867         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1868                 unsigned long ptr;
1869                 char *map;
1870                 size_t size;
1871                 size_t extent_offset;
1872                 size_t copy_size;
1873
1874                 size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf,
1875                                                     path->slots[0]));
                     /*
                      * OR-ing with (sectorsize - 1) sets the low bits of the
                      * last byte offset; assuming sectorsize is a power of
                      * two this is the last byte of that sector.
                      */
1876                 extent_end = (extent_start + size - 1) |
1877                         ((u64)root->sectorsize - 1);
1878                 if (start < extent_start || start >= extent_end) {
1879                         em->start = start;
1880                         if (start < extent_start) {
1881                                 if (end < extent_start)
1882                                         goto not_found;
1883                                 em->end = extent_end;
1884                         } else {
1885                                 em->end = end;
1886                         }
1887                         goto not_found_em;
1888                 }
1889                 em->block_start = EXTENT_MAP_INLINE;
1890                 em->block_end = EXTENT_MAP_INLINE;
1891
                     /* no page to copy into: return the map without
                      * inserting it into the tree */
1892                 if (!page) {
1893                         em->start = extent_start;
1894                         em->end = extent_start + size - 1;
1895                         goto out;
1896                 }
1897
1898                 extent_offset = ((u64)page->index << PAGE_CACHE_SHIFT) -
1899                         extent_start + page_offset;
1900                 copy_size = min_t(u64, PAGE_CACHE_SIZE - page_offset,
1901                                 size - extent_offset);
1902                 em->start = extent_start + extent_offset;
1903                 em->end = (em->start + copy_size -1) |
1904                         ((u64)root->sectorsize -1);
1905                 map = kmap(page);
1906                 ptr = btrfs_file_extent_inline_start(item) + extent_offset;
1907                 if (create == 0 && !PageUptodate(page)) {
                             /* read: fill the page from the inline data */
1908                         read_extent_buffer(leaf, map + page_offset, ptr,
1909                                            copy_size);
1910                         flush_dcache_page(page);
1911                 } else if (create && PageUptodate(page)) {
                             /*
                              * write: the leaf is about to be modified, which
                              * needs a transaction.  Undo everything taken so
                              * far and redo the lookup with one.
                              */
1912                         if (!trans) {
1913                                 kunmap(page);
1914                                 free_extent_map(em);
1915                                 em = NULL;
1916                                 btrfs_release_path(root, path);
1917                                 trans = btrfs_start_transaction(root, 1);
1918                                 goto again;
1919                         }
1920                         write_extent_buffer(leaf, map + page_offset, ptr,
1921                                             copy_size);
1922                         btrfs_mark_buffer_dirty(leaf);
1923                 }
1924                 kunmap(page);
1925                 set_extent_uptodate(em_tree, em->start, em->end, GFP_NOFS);
1926                 goto insert;
1927         } else {
                     /* unrecognised extent type: warn, then fall through and
                      * map the range as a hole */
1928                 printk("unkknown found_type %d\n", found_type);
1929                 WARN_ON(1);
1930         }
             /* nothing covers the range: map exactly [start, end] as a hole */
1931 not_found:
1932         em->start = start;
1933         em->end = end;
             /* em->start/em->end already chosen by the caller of this label */
1934 not_found_em:
1935         em->block_start = EXTENT_MAP_HOLE;
1936         em->block_end = EXTENT_MAP_HOLE;
1937 insert:
1938         btrfs_release_path(root, path);
             /* sanity check: the map built above must contain @start */
1939         if (em->start > start || em->end < start) {
1940                 printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->end, start, end);
1941                 err = -EIO;
1942                 goto out;
1943         }
1944         ret = add_extent_mapping(em_tree, em);
             /*
              * An overlapping mapping already exists: drop ours and retry
              * the whole lookup, giving up with -EIO after more than five
              * failed inserts.
              */
1945         if (ret == -EEXIST) {
1946                 free_extent_map(em);
1947                 em = NULL;
                     /* disabled by the constant 0 in the condition */
1948                 if (0 && failed_insert == 1) {
1949                         btrfs_drop_extent_cache(inode, start, end);
1950                 }
1951                 failed_insert++;
1952                 if (failed_insert > 5) {
1953                         printk("failing to insert %Lu %Lu\n", start, end);
1954                         err = -EIO;
1955                         goto out;
1956                 }
1957                 goto again;
1958         }
1959         err = 0;
             /* common exit: free the path, end any transaction, unlock */
1960 out:
1961         btrfs_free_path(path);
1962         if (trans) {
1963                 ret = btrfs_end_transaction(trans, root);
                     /* only report the end-transaction result if nothing
                      * failed earlier */
1964                 if (!err)
1965                         err = ret;
1966         }
1967         mutex_unlock(&root->fs_info->fs_mutex);
1968         if (err) {
                     /* NOTE(review): em can be NULL here (allocation failure,
                      * retry limit) -- confirm free_extent_map() accepts NULL */
1969                 free_extent_map(em);
1970                 WARN_ON(1);
1971                 return ERR_PTR(err);
1972         }
1973         return em;
1974 }
1975
1976 static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock)
1977 {
1978         return extent_bmap(mapping, iblock, btrfs_get_extent);
1979 }
1980
1981 int btrfs_readpage(struct file *file, struct page *page)
1982 {
1983         struct extent_map_tree *tree;
1984         tree = &BTRFS_I(page->mapping->host)->extent_tree;
1985         return extent_read_full_page(tree, page, btrfs_get_extent);
1986 }
1987
1988 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1989 {
1990         struct extent_map_tree *tree;
1991
1992
1993         if (current->flags & PF_MEMALLOC) {
1994                 redirty_page_for_writepage(wbc, page);
1995                 unlock_page(page);
1996                 return 0;
1997         }
1998         tree = &BTRFS_I(page->mapping->host)->extent_tree;
1999         return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
2000 }
2001
2002 static int btrfs_writepages(struct address_space *mapping,
2003                             struct writeback_control *wbc)
2004 {
2005         struct extent_map_tree *tree;
2006         tree = &BTRFS_I(mapping->host)->extent_tree;
2007         return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
2008 }
2009
2010 static int
2011 btrfs_readpages(struct file *file, struct address_space *mapping,
2012                 struct list_head *pages, unsigned nr_pages)
2013 {
2014         struct extent_map_tree *tree;
2015         tree = &BTRFS_I(mapping->host)->extent_tree;
2016         return extent_readpages(tree, mapping, pages, nr_pages,
2017                                 btrfs_get_extent);
2018 }
2019
2020 static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
2021 {
2022         struct extent_map_tree *tree;
2023         int ret;
2024
2025         tree = &BTRFS_I(page->mapping->host)->extent_tree;
2026         ret = try_release_extent_mapping(tree, page);
2027         if (ret == 1) {
2028                 ClearPagePrivate(page);
2029                 set_page_private(page, 0);
2030                 page_cache_release(page);
2031         }
2032         return ret;
2033 }
2034
2035 static void btrfs_invalidatepage(struct page *page, unsigned long offset)
2036 {
2037         struct extent_map_tree *tree;
2038
2039         tree = &BTRFS_I(page->mapping->host)->extent_tree;
2040         extent_invalidatepage(tree, page, offset);
2041         btrfs_releasepage(page, GFP_NOFS);
2042 }
2043
2044 /*
2045  * btrfs_page_mkwrite() is not allowed to change the file size as it gets
2046  * called from a page fault handler when a page is first dirtied. Hence we must
2047  * be careful to check for EOF conditions here. We set the page up correctly
2048  * for a written page which means we get ENOSPC checking when writing into
2049  * holes and correct delalloc and unwritten extent mapping on filesystems that
2050  * support these features.
2051  *
2052  * We are not allowed to take the i_mutex here so we have to play games to
2053  * protect against truncate races as the page could now be beyond EOF.  Because
2054  * vmtruncate() writes the inode size before removing pages, once we have the
2055  * page lock we can determine safely if the page is beyond EOF. If it is not
2056  * beyond EOF, then the page is guaranteed safe against truncation until we
2057  * unlock the page.
2058  */
2059 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
2060 {
2061         struct inode *inode = fdentry(vma->vm_file)->d_inode;
2062         struct btrfs_root *root = BTRFS_I(inode)->root;
2063         unsigned long end;
2064         loff_t size;
2065         int ret;
2066         u64 page_start;
2067
2068         mutex_lock(&root->fs_info->fs_mutex);
2069         ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0);
2070         mutex_unlock(&root->fs_info->fs_mutex);
2071         if (ret)
2072                 goto out;
2073
2074         ret = -EINVAL;
2075
2076         lock_page(page);
2077         wait_on_page_writeback(page);
2078         size = i_size_read(inode);
2079         page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2080
2081         if ((page->mapping != inode->i_mapping) ||
2082             (page_start > size)) {
2083                 /* page got truncated out from underneath us */
2084                 goto out_unlock;
2085         }
2086
2087         /* page is wholly or partially inside EOF */
2088         if (page_start + PAGE_CACHE_SIZE > size)
2089                 end = size & ~PAGE_CACHE_MASK;
2090         else
2091                 end = PAGE_CACHE_SIZE;
2092
2093         ret = btrfs_cow_one_page(inode, page, end);
2094
2095 out_unlock:
2096         unlock_page(page);
2097 out:
2098         return ret;
2099 }
2100
2101 static void btrfs_truncate(struct inode *inode)
2102 {
2103         struct btrfs_root *root = BTRFS_I(inode)->root;
2104         int ret;
2105         struct btrfs_trans_handle *trans;
2106         unsigned long nr;
2107
2108         if (!S_ISREG(inode->i_mode))
2109                 return;
2110         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2111                 return;
2112
2113         btrfs_truncate_page(inode->i_mapping, inode->i_size);
2114
2115         mutex_lock(&root->fs_info->fs_mutex);
2116         trans = btrfs_start_transaction(root, 1);
2117         btrfs_set_trans_block_group(trans, inode);
2118
2119         /* FIXME, add redo link to tree so we don't leak on crash */
2120         ret = btrfs_truncate_in_trans(trans, root, inode);
2121         btrfs_update_inode(trans, root, inode);
2122         nr = trans->blocks_used;
2123
2124         ret = btrfs_end_transaction(trans, root);
2125         BUG_ON(ret);
2126         mutex_unlock(&root->fs_info->fs_mutex);
2127         btrfs_btree_balance_dirty(root, nr);
2128         btrfs_throttle(root);
2129 }
2130
2131 static int noinline create_subvol(struct btrfs_root *root, char *name,
2132                                   int namelen)
2133 {
2134         struct btrfs_trans_handle *trans;
2135         struct btrfs_key key;
2136         struct btrfs_root_item root_item;
2137         struct btrfs_inode_item *inode_item;
2138         struct extent_buffer *leaf;
2139         struct btrfs_root *new_root = root;
2140         struct inode *inode;
2141         struct inode *dir;
2142         int ret;
2143         int err;
2144         u64 objectid;
2145         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2146         unsigned long nr = 1;
2147
2148         mutex_lock(&root->fs_info->fs_mutex);
2149         ret = btrfs_check_free_space(root, 1, 0);
2150         if (ret)
2151                 goto fail_commit;
2152
2153         trans = btrfs_start_transaction(root, 1);
2154         BUG_ON(!trans);
2155
2156         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2157                                        0, &objectid);
2158         if (ret)
2159                 goto fail;
2160
2161         leaf = __btrfs_alloc_free_block(trans, root, root->leafsize,
2162                                         objectid, trans->transid, 0, 0,
2163                                         0, 0);
2164         if (IS_ERR(leaf))
2165                 return PTR_ERR(leaf);
2166
2167         btrfs_set_header_nritems(leaf, 0);
2168         btrfs_set_header_level(leaf, 0);
2169         btrfs_set_header_bytenr(leaf, leaf->start);
2170         btrfs_set_header_generation(leaf, trans->transid);
2171         btrfs_set_header_owner(leaf, objectid);
2172
2173         write_extent_buffer(leaf, root->fs_info->fsid,
2174                             (unsigned long)btrfs_header_fsid(leaf),
2175                             BTRFS_FSID_SIZE);
2176         btrfs_mark_buffer_dirty(leaf);
2177
2178         inode_item = &root_item.inode;
2179         memset(inode_item, 0, sizeof(*inode_item));
2180         inode_item->generation = cpu_to_le64(1);
2181         inode_item->size = cpu_to_le64(3);
2182         inode_item->nlink = cpu_to_le32(1);
2183         inode_item->nblocks = cpu_to_le64(1);
2184         inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
2185
2186         btrfs_set_root_bytenr(&root_item, leaf->start);
2187         btrfs_set_root_level(&root_item, 0);
2188         btrfs_set_root_refs(&root_item, 1);
2189         btrfs_set_root_used(&root_item, 0);
2190
2191         memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
2192         root_item.drop_level = 0;
2193
2194         free_extent_buffer(leaf);
2195         leaf = NULL;
2196
2197         btrfs_set_root_dirid(&root_item, new_dirid);
2198
2199         key.objectid = objectid;
2200         key.offset = 1;
2201         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2202         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2203                                 &root_item);
2204         if (ret)
2205                 goto fail;
2206
2207         /*
2208          * insert the directory item
2209          */
2210         key.offset = (u64)-1;
2211         dir = root->fs_info->sb->s_root->d_inode;
2212         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2213                                     name, namelen, dir->i_ino, &key,
2214                                     BTRFS_FT_DIR);
2215         if (ret)
2216                 goto fail;
2217
2218         ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
2219                              name, namelen, objectid,
2220                              root->fs_info->sb->s_root->d_inode->i_ino);
2221         if (ret)
2222                 goto fail;
2223
2224         ret = btrfs_commit_transaction(trans, root);
2225         if (ret)
2226                 goto fail_commit;
2227
2228         new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen);
2229         BUG_ON(!new_root);
2230
2231         trans = btrfs_start_transaction(new_root, 1);
2232         BUG_ON(!trans);
2233
2234         inode = btrfs_new_inode(trans, new_root, new_dirid,
2235                                 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2236         if (IS_ERR(inode))
2237                 goto fail;
2238         inode->i_op = &btrfs_dir_inode_operations;
2239         inode->i_fop = &btrfs_dir_file_operations;
2240         new_root->inode = inode;
2241
2242         ret = btrfs_insert_inode_ref(trans, new_root, "..", 2, new_dirid,
2243                                      new_dirid);
2244         inode->i_nlink = 1;
2245         inode->i_size = 0;
2246         ret = btrfs_update_inode(trans, new_root, inode);
2247         if (ret)
2248                 goto fail;
2249 fail:
2250         nr = trans->blocks_used;
2251         err = btrfs_commit_transaction(trans, new_root);
2252         if (err && !ret)
2253                 ret = err;
2254 fail_commit:
2255         mutex_unlock(&root->fs_info->fs_mutex);
2256         btrfs_btree_balance_dirty(root, nr);
2257         btrfs_throttle(root);
2258         return ret;
2259 }
2260
2261 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2262 {
2263         struct btrfs_pending_snapshot *pending_snapshot;
2264         struct btrfs_trans_handle *trans;
2265         int ret;
2266         int err;
2267         unsigned long nr = 0;
2268
2269         if (!root->ref_cows)
2270                 return -EINVAL;
2271
2272         mutex_lock(&root->fs_info->fs_mutex);
2273         ret = btrfs_check_free_space(root, 1, 0);
2274         if (ret)
2275                 goto fail_unlock;
2276
2277         pending_snapshot = kmalloc(sizeof(*pending_snapshot), GFP_NOFS);
2278         if (!pending_snapshot) {
2279                 ret = -ENOMEM;
2280                 goto fail_unlock;
2281         }
2282         pending_snapshot->name = kstrndup(name, namelen, GFP_NOFS);
2283         if (!pending_snapshot->name) {
2284                 ret = -ENOMEM;
2285                 kfree(pending_snapshot);
2286                 goto fail_unlock;
2287         }
2288         trans = btrfs_start_transaction(root, 1);
2289         BUG_ON(!trans);
2290
2291         pending_snapshot->root = root;
2292         list_add(&pending_snapshot->list,
2293                  &trans->transaction->pending_snapshots);
2294         ret = btrfs_update_inode(trans, root, root->inode);
2295         err = btrfs_commit_transaction(trans, root);
2296
2297 fail_unlock:
2298         mutex_unlock(&root->fs_info->fs_mutex);
2299         btrfs_btree_balance_dirty(root, nr);
2300         btrfs_throttle(root);
2301         return ret;
2302 }
2303
2304 unsigned long btrfs_force_ra(struct address_space *mapping,
2305                               struct file_ra_state *ra, struct file *file,
2306                               pgoff_t offset, pgoff_t last_index)
2307 {
2308         pgoff_t req_size;
2309
2310 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
2311         req_size = last_index - offset + 1;
2312         offset = page_cache_readahead(mapping, ra, file, offset, req_size);
2313         return offset;
2314 #else
2315         req_size = min(last_index - offset + 1, (pgoff_t)128);
2316         page_cache_sync_readahead(mapping, ra, file, offset, req_size);
2317         return offset + req_size;
2318 #endif
2319 }
2320
2321 int btrfs_defrag_file(struct file *file) {
2322         struct inode *inode = fdentry(file)->d_inode;
2323         struct btrfs_root *root = BTRFS_I(inode)->root;
2324         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2325         struct page *page;
2326         unsigned long last_index;
2327         unsigned long ra_index = 0;
2328         u64 page_start;
2329         u64 page_end;
2330         u64 delalloc_start;
2331         u64 existing_delalloc;
2332         unsigned long i;
2333         int ret;
2334
2335         mutex_lock(&root->fs_info->fs_mutex);
2336         ret = btrfs_check_free_space(root, inode->i_size, 0);
2337         mutex_unlock(&root->fs_info->fs_mutex);
2338         if (ret)
2339                 return -ENOSPC;
2340
2341         mutex_lock(&inode->i_mutex);
2342         last_index = inode->i_size >> PAGE_CACHE_SHIFT;
2343         for (i = 0; i <= last_index; i++) {
2344                 if (i == ra_index) {
2345                         ra_index = btrfs_force_ra(inode->i_mapping,
2346                                                   &file->f_ra,
2347                                                   file, ra_index, last_index);
2348                 }
2349                 page = grab_cache_page(inode->i_mapping, i);
2350                 if (!page)
2351                         goto out_unlock;
2352                 if (!PageUptodate(page)) {
2353                         btrfs_readpage(NULL, page);
2354                         lock_page(page);
2355                         if (!PageUptodate(page)) {
2356                                 unlock_page(page);
2357                                 page_cache_release(page);
2358                                 goto out_unlock;
2359                         }
2360                 }
2361                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2362                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2363
2364                 lock_extent(em_tree, page_start, page_end, GFP_NOFS);
2365                 delalloc_start = page_start;
2366                 existing_delalloc =
2367                         count_range_bits(&BTRFS_I(inode)->extent_tree,
2368                                          &delalloc_start, page_end,
2369                                          PAGE_CACHE_SIZE, EXTENT_DELALLOC);
2370                 set_extent_delalloc(em_tree, page_start,
2371                                     page_end, GFP_NOFS);
2372
2373                 spin_lock(&root->fs_info->delalloc_lock);
2374                 root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE -
2375                                                  existing_delalloc;
2376                 spin_unlock(&root->fs_info->delalloc_lock);
2377
2378                 unlock_extent(em_tree, page_start, page_end, GFP_NOFS);
2379                 set_page_dirty(page);
2380                 unlock_page(page);
2381                 page_cache_release(page);
2382                 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
2383         }
2384
2385 out_unlock:
2386         mutex_unlock(&inode->i_mutex);
2387         return 0;
2388 }
2389
2390 static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
2391 {
2392         u64 new_size;
2393         u64 old_size;
2394         struct btrfs_ioctl_vol_args *vol_args;
2395         struct btrfs_trans_handle *trans;
2396         char *sizestr;
2397         int ret = 0;
2398         int namelen;
2399         int mod = 0;
2400
2401         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
2402
2403         if (!vol_args)
2404                 return -ENOMEM;
2405
2406         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
2407                 ret = -EFAULT;
2408                 goto out;
2409         }
2410         namelen = strlen(vol_args->name);
2411         if (namelen > BTRFS_VOL_NAME_MAX) {
2412                 ret = -EINVAL;
2413                 goto out;
2414         }
2415
2416         sizestr = vol_args->name;
2417         if (!strcmp(sizestr, "max"))
2418                 new_size = root->fs_info->sb->s_bdev->bd_inode->i_size;
2419         else {
2420                 if (sizestr[0] == '-') {
2421                         mod = -1;
2422                         sizestr++;
2423                 } else if (sizestr[0] == '+') {
2424                         mod = 1;
2425                         sizestr++;
2426                 }
2427                 new_size = btrfs_parse_size(sizestr);
2428                 if (new_size == 0) {
2429                         ret = -EINVAL;
2430                         goto out;
2431                 }
2432         }
2433
2434         mutex_lock(&root->fs_info->fs_mutex);
2435         old_size = btrfs_super_total_bytes(&root->fs_info->super_copy);
2436
2437         if (mod < 0) {
2438                 if (new_size > old_size) {
2439                         ret = -EINVAL;
2440                         goto out_unlock;
2441                 }
2442                 new_size = old_size - new_size;
2443         } else if (mod > 0) {
2444                 new_size = old_size + new_size;
2445         }
2446
2447         if (new_size < 256 * 1024 * 1024) {
2448                 ret = -EINVAL;
2449                 goto out_unlock;
2450         }
2451         if (new_size > root->fs_info->sb->s_bdev->bd_inode->i_size) {
2452                 ret = -EFBIG;
2453                 goto out_unlock;
2454         }
2455
2456         do_div(new_size, root->sectorsize);
2457         new_size *= root->sectorsize;
2458
2459 printk("new size is %Lu\n", new_size);
2460         if (new_size > old_size) {
2461                 trans = btrfs_start_transaction(root, 1);
2462                 ret = btrfs_grow_extent_tree(trans, root, new_size);
2463                 btrfs_commit_transaction(trans, root);
2464         } else {
2465                 ret = btrfs_shrink_extent_tree(root, new_size);
2466         }
2467
2468 out_unlock:
2469         mutex_unlock(&root->fs_info->fs_mutex);
2470 out:
2471         kfree(vol_args);
2472         return ret;
2473 }
2474
2475 static int noinline btrfs_ioctl_snap_create(struct btrfs_root *root,
2476                                             void __user *arg)
2477 {
2478         struct btrfs_ioctl_vol_args *vol_args;
2479         struct btrfs_dir_item *di;
2480         struct btrfs_path *path;
2481         u64 root_dirid;
2482         int namelen;
2483         int ret;
2484
2485         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
2486
2487         if (!vol_args)
2488                 return -ENOMEM;
2489
2490         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
2491                 ret = -EFAULT;
2492                 goto out;
2493         }
2494
2495         namelen = strlen(vol_args->name);
2496         if (namelen > BTRFS_VOL_NAME_MAX) {
2497                 ret = -EINVAL;
2498                 goto out;
2499         }
2500         if (strchr(vol_args->name, '/')) {
2501                 ret = -EINVAL;
2502                 goto out;
2503         }
2504
2505         path = btrfs_alloc_path();
2506         if (!path) {
2507                 ret = -ENOMEM;
2508                 goto out;
2509         }
2510
2511         root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2512         mutex_lock(&root->fs_info->fs_mutex);
2513         di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2514                             path, root_dirid,
2515                             vol_args->name, namelen, 0);
2516         mutex_unlock(&root->fs_info->fs_mutex);
2517         btrfs_free_path(path);
2518
2519         if (di && !IS_ERR(di)) {
2520                 ret = -EEXIST;
2521                 goto out;
2522         }
2523
2524         if (IS_ERR(di)) {
2525                 ret = PTR_ERR(di);
2526                 goto out;
2527         }
2528
2529         if (root == root->fs_info->tree_root)
2530                 ret = create_subvol(root, vol_args->name, namelen);
2531         else
2532                 ret = create_snapshot(root, vol_args->name, namelen);
2533 out:
2534         kfree(vol_args);
2535         return ret;
2536 }
2537
2538 static int btrfs_ioctl_defrag(struct file *file)
2539 {
2540         struct inode *inode = fdentry(file)->d_inode;
2541         struct btrfs_root *root = BTRFS_I(inode)->root;
2542
2543         switch (inode->i_mode & S_IFMT) {
2544         case S_IFDIR:
2545                 mutex_lock(&root->fs_info->fs_mutex);
2546                 btrfs_defrag_root(root, 0);
2547                 btrfs_defrag_root(root->fs_info->extent_root, 0);
2548                 mutex_unlock(&root->fs_info->fs_mutex);
2549                 break;
2550         case S_IFREG:
2551                 btrfs_defrag_file(file);
2552                 break;
2553         }
2554
2555         return 0;
2556 }
2557
2558 long btrfs_ioctl(struct file *file, unsigned int
2559                 cmd, unsigned long arg)
2560 {
2561         struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
2562
2563         switch (cmd) {
2564         case BTRFS_IOC_SNAP_CREATE:
2565                 return btrfs_ioctl_snap_create(root, (void __user *)arg);
2566         case BTRFS_IOC_DEFRAG:
2567                 return btrfs_ioctl_defrag(file);
2568         case BTRFS_IOC_RESIZE:
2569                 return btrfs_ioctl_resize(root, (void __user *)arg);
2570         }
2571
2572         return -ENOTTY;
2573 }
2574
2575 /*
2576  * Called inside transaction, so use GFP_NOFS
2577  */
2578 struct inode *btrfs_alloc_inode(struct super_block *sb)
2579 {
2580         struct btrfs_inode *ei;
2581
2582         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2583         if (!ei)
2584                 return NULL;
2585         ei->last_trans = 0;
2586         ei->ordered_trans = 0;
2587         return &ei->vfs_inode;
2588 }
2589
2590 void btrfs_destroy_inode(struct inode *inode)
2591 {
2592         WARN_ON(!list_empty(&inode->i_dentry));
2593         WARN_ON(inode->i_data.nrpages);
2594
2595         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2596 }
2597
2598 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
2599 static void init_once(struct kmem_cache * cachep, void *foo)
2600 #else
2601 static void init_once(void * foo, struct kmem_cache * cachep,
2602                       unsigned long flags)
2603 #endif
2604 {
2605         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2606
2607         inode_init_once(&ei->vfs_inode);
2608 }
2609
2610 void btrfs_destroy_cachep(void)
2611 {
2612         if (btrfs_inode_cachep)
2613                 kmem_cache_destroy(btrfs_inode_cachep);
2614         if (btrfs_trans_handle_cachep)
2615                 kmem_cache_destroy(btrfs_trans_handle_cachep);
2616         if (btrfs_transaction_cachep)
2617                 kmem_cache_destroy(btrfs_transaction_cachep);
2618         if (btrfs_bit_radix_cachep)
2619                 kmem_cache_destroy(btrfs_bit_radix_cachep);
2620         if (btrfs_path_cachep)
2621                 kmem_cache_destroy(btrfs_path_cachep);
2622 }
2623
2624 struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
2625                                        unsigned long extra_flags,
2626 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
2627                                        void (*ctor)(struct kmem_cache *, void *)
2628 #else
2629                                        void (*ctor)(void *, struct kmem_cache *,
2630                                                     unsigned long)
2631 #endif
2632                                      )
2633 {
2634         return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
2635                                  SLAB_MEM_SPREAD | extra_flags), ctor
2636 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
2637                                  ,NULL
2638 #endif
2639                                 );
2640 }
2641
2642 int btrfs_init_cachep(void)
2643 {
2644         btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache",
2645                                           sizeof(struct btrfs_inode),
2646                                           0, init_once);
2647         if (!btrfs_inode_cachep)
2648                 goto fail;
2649         btrfs_trans_handle_cachep =
2650                         btrfs_cache_create("btrfs_trans_handle_cache",
2651                                            sizeof(struct btrfs_trans_handle),
2652                                            0, NULL);
2653         if (!btrfs_trans_handle_cachep)
2654                 goto fail;
2655         btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache",
2656                                              sizeof(struct btrfs_transaction),
2657                                              0, NULL);
2658         if (!btrfs_transaction_cachep)
2659                 goto fail;
2660         btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache",
2661                                          sizeof(struct btrfs_path),
2662                                          0, NULL);
2663         if (!btrfs_path_cachep)
2664                 goto fail;
2665         btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256,
2666                                               SLAB_DESTROY_BY_RCU, NULL);
2667         if (!btrfs_bit_radix_cachep)
2668                 goto fail;
2669         return 0;
2670 fail:
2671         btrfs_destroy_cachep();
2672         return -ENOMEM;
2673 }
2674
2675 static int btrfs_getattr(struct vfsmount *mnt,
2676                          struct dentry *dentry, struct kstat *stat)
2677 {
2678         struct inode *inode = dentry->d_inode;
2679         generic_fillattr(inode, stat);
2680         stat->blksize = PAGE_CACHE_SIZE;
2681         return 0;
2682 }
2683
2684 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
2685                            struct inode * new_dir,struct dentry *new_dentry)
2686 {
2687         struct btrfs_trans_handle *trans;
2688         struct btrfs_root *root = BTRFS_I(old_dir)->root;
2689         struct inode *new_inode = new_dentry->d_inode;
2690         struct inode *old_inode = old_dentry->d_inode;
2691         struct timespec ctime = CURRENT_TIME;
2692         struct btrfs_path *path;
2693         int ret;
2694
2695         if (S_ISDIR(old_inode->i_mode) && new_inode &&
2696             new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
2697                 return -ENOTEMPTY;
2698         }
2699
2700         mutex_lock(&root->fs_info->fs_mutex);
2701         ret = btrfs_check_free_space(root, 1, 0);
2702         if (ret)
2703                 goto out_unlock;
2704
2705         trans = btrfs_start_transaction(root, 1);
2706
2707         btrfs_set_trans_block_group(trans, new_dir);
2708         path = btrfs_alloc_path();
2709         if (!path) {
2710                 ret = -ENOMEM;
2711                 goto out_fail;
2712         }
2713
2714         old_dentry->d_inode->i_nlink++;
2715         old_dir->i_ctime = old_dir->i_mtime = ctime;
2716         new_dir->i_ctime = new_dir->i_mtime = ctime;
2717         old_inode->i_ctime = ctime;
2718
2719         ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
2720         if (ret)
2721                 goto out_fail;
2722
2723         if (new_inode) {
2724                 new_inode->i_ctime = CURRENT_TIME;
2725                 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
2726                 if (ret)
2727                         goto out_fail;
2728         }
2729         ret = btrfs_add_link(trans, new_dentry, old_inode);
2730         if (ret)
2731                 goto out_fail;
2732
2733 out_fail:
2734         btrfs_free_path(path);
2735         btrfs_end_transaction(trans, root);
2736 out_unlock:
2737         mutex_unlock(&root->fs_info->fs_mutex);
2738         return ret;
2739 }
2740
2741 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
2742                          const char *symname)
2743 {
2744         struct btrfs_trans_handle *trans;
2745         struct btrfs_root *root = BTRFS_I(dir)->root;
2746         struct btrfs_path *path;
2747         struct btrfs_key key;
2748         struct inode *inode = NULL;
2749         int err;
2750         int drop_inode = 0;
2751         u64 objectid;
2752         int name_len;
2753         int datasize;
2754         unsigned long ptr;
2755         struct btrfs_file_extent_item *ei;
2756         struct extent_buffer *leaf;
2757         unsigned long nr = 0;
2758
2759         name_len = strlen(symname) + 1;
2760         if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
2761                 return -ENAMETOOLONG;
2762
2763         mutex_lock(&root->fs_info->fs_mutex);
2764         err = btrfs_check_free_space(root, 1, 0);
2765         if (err)
2766                 goto out_fail;
2767
2768         trans = btrfs_start_transaction(root, 1);
2769         btrfs_set_trans_block_group(trans, dir);
2770
2771         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2772         if (err) {
2773                 err = -ENOSPC;
2774                 goto out_unlock;
2775         }
2776
2777         inode = btrfs_new_inode(trans, root, objectid,
2778                                 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
2779         err = PTR_ERR(inode);
2780         if (IS_ERR(inode))
2781                 goto out_unlock;
2782
2783         btrfs_set_trans_block_group(trans, inode);
2784         err = btrfs_add_nondir(trans, dentry, inode);
2785         if (err)
2786                 drop_inode = 1;
2787         else {
2788                 inode->i_mapping->a_ops = &btrfs_aops;
2789                 inode->i_fop = &btrfs_file_operations;
2790                 inode->i_op = &btrfs_file_inode_operations;
2791                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
2792                                      inode->i_mapping, GFP_NOFS);
2793                 BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops;
2794         }
2795         dir->i_sb->s_dirt = 1;
2796         btrfs_update_inode_block_group(trans, inode);
2797         btrfs_update_inode_block_group(trans, dir);
2798         if (drop_inode)
2799                 goto out_unlock;
2800
2801         path = btrfs_alloc_path();
2802         BUG_ON(!path);
2803         key.objectid = inode->i_ino;
2804         key.offset = 0;
2805         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2806         datasize = btrfs_file_extent_calc_inline_size(name_len);
2807         err = btrfs_insert_empty_item(trans, root, path, &key,
2808                                       datasize);
2809         if (err) {
2810                 drop_inode = 1;
2811                 goto out_unlock;
2812         }
2813         leaf = path->nodes[0];
2814         ei = btrfs_item_ptr(leaf, path->slots[0],
2815                             struct btrfs_file_extent_item);
2816         btrfs_set_file_extent_generation(leaf, ei, trans->transid);
2817         btrfs_set_file_extent_type(leaf, ei,
2818                                    BTRFS_FILE_EXTENT_INLINE);
2819         ptr = btrfs_file_extent_inline_start(ei);
2820         write_extent_buffer(leaf, symname, ptr, name_len);
2821         btrfs_mark_buffer_dirty(leaf);
2822         btrfs_free_path(path);
2823
2824         inode->i_op = &btrfs_symlink_inode_operations;
2825         inode->i_mapping->a_ops = &btrfs_symlink_aops;
2826         inode->i_size = name_len - 1;
2827         err = btrfs_update_inode(trans, root, inode);
2828         if (err)
2829                 drop_inode = 1;
2830
2831 out_unlock:
2832         nr = trans->blocks_used;
2833         btrfs_end_transaction(trans, root);
2834 out_fail:
2835         mutex_unlock(&root->fs_info->fs_mutex);
2836         if (drop_inode) {
2837                 inode_dec_link_count(inode);
2838                 iput(inode);
2839         }
2840         btrfs_btree_balance_dirty(root, nr);
2841         btrfs_throttle(root);
2842         return err;
2843 }
2844
2845 static struct inode_operations btrfs_dir_inode_operations = {
2846         .lookup         = btrfs_lookup,
2847         .create         = btrfs_create,
2848         .unlink         = btrfs_unlink,
2849         .link           = btrfs_link,
2850         .mkdir          = btrfs_mkdir,
2851         .rmdir          = btrfs_rmdir,
2852         .rename         = btrfs_rename,
2853         .symlink        = btrfs_symlink,
2854         .setattr        = btrfs_setattr,
2855         .mknod          = btrfs_mknod,
2856         .setxattr       = generic_setxattr,
2857         .getxattr       = generic_getxattr,
2858         .listxattr      = btrfs_listxattr,
2859         .removexattr    = generic_removexattr,
2860 };
2861
2862 static struct inode_operations btrfs_dir_ro_inode_operations = {
2863         .lookup         = btrfs_lookup,
2864 };
2865
2866 static struct file_operations btrfs_dir_file_operations = {
2867         .llseek         = generic_file_llseek,
2868         .read           = generic_read_dir,
2869         .readdir        = btrfs_readdir,
2870         .unlocked_ioctl = btrfs_ioctl,
2871 #ifdef CONFIG_COMPAT
2872         .compat_ioctl   = btrfs_ioctl,
2873 #endif
2874 };
2875
2876 static struct extent_map_ops btrfs_extent_map_ops = {
2877         .fill_delalloc = run_delalloc_range,
2878         .writepage_io_hook = btrfs_writepage_io_hook,
2879         .readpage_io_hook = btrfs_readpage_io_hook,
2880         .readpage_end_io_hook = btrfs_readpage_end_io_hook,
2881 };
2882
2883 static struct address_space_operations btrfs_aops = {
2884         .readpage       = btrfs_readpage,
2885         .writepage      = btrfs_writepage,
2886         .writepages     = btrfs_writepages,
2887         .readpages      = btrfs_readpages,
2888         .sync_page      = block_sync_page,
2889         .bmap           = btrfs_bmap,
2890         .invalidatepage = btrfs_invalidatepage,
2891         .releasepage    = btrfs_releasepage,
2892         .set_page_dirty = __set_page_dirty_nobuffers,
2893 };
2894
2895 static struct address_space_operations btrfs_symlink_aops = {
2896         .readpage       = btrfs_readpage,
2897         .writepage      = btrfs_writepage,
2898         .invalidatepage = btrfs_invalidatepage,
2899         .releasepage    = btrfs_releasepage,
2900 };
2901
2902 static struct inode_operations btrfs_file_inode_operations = {
2903         .truncate       = btrfs_truncate,
2904         .getattr        = btrfs_getattr,
2905         .setattr        = btrfs_setattr,
2906         .setxattr       = generic_setxattr,
2907         .getxattr       = generic_getxattr,
2908         .listxattr      = btrfs_listxattr,
2909         .removexattr    = generic_removexattr,
2910 };
2911
2912 static struct inode_operations btrfs_special_inode_operations = {
2913         .getattr        = btrfs_getattr,
2914         .setattr        = btrfs_setattr,
2915 };
2916
2917 static struct inode_operations btrfs_symlink_inode_operations = {
2918         .readlink       = generic_readlink,
2919         .follow_link    = page_follow_link_light,
2920         .put_link       = page_put_link,
2921 };