Btrfs: Pass down the expected generation number when reading tree blocks
[linux-2.6] / fs / btrfs / inode.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/kernel.h>
20 #include <linux/bio.h>
21 #include <linux/buffer_head.h>
22 #include <linux/file.h>
23 #include <linux/fs.h>
24 #include <linux/pagemap.h>
25 #include <linux/highmem.h>
26 #include <linux/time.h>
27 #include <linux/init.h>
28 #include <linux/string.h>
29 #include <linux/smp_lock.h>
30 #include <linux/backing-dev.h>
31 #include <linux/mpage.h>
32 #include <linux/swap.h>
33 #include <linux/writeback.h>
34 #include <linux/statfs.h>
35 #include <linux/compat.h>
36 #include <linux/bit_spinlock.h>
37 #include <linux/version.h>
38 #include <linux/xattr.h>
39 #include "ctree.h"
40 #include "disk-io.h"
41 #include "transaction.h"
42 #include "btrfs_inode.h"
43 #include "ioctl.h"
44 #include "print-tree.h"
45 #include "volumes.h"
46
/*
 * Pairs an inode number with a btrfs root.
 * NOTE(review): presumably the argument bundle for the iget lookup
 * callbacks defined further down the file -- not visible here, confirm.
 */
47 struct btrfs_iget_args {
48         u64 ino;                     /* inode number */
49         struct btrfs_root *root;     /* root associated with that inode number */
50 };
51
/*
 * Forward declarations of the operation tables that
 * btrfs_read_locked_inode() installs on an inode according to its type.
 * Their initializers are not in this part of the file.
 */
52 static struct inode_operations btrfs_dir_inode_operations;
53 static struct inode_operations btrfs_symlink_inode_operations;
54 static struct inode_operations btrfs_dir_ro_inode_operations;
55 static struct inode_operations btrfs_special_inode_operations;
56 static struct inode_operations btrfs_file_inode_operations;
57 static struct address_space_operations btrfs_aops;
58 static struct address_space_operations btrfs_symlink_aops;
59 static struct file_operations btrfs_dir_file_operations;
60 static struct extent_io_ops btrfs_extent_io_ops;
61
/*
 * Slab cache pointers.  Only the inode cache is file-local; the other
 * four have external linkage.
 */
62 static struct kmem_cache *btrfs_inode_cachep;
63 struct kmem_cache *btrfs_trans_handle_cachep;
64 struct kmem_cache *btrfs_transaction_cachep;
65 struct kmem_cache *btrfs_bit_radix_cachep;
66 struct kmem_cache *btrfs_path_cachep;
67
/*
 * Lookup table from the file-type bits of an inode mode to the
 * corresponding BTRFS_FT_* value.  The index is the S_IF* constant
 * shifted right by S_SHIFT; slots without an initializer are zero.
 */
68 #define S_SHIFT 12
69 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
70         [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
71         [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
72         [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
73         [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
74         [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
75         [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
76         [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
77 };
78
79 int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
80                            int for_del)
81 {
82         u64 total = btrfs_super_total_bytes(&root->fs_info->super_copy);
83         u64 used = btrfs_super_bytes_used(&root->fs_info->super_copy);
84         u64 thresh;
85         unsigned long flags;
86         int ret = 0;
87
88         if (for_del)
89                 thresh = total * 90;
90         else
91                 thresh = total * 85;
92
93         do_div(thresh, 100);
94
95         spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
96         if (used + root->fs_info->delalloc_bytes + num_required > thresh)
97                 ret = -ENOSPC;
98         spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
99         return ret;
100 }
101
102 static int cow_file_range(struct inode *inode, u64 start, u64 end)
103 {
104         struct btrfs_root *root = BTRFS_I(inode)->root;
105         struct btrfs_trans_handle *trans;
106         u64 alloc_hint = 0;
107         u64 num_bytes;
108         u64 cur_alloc_size;
109         u64 blocksize = root->sectorsize;
110         u64 orig_start = start;
111         u64 orig_num_bytes;
112         struct btrfs_key ins;
113         int ret;
114
115         trans = btrfs_start_transaction(root, 1);
116         BUG_ON(!trans);
117         btrfs_set_trans_block_group(trans, inode);
118
119         num_bytes = (end - start + blocksize) & ~(blocksize - 1);
120         num_bytes = max(blocksize,  num_bytes);
121         ret = btrfs_drop_extents(trans, root, inode,
122                                  start, start + num_bytes, start, &alloc_hint);
123         orig_num_bytes = num_bytes;
124
125         if (alloc_hint == EXTENT_MAP_INLINE)
126                 goto out;
127
128         BUG_ON(num_bytes > btrfs_super_total_bytes(&root->fs_info->super_copy));
129
130         while(num_bytes > 0) {
131                 cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
132                 ret = btrfs_alloc_extent(trans, root, cur_alloc_size,
133                                          root->sectorsize,
134                                          root->root_key.objectid,
135                                          trans->transid,
136                                          inode->i_ino, start, 0,
137                                          alloc_hint, (u64)-1, &ins, 1);
138                 if (ret) {
139                         WARN_ON(1);
140                         goto out;
141                 }
142                 cur_alloc_size = ins.offset;
143                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
144                                                start, ins.objectid, ins.offset,
145                                                ins.offset, 0);
146                 inode->i_blocks += ins.offset >> 9;
147                 btrfs_check_file(root, inode);
148                 if (num_bytes < cur_alloc_size) {
149                         printk("num_bytes %Lu cur_alloc %Lu\n", num_bytes,
150                                cur_alloc_size);
151                         break;
152                 }
153                 num_bytes -= cur_alloc_size;
154                 alloc_hint = ins.objectid + ins.offset;
155                 start += cur_alloc_size;
156         }
157         btrfs_drop_extent_cache(inode, orig_start,
158                                 orig_start + orig_num_bytes - 1);
159         btrfs_add_ordered_inode(inode);
160         btrfs_update_inode(trans, root, inode);
161 out:
162         btrfs_end_transaction(trans, root);
163         return ret;
164 }
165
166 static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end)
167 {
168         u64 extent_start;
169         u64 extent_end;
170         u64 bytenr;
171         u64 cow_end;
172         u64 loops = 0;
173         u64 total_fs_bytes;
174         struct btrfs_root *root = BTRFS_I(inode)->root;
175         struct btrfs_block_group_cache *block_group;
176         struct extent_buffer *leaf;
177         int found_type;
178         struct btrfs_path *path;
179         struct btrfs_file_extent_item *item;
180         int ret;
181         int err;
182         struct btrfs_key found_key;
183
184         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
185         path = btrfs_alloc_path();
186         BUG_ON(!path);
187 again:
188         ret = btrfs_lookup_file_extent(NULL, root, path,
189                                        inode->i_ino, start, 0);
190         if (ret < 0) {
191                 btrfs_free_path(path);
192                 return ret;
193         }
194
195         cow_end = end;
196         if (ret != 0) {
197                 if (path->slots[0] == 0)
198                         goto not_found;
199                 path->slots[0]--;
200         }
201
202         leaf = path->nodes[0];
203         item = btrfs_item_ptr(leaf, path->slots[0],
204                               struct btrfs_file_extent_item);
205
206         /* are we inside the extent that was found? */
207         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
208         found_type = btrfs_key_type(&found_key);
209         if (found_key.objectid != inode->i_ino ||
210             found_type != BTRFS_EXTENT_DATA_KEY)
211                 goto not_found;
212
213         found_type = btrfs_file_extent_type(leaf, item);
214         extent_start = found_key.offset;
215         if (found_type == BTRFS_FILE_EXTENT_REG) {
216                 u64 extent_num_bytes;
217
218                 extent_num_bytes = btrfs_file_extent_num_bytes(leaf, item);
219                 extent_end = extent_start + extent_num_bytes;
220                 err = 0;
221
222                 if (loops && start != extent_start)
223                         goto not_found;
224
225                 if (start < extent_start || start >= extent_end)
226                         goto not_found;
227
228                 cow_end = min(end, extent_end - 1);
229                 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
230                 if (bytenr == 0)
231                         goto not_found;
232
233                 if (btrfs_count_snapshots_in_path(root, path, inode->i_ino,
234                                                   bytenr) != 1) {
235                         goto not_found;
236                 }
237
238                 /*
239                  * we may be called by the resizer, make sure we're inside
240                  * the limits of the FS
241                  */
242                 block_group = btrfs_lookup_block_group(root->fs_info,
243                                                        bytenr);
244                 if (!block_group || block_group->ro)
245                         goto not_found;
246
247                 start = extent_end;
248         } else {
249                 goto not_found;
250         }
251 loop:
252         if (start > end) {
253                 btrfs_free_path(path);
254                 return 0;
255         }
256         btrfs_release_path(root, path);
257         loops++;
258         goto again;
259
260 not_found:
261         cow_file_range(inode, start, end);
262         start = end + 1;
263         goto loop;
264 }
265
266 static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
267 {
268         struct btrfs_root *root = BTRFS_I(inode)->root;
269         int ret;
270         mutex_lock(&root->fs_info->fs_mutex);
271         if (btrfs_test_opt(root, NODATACOW) ||
272             btrfs_test_flag(inode, NODATACOW))
273                 ret = run_delalloc_nocow(inode, start, end);
274         else
275                 ret = cow_file_range(inode, start, end);
276
277         mutex_unlock(&root->fs_info->fs_mutex);
278         return ret;
279 }
280
281 int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
282                        unsigned long old, unsigned long bits)
283 {
284         unsigned long flags;
285         if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
286                 struct btrfs_root *root = BTRFS_I(inode)->root;
287                 spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
288                 BTRFS_I(inode)->delalloc_bytes += end - start + 1;
289                 root->fs_info->delalloc_bytes += end - start + 1;
290                 spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
291         }
292         return 0;
293 }
294
295 int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end,
296                          unsigned long old, unsigned long bits)
297 {
298         if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
299                 struct btrfs_root *root = BTRFS_I(inode)->root;
300                 unsigned long flags;
301
302                 spin_lock_irqsave(&root->fs_info->delalloc_lock, flags);
303                 if (end - start + 1 > root->fs_info->delalloc_bytes) {
304                         printk("warning: delalloc account %Lu %Lu\n",
305                                end - start + 1, root->fs_info->delalloc_bytes);
306                         root->fs_info->delalloc_bytes = 0;
307                         BTRFS_I(inode)->delalloc_bytes = 0;
308                 } else {
309                         root->fs_info->delalloc_bytes -= end - start + 1;
310                         BTRFS_I(inode)->delalloc_bytes -= end - start + 1;
311                 }
312                 spin_unlock_irqrestore(&root->fs_info->delalloc_lock, flags);
313         }
314         return 0;
315 }
316
317 int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
318                          size_t size, struct bio *bio)
319 {
320         struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
321         struct btrfs_mapping_tree *map_tree;
322         u64 logical = bio->bi_sector << 9;
323         u64 length = 0;
324         u64 map_length;
325         int ret;
326
327         length = bio->bi_size;
328         map_tree = &root->fs_info->mapping_tree;
329         map_length = length;
330         ret = btrfs_map_block(map_tree, READ, logical,
331                               &map_length, NULL, 0);
332
333         if (map_length < length + size) {
334                 return 1;
335         }
336         return 0;
337 }
338
339 int __btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
340                           int mirror_num)
341 {
342         struct btrfs_root *root = BTRFS_I(inode)->root;
343         struct btrfs_trans_handle *trans;
344         int ret = 0;
345         char *sums = NULL;
346
347         ret = btrfs_csum_one_bio(root, bio, &sums);
348         BUG_ON(ret);
349
350         mutex_lock(&root->fs_info->fs_mutex);
351         trans = btrfs_start_transaction(root, 1);
352
353         btrfs_set_trans_block_group(trans, inode);
354         btrfs_csum_file_blocks(trans, root, inode, bio, sums);
355
356         ret = btrfs_end_transaction(trans, root);
357         BUG_ON(ret);
358         mutex_unlock(&root->fs_info->fs_mutex);
359
360         kfree(sums);
361
362         return btrfs_map_bio(root, rw, bio, mirror_num);
363 }
364
365 int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
366                           int mirror_num)
367 {
368         struct btrfs_root *root = BTRFS_I(inode)->root;
369         int ret = 0;
370
371         if (!(rw & (1 << BIO_RW))) {
372                 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
373                 BUG_ON(ret);
374                 goto mapit;
375         }
376
377         if (btrfs_test_opt(root, NODATASUM) ||
378             btrfs_test_flag(inode, NODATASUM)) {
379                 goto mapit;
380         }
381
382         return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
383                                    inode, rw, bio, mirror_num,
384                                    __btrfs_submit_bio_hook);
385 mapit:
386         return btrfs_map_bio(root, rw, bio, mirror_num);
387 }
388
389 int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
390 {
391         int ret = 0;
392         struct inode *inode = page->mapping->host;
393         struct btrfs_root *root = BTRFS_I(inode)->root;
394         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
395         struct btrfs_csum_item *item;
396         struct btrfs_path *path = NULL;
397         u32 csum;
398
399         if (btrfs_test_opt(root, NODATASUM) ||
400             btrfs_test_flag(inode, NODATASUM))
401                 return 0;
402
403         mutex_lock(&root->fs_info->fs_mutex);
404         path = btrfs_alloc_path();
405         item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0);
406         if (IS_ERR(item)) {
407                 ret = PTR_ERR(item);
408                 /* a csum that isn't present is a preallocated region. */
409                 if (ret == -ENOENT || ret == -EFBIG)
410                         ret = 0;
411                 csum = 0;
412                 printk("no csum found for inode %lu start %Lu\n", inode->i_ino, start);
413                 goto out;
414         }
415         read_extent_buffer(path->nodes[0], &csum, (unsigned long)item,
416                            BTRFS_CRC32_SIZE);
417         set_state_private(io_tree, start, csum);
418 out:
419         if (path)
420                 btrfs_free_path(path);
421         mutex_unlock(&root->fs_info->fs_mutex);
422         return ret;
423 }
424
/*
 * Bookkeeping for one failed read that is being retried.  Filled in by
 * btrfs_readpage_io_failed_hook() and stored as the private value of the
 * inode's io_failure_tree at 'start'.
 */
425 struct io_failure_record {
426         struct page *page;     /* NOTE(review): not assigned by the code visible here */
427         u64 start;             /* file offset of the failed range */
428         u64 len;               /* length of the failed range (end - start + 1) */
429         u64 logical;           /* em->block_start plus the offset of start inside the mapping */
430         int last_mirror;       /* incremented per retry and passed on as mirror_num */
431 };
432
433 int btrfs_readpage_io_failed_hook(struct bio *failed_bio,
434                                   struct page *page, u64 start, u64 end,
435                                   struct extent_state *state)
436 {
437         struct io_failure_record *failrec = NULL;
438         u64 private;
439         struct extent_map *em;
440         struct inode *inode = page->mapping->host;
441         struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
442         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
443         struct bio *bio;
444         int num_copies;
445         int ret;
446         u64 logical;
447
448         ret = get_state_private(failure_tree, start, &private);
449         if (ret) {
450                 failrec = kmalloc(sizeof(*failrec), GFP_NOFS);
451                 if (!failrec)
452                         return -ENOMEM;
453                 failrec->start = start;
454                 failrec->len = end - start + 1;
455                 failrec->last_mirror = 0;
456
457                 spin_lock(&em_tree->lock);
458                 em = lookup_extent_mapping(em_tree, start, failrec->len);
459                 if (em->start > start || em->start + em->len < start) {
460                         free_extent_map(em);
461                         em = NULL;
462                 }
463                 spin_unlock(&em_tree->lock);
464
465                 if (!em || IS_ERR(em)) {
466                         kfree(failrec);
467                         return -EIO;
468                 }
469                 logical = start - em->start;
470                 logical = em->block_start + logical;
471                 failrec->logical = logical;
472                 free_extent_map(em);
473                 set_extent_bits(failure_tree, start, end, EXTENT_LOCKED |
474                                 EXTENT_DIRTY, GFP_NOFS);
475                 set_state_private(failure_tree, start,
476                                  (u64)(unsigned long)failrec);
477         } else {
478                 failrec = (struct io_failure_record *)(unsigned long)private;
479         }
480         num_copies = btrfs_num_copies(
481                               &BTRFS_I(inode)->root->fs_info->mapping_tree,
482                               failrec->logical, failrec->len);
483         failrec->last_mirror++;
484         if (!state) {
485                 spin_lock_irq(&BTRFS_I(inode)->io_tree.lock);
486                 state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
487                                                     failrec->start,
488                                                     EXTENT_LOCKED);
489                 if (state && state->start != failrec->start)
490                         state = NULL;
491                 spin_unlock_irq(&BTRFS_I(inode)->io_tree.lock);
492         }
493         if (!state || failrec->last_mirror > num_copies) {
494                 set_state_private(failure_tree, failrec->start, 0);
495                 clear_extent_bits(failure_tree, failrec->start,
496                                   failrec->start + failrec->len - 1,
497                                   EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
498                 kfree(failrec);
499                 return -EIO;
500         }
501         bio = bio_alloc(GFP_NOFS, 1);
502         bio->bi_private = state;
503         bio->bi_end_io = failed_bio->bi_end_io;
504         bio->bi_sector = failrec->logical >> 9;
505         bio->bi_bdev = failed_bio->bi_bdev;
506         bio->bi_size = 0;
507         bio_add_page(bio, page, failrec->len, start - page_offset(page));
508         btrfs_submit_bio_hook(inode, READ, bio, failrec->last_mirror);
509         return 0;
510 }
511
512 int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
513                                struct extent_state *state)
514 {
515         size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
516         struct inode *inode = page->mapping->host;
517         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
518         char *kaddr;
519         u64 private = ~(u32)0;
520         int ret;
521         struct btrfs_root *root = BTRFS_I(inode)->root;
522         u32 csum = ~(u32)0;
523         unsigned long flags;
524
525         if (btrfs_test_opt(root, NODATASUM) ||
526             btrfs_test_flag(inode, NODATASUM))
527                 return 0;
528         if (state && state->start == start) {
529                 private = state->private;
530                 ret = 0;
531         } else {
532                 ret = get_state_private(io_tree, start, &private);
533         }
534         local_irq_save(flags);
535         kaddr = kmap_atomic(page, KM_IRQ0);
536         if (ret) {
537                 goto zeroit;
538         }
539         csum = btrfs_csum_data(root, kaddr + offset, csum,  end - start + 1);
540         btrfs_csum_final(csum, (char *)&csum);
541         if (csum != private) {
542                 goto zeroit;
543         }
544         kunmap_atomic(kaddr, KM_IRQ0);
545         local_irq_restore(flags);
546
547         /* if the io failure tree for this inode is non-empty,
548          * check to see if we've recovered from a failed IO
549          */
550         private = 0;
551         if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
552                              (u64)-1, 1, EXTENT_DIRTY)) {
553                 u64 private_failure;
554                 struct io_failure_record *failure;
555                 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
556                                         start, &private_failure);
557                 if (ret == 0) {
558                         failure = (struct io_failure_record *)(unsigned long)
559                                    private_failure;
560                         set_state_private(&BTRFS_I(inode)->io_failure_tree,
561                                           failure->start, 0);
562                         clear_extent_bits(&BTRFS_I(inode)->io_failure_tree,
563                                           failure->start,
564                                           failure->start + failure->len - 1,
565                                           EXTENT_DIRTY | EXTENT_LOCKED,
566                                           GFP_NOFS);
567                         kfree(failure);
568                 }
569         }
570         return 0;
571
572 zeroit:
573         printk("btrfs csum failed ino %lu off %llu csum %u private %Lu\n",
574                page->mapping->host->i_ino, (unsigned long long)start, csum,
575                private);
576         memset(kaddr + offset, 1, end - start + 1);
577         flush_dcache_page(page);
578         kunmap_atomic(kaddr, KM_IRQ0);
579         local_irq_restore(flags);
580         if (private == 0)
581                 return 0;
582         return -EIO;
583 }
584
585 void btrfs_read_locked_inode(struct inode *inode)
586 {
587         struct btrfs_path *path;
588         struct extent_buffer *leaf;
589         struct btrfs_inode_item *inode_item;
590         struct btrfs_timespec *tspec;
591         struct btrfs_root *root = BTRFS_I(inode)->root;
592         struct btrfs_key location;
593         u64 alloc_group_block;
594         u32 rdev;
595         int ret;
596
597         path = btrfs_alloc_path();
598         BUG_ON(!path);
599         mutex_lock(&root->fs_info->fs_mutex);
600         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
601
602         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
603         if (ret)
604                 goto make_bad;
605
606         leaf = path->nodes[0];
607         inode_item = btrfs_item_ptr(leaf, path->slots[0],
608                                     struct btrfs_inode_item);
609
610         inode->i_mode = btrfs_inode_mode(leaf, inode_item);
611         inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
612         inode->i_uid = btrfs_inode_uid(leaf, inode_item);
613         inode->i_gid = btrfs_inode_gid(leaf, inode_item);
614         inode->i_size = btrfs_inode_size(leaf, inode_item);
615
616         tspec = btrfs_inode_atime(inode_item);
617         inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
618         inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
619
620         tspec = btrfs_inode_mtime(inode_item);
621         inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
622         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
623
624         tspec = btrfs_inode_ctime(inode_item);
625         inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
626         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
627
628         inode->i_blocks = btrfs_inode_nblocks(leaf, inode_item);
629         inode->i_generation = btrfs_inode_generation(leaf, inode_item);
630         inode->i_rdev = 0;
631         rdev = btrfs_inode_rdev(leaf, inode_item);
632
633         alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
634         BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
635                                                        alloc_group_block);
636         BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
637         if (!BTRFS_I(inode)->block_group) {
638                 BTRFS_I(inode)->block_group = btrfs_find_block_group(root,
639                                                  NULL, 0,
640                                                  BTRFS_BLOCK_GROUP_METADATA, 0);
641         }
642         btrfs_free_path(path);
643         inode_item = NULL;
644
645         mutex_unlock(&root->fs_info->fs_mutex);
646
647         switch (inode->i_mode & S_IFMT) {
648         case S_IFREG:
649                 inode->i_mapping->a_ops = &btrfs_aops;
650                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
651                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
652                 inode->i_fop = &btrfs_file_operations;
653                 inode->i_op = &btrfs_file_inode_operations;
654                 break;
655         case S_IFDIR:
656                 inode->i_fop = &btrfs_dir_file_operations;
657                 if (root == root->fs_info->tree_root)
658                         inode->i_op = &btrfs_dir_ro_inode_operations;
659                 else
660                         inode->i_op = &btrfs_dir_inode_operations;
661                 break;
662         case S_IFLNK:
663                 inode->i_op = &btrfs_symlink_inode_operations;
664                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
665                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
666                 break;
667         default:
668                 init_special_inode(inode, inode->i_mode, rdev);
669                 break;
670         }
671         return;
672
673 make_bad:
674         btrfs_release_path(root, path);
675         btrfs_free_path(path);
676         mutex_unlock(&root->fs_info->fs_mutex);
677         make_bad_inode(inode);
678 }
679
680 static void fill_inode_item(struct extent_buffer *leaf,
681                             struct btrfs_inode_item *item,
682                             struct inode *inode)
683 {
684         btrfs_set_inode_uid(leaf, item, inode->i_uid);
685         btrfs_set_inode_gid(leaf, item, inode->i_gid);
686         btrfs_set_inode_size(leaf, item, inode->i_size);
687         btrfs_set_inode_mode(leaf, item, inode->i_mode);
688         btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
689
690         btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
691                                inode->i_atime.tv_sec);
692         btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
693                                 inode->i_atime.tv_nsec);
694
695         btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
696                                inode->i_mtime.tv_sec);
697         btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
698                                 inode->i_mtime.tv_nsec);
699
700         btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
701                                inode->i_ctime.tv_sec);
702         btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
703                                 inode->i_ctime.tv_nsec);
704
705         btrfs_set_inode_nblocks(leaf, item, inode->i_blocks);
706         btrfs_set_inode_generation(leaf, item, inode->i_generation);
707         btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
708         btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
709         btrfs_set_inode_block_group(leaf, item,
710                                     BTRFS_I(inode)->block_group->key.objectid);
711 }
712
713 int btrfs_update_inode(struct btrfs_trans_handle *trans,
714                               struct btrfs_root *root,
715                               struct inode *inode)
716 {
717         struct btrfs_inode_item *inode_item;
718         struct btrfs_path *path;
719         struct extent_buffer *leaf;
720         int ret;
721
722         path = btrfs_alloc_path();
723         BUG_ON(!path);
724         ret = btrfs_lookup_inode(trans, root, path,
725                                  &BTRFS_I(inode)->location, 1);
726         if (ret) {
727                 if (ret > 0)
728                         ret = -ENOENT;
729                 goto failed;
730         }
731
732         leaf = path->nodes[0];
733         inode_item = btrfs_item_ptr(leaf, path->slots[0],
734                                   struct btrfs_inode_item);
735
736         fill_inode_item(leaf, inode_item, inode);
737         btrfs_mark_buffer_dirty(leaf);
738         btrfs_set_inode_last_trans(trans, inode);
739         ret = 0;
740 failed:
741         btrfs_release_path(root, path);
742         btrfs_free_path(path);
743         return ret;
744 }
745
746
747 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
748                               struct btrfs_root *root,
749                               struct inode *dir,
750                               struct dentry *dentry)
751 {
752         struct btrfs_path *path;
753         const char *name = dentry->d_name.name;
754         int name_len = dentry->d_name.len;
755         int ret = 0;
756         struct extent_buffer *leaf;
757         struct btrfs_dir_item *di;
758         struct btrfs_key key;
759
760         path = btrfs_alloc_path();
761         if (!path) {
762                 ret = -ENOMEM;
763                 goto err;
764         }
765
766         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
767                                     name, name_len, -1);
768         if (IS_ERR(di)) {
769                 ret = PTR_ERR(di);
770                 goto err;
771         }
772         if (!di) {
773                 ret = -ENOENT;
774                 goto err;
775         }
776         leaf = path->nodes[0];
777         btrfs_dir_item_key_to_cpu(leaf, di, &key);
778         ret = btrfs_delete_one_dir_name(trans, root, path, di);
779         if (ret)
780                 goto err;
781         btrfs_release_path(root, path);
782
783         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
784                                          key.objectid, name, name_len, -1);
785         if (IS_ERR(di)) {
786                 ret = PTR_ERR(di);
787                 goto err;
788         }
789         if (!di) {
790                 ret = -ENOENT;
791                 goto err;
792         }
793         ret = btrfs_delete_one_dir_name(trans, root, path, di);
794
795         dentry->d_inode->i_ctime = dir->i_ctime;
796         ret = btrfs_del_inode_ref(trans, root, name, name_len,
797                                   dentry->d_inode->i_ino,
798                                   dentry->d_parent->d_inode->i_ino);
799         if (ret) {
800                 printk("failed to delete reference to %.*s, "
801                        "inode %lu parent %lu\n", name_len, name,
802                        dentry->d_inode->i_ino,
803                        dentry->d_parent->d_inode->i_ino);
804         }
805 err:
806         btrfs_free_path(path);
807         if (!ret) {
808                 dir->i_size -= name_len * 2;
809                 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
810                 btrfs_update_inode(trans, root, dir);
811 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
812                 dentry->d_inode->i_nlink--;
813 #else
814                 drop_nlink(dentry->d_inode);
815 #endif
816                 ret = btrfs_update_inode(trans, root, dentry->d_inode);
817                 dir->i_sb->s_dirt = 1;
818         }
819         return ret;
820 }
821
822 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
823 {
824         struct btrfs_root *root;
825         struct btrfs_trans_handle *trans;
826         struct inode *inode = dentry->d_inode;
827         int ret;
828         unsigned long nr = 0;
829
830         root = BTRFS_I(dir)->root;
831         mutex_lock(&root->fs_info->fs_mutex);
832
833         ret = btrfs_check_free_space(root, 1, 1);
834         if (ret)
835                 goto fail;
836
837         trans = btrfs_start_transaction(root, 1);
838
839         btrfs_set_trans_block_group(trans, dir);
840         ret = btrfs_unlink_trans(trans, root, dir, dentry);
841         nr = trans->blocks_used;
842
843         if (inode->i_nlink == 0) {
844                 int found;
845                 /* if the inode isn't linked anywhere,
846                  * we don't need to worry about
847                  * data=ordered
848                  */
849                 found = btrfs_del_ordered_inode(inode);
850                 if (found == 1) {
851                         atomic_dec(&inode->i_count);
852                 }
853         }
854
855         btrfs_end_transaction(trans, root);
856 fail:
857         mutex_unlock(&root->fs_info->fs_mutex);
858         btrfs_btree_balance_dirty(root, nr);
859         btrfs_throttle(root);
860         return ret;
861 }
862
863 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
864 {
865         struct inode *inode = dentry->d_inode;
866         int err = 0;
867         int ret;
868         struct btrfs_root *root = BTRFS_I(dir)->root;
869         struct btrfs_trans_handle *trans;
870         unsigned long nr = 0;
871
872         if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
873                 return -ENOTEMPTY;
874
875         mutex_lock(&root->fs_info->fs_mutex);
876         ret = btrfs_check_free_space(root, 1, 1);
877         if (ret)
878                 goto fail;
879
880         trans = btrfs_start_transaction(root, 1);
881         btrfs_set_trans_block_group(trans, dir);
882
883         /* now the directory is empty */
884         err = btrfs_unlink_trans(trans, root, dir, dentry);
885         if (!err) {
886                 inode->i_size = 0;
887         }
888
889         nr = trans->blocks_used;
890         ret = btrfs_end_transaction(trans, root);
891 fail:
892         mutex_unlock(&root->fs_info->fs_mutex);
893         btrfs_btree_balance_dirty(root, nr);
894         btrfs_throttle(root);
895
896         if (ret && !err)
897                 err = ret;
898         return err;
899 }
900
901 /*
902  * this can truncate away extent items, csum items and directory items.
903  * It starts at a high offset and removes keys until it can't find
904  * any higher than i_size.
905  *
906  * csum items that cross the new i_size are truncated to the new size
907  * as well.
908  */
909 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
910                                    struct btrfs_root *root,
911                                    struct inode *inode,
912                                    u32 min_type)
913 {
914         int ret;
915         struct btrfs_path *path;
916         struct btrfs_key key;
917         struct btrfs_key found_key;
918         u32 found_type;
919         struct extent_buffer *leaf;
920         struct btrfs_file_extent_item *fi;
921         u64 extent_start = 0;
922         u64 extent_num_bytes = 0;
923         u64 item_end = 0;
924         u64 root_gen = 0;
925         u64 root_owner = 0;
926         int found_extent;
927         int del_item;
928         int pending_del_nr = 0;
929         int pending_del_slot = 0;
930         int extent_type = -1;
931         u64 mask = root->sectorsize - 1;
932
933         btrfs_drop_extent_cache(inode, inode->i_size & (~mask), (u64)-1);
934         path = btrfs_alloc_path();
935         path->reada = -1;
936         BUG_ON(!path);
937
938         /* FIXME, add redo link to tree so we don't leak on crash */
939         key.objectid = inode->i_ino;
940         key.offset = (u64)-1;
941         key.type = (u8)-1;
942
943         btrfs_init_path(path);
944 search_again:
945         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
946         if (ret < 0) {
947                 goto error;
948         }
949         if (ret > 0) {
950                 BUG_ON(path->slots[0] == 0);
951                 path->slots[0]--;
952         }
953
954         while(1) {
955                 fi = NULL;
956                 leaf = path->nodes[0];
957                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
958                 found_type = btrfs_key_type(&found_key);
959
960                 if (found_key.objectid != inode->i_ino)
961                         break;
962
963                 if (found_type < min_type)
964                         break;
965
966                 item_end = found_key.offset;
967                 if (found_type == BTRFS_EXTENT_DATA_KEY) {
968                         fi = btrfs_item_ptr(leaf, path->slots[0],
969                                             struct btrfs_file_extent_item);
970                         extent_type = btrfs_file_extent_type(leaf, fi);
971                         if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
972                                 item_end +=
973                                     btrfs_file_extent_num_bytes(leaf, fi);
974                         } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
975                                 struct btrfs_item *item = btrfs_item_nr(leaf,
976                                                                 path->slots[0]);
977                                 item_end += btrfs_file_extent_inline_len(leaf,
978                                                                          item);
979                         }
980                         item_end--;
981                 }
982                 if (found_type == BTRFS_CSUM_ITEM_KEY) {
983                         ret = btrfs_csum_truncate(trans, root, path,
984                                                   inode->i_size);
985                         BUG_ON(ret);
986                 }
987                 if (item_end < inode->i_size) {
988                         if (found_type == BTRFS_DIR_ITEM_KEY) {
989                                 found_type = BTRFS_INODE_ITEM_KEY;
990                         } else if (found_type == BTRFS_EXTENT_ITEM_KEY) {
991                                 found_type = BTRFS_CSUM_ITEM_KEY;
992                         } else if (found_type == BTRFS_EXTENT_DATA_KEY) {
993                                 found_type = BTRFS_XATTR_ITEM_KEY;
994                         } else if (found_type == BTRFS_XATTR_ITEM_KEY) {
995                                 found_type = BTRFS_INODE_REF_KEY;
996                         } else if (found_type) {
997                                 found_type--;
998                         } else {
999                                 break;
1000                         }
1001                         btrfs_set_key_type(&key, found_type);
1002                         goto next;
1003                 }
1004                 if (found_key.offset >= inode->i_size)
1005                         del_item = 1;
1006                 else
1007                         del_item = 0;
1008                 found_extent = 0;
1009
1010                 /* FIXME, shrink the extent if the ref count is only 1 */
1011                 if (found_type != BTRFS_EXTENT_DATA_KEY)
1012                         goto delete;
1013
1014                 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
1015                         u64 num_dec;
1016                         extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
1017                         if (!del_item) {
1018                                 u64 orig_num_bytes =
1019                                         btrfs_file_extent_num_bytes(leaf, fi);
1020                                 extent_num_bytes = inode->i_size -
1021                                         found_key.offset + root->sectorsize - 1;
1022                                 extent_num_bytes = extent_num_bytes &
1023                                         ~((u64)root->sectorsize - 1);
1024                                 btrfs_set_file_extent_num_bytes(leaf, fi,
1025                                                          extent_num_bytes);
1026                                 num_dec = (orig_num_bytes -
1027                                            extent_num_bytes);
1028                                 if (extent_start != 0)
1029                                         dec_i_blocks(inode, num_dec);
1030                                 btrfs_mark_buffer_dirty(leaf);
1031                         } else {
1032                                 extent_num_bytes =
1033                                         btrfs_file_extent_disk_num_bytes(leaf,
1034                                                                          fi);
1035                                 /* FIXME blocksize != 4096 */
1036                                 num_dec = btrfs_file_extent_num_bytes(leaf, fi);
1037                                 if (extent_start != 0) {
1038                                         found_extent = 1;
1039                                         dec_i_blocks(inode, num_dec);
1040                                 }
1041                                 root_gen = btrfs_header_generation(leaf);
1042                                 root_owner = btrfs_header_owner(leaf);
1043                         }
1044                 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1045                         if (!del_item) {
1046                                 u32 newsize = inode->i_size - found_key.offset;
1047                                 dec_i_blocks(inode, item_end + 1 -
1048                                             found_key.offset - newsize);
1049                                 newsize =
1050                                     btrfs_file_extent_calc_inline_size(newsize);
1051                                 ret = btrfs_truncate_item(trans, root, path,
1052                                                           newsize, 1);
1053                                 BUG_ON(ret);
1054                         } else {
1055                                 dec_i_blocks(inode, item_end + 1 -
1056                                              found_key.offset);
1057                         }
1058                 }
1059 delete:
1060                 if (del_item) {
1061                         if (!pending_del_nr) {
1062                                 /* no pending yet, add ourselves */
1063                                 pending_del_slot = path->slots[0];
1064                                 pending_del_nr = 1;
1065                         } else if (pending_del_nr &&
1066                                    path->slots[0] + 1 == pending_del_slot) {
1067                                 /* hop on the pending chunk */
1068                                 pending_del_nr++;
1069                                 pending_del_slot = path->slots[0];
1070                         } else {
1071                                 printk("bad pending slot %d pending_del_nr %d pending_del_slot %d\n", path->slots[0], pending_del_nr, pending_del_slot);
1072                         }
1073                 } else {
1074                         break;
1075                 }
1076                 if (found_extent) {
1077                         ret = btrfs_free_extent(trans, root, extent_start,
1078                                                 extent_num_bytes,
1079                                                 root_owner,
1080                                                 root_gen, inode->i_ino,
1081                                                 found_key.offset, 0);
1082                         BUG_ON(ret);
1083                 }
1084 next:
1085                 if (path->slots[0] == 0) {
1086                         if (pending_del_nr)
1087                                 goto del_pending;
1088                         btrfs_release_path(root, path);
1089                         goto search_again;
1090                 }
1091
1092                 path->slots[0]--;
1093                 if (pending_del_nr &&
1094                     path->slots[0] + 1 != pending_del_slot) {
1095                         struct btrfs_key debug;
1096 del_pending:
1097                         btrfs_item_key_to_cpu(path->nodes[0], &debug,
1098                                               pending_del_slot);
1099                         ret = btrfs_del_items(trans, root, path,
1100                                               pending_del_slot,
1101                                               pending_del_nr);
1102                         BUG_ON(ret);
1103                         pending_del_nr = 0;
1104                         btrfs_release_path(root, path);
1105                         goto search_again;
1106                 }
1107         }
1108         ret = 0;
1109 error:
1110         if (pending_del_nr) {
1111                 ret = btrfs_del_items(trans, root, path, pending_del_slot,
1112                                       pending_del_nr);
1113         }
1114         btrfs_release_path(root, path);
1115         btrfs_free_path(path);
1116         inode->i_sb->s_dirt = 1;
1117         return ret;
1118 }
1119
1120 static int btrfs_cow_one_page(struct inode *inode, struct page *page,
1121                               size_t zero_start)
1122 {
1123         char *kaddr;
1124         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1125         u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
1126         u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
1127         int ret = 0;
1128
1129         WARN_ON(!PageLocked(page));
1130         set_page_extent_mapped(page);
1131
1132         lock_extent(io_tree, page_start, page_end, GFP_NOFS);
1133         set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start,
1134                             page_end, GFP_NOFS);
1135
1136         if (zero_start != PAGE_CACHE_SIZE) {
1137                 kaddr = kmap(page);
1138                 memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
1139                 flush_dcache_page(page);
1140                 kunmap(page);
1141         }
1142         set_page_dirty(page);
1143         unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
1144
1145         return ret;
1146 }
1147
1148 /*
1149  * taken from block_truncate_page, but does cow as it zeros out
1150  * any bytes left in the last page in the file.
1151  */
1152 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
1153 {
1154         struct inode *inode = mapping->host;
1155         struct btrfs_root *root = BTRFS_I(inode)->root;
1156         u32 blocksize = root->sectorsize;
1157         pgoff_t index = from >> PAGE_CACHE_SHIFT;
1158         unsigned offset = from & (PAGE_CACHE_SIZE-1);
1159         struct page *page;
1160         int ret = 0;
1161         u64 page_start;
1162
1163         if ((offset & (blocksize - 1)) == 0)
1164                 goto out;
1165
1166         ret = -ENOMEM;
1167         page = grab_cache_page(mapping, index);
1168         if (!page)
1169                 goto out;
1170         if (!PageUptodate(page)) {
1171                 ret = btrfs_readpage(NULL, page);
1172                 lock_page(page);
1173                 if (!PageUptodate(page)) {
1174                         ret = -EIO;
1175                         goto out;
1176                 }
1177         }
1178         page_start = (u64)page->index << PAGE_CACHE_SHIFT;
1179
1180         ret = btrfs_cow_one_page(inode, page, offset);
1181
1182         unlock_page(page);
1183         page_cache_release(page);
1184 out:
1185         return ret;
1186 }
1187
1188 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
1189 {
1190         struct inode *inode = dentry->d_inode;
1191         int err;
1192
1193         err = inode_change_ok(inode, attr);
1194         if (err)
1195                 return err;
1196
1197         if (S_ISREG(inode->i_mode) &&
1198             attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
1199                 struct btrfs_trans_handle *trans;
1200                 struct btrfs_root *root = BTRFS_I(inode)->root;
1201                 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1202
1203                 u64 mask = root->sectorsize - 1;
1204                 u64 hole_start = (inode->i_size + mask) & ~mask;
1205                 u64 block_end = (attr->ia_size + mask) & ~mask;
1206                 u64 hole_size;
1207                 u64 alloc_hint = 0;
1208
1209                 if (attr->ia_size <= hole_start)
1210                         goto out;
1211
1212                 mutex_lock(&root->fs_info->fs_mutex);
1213                 err = btrfs_check_free_space(root, 1, 0);
1214                 mutex_unlock(&root->fs_info->fs_mutex);
1215                 if (err)
1216                         goto fail;
1217
1218                 btrfs_truncate_page(inode->i_mapping, inode->i_size);
1219
1220                 lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1221                 hole_size = block_end - hole_start;
1222
1223                 mutex_lock(&root->fs_info->fs_mutex);
1224                 trans = btrfs_start_transaction(root, 1);
1225                 btrfs_set_trans_block_group(trans, inode);
1226                 err = btrfs_drop_extents(trans, root, inode,
1227                                          hole_start, block_end, hole_start,
1228                                          &alloc_hint);
1229
1230                 if (alloc_hint != EXTENT_MAP_INLINE) {
1231                         err = btrfs_insert_file_extent(trans, root,
1232                                                        inode->i_ino,
1233                                                        hole_start, 0, 0,
1234                                                        hole_size, 0);
1235                         btrfs_drop_extent_cache(inode, hole_start,
1236                                                 (u64)-1);
1237                         btrfs_check_file(root, inode);
1238                 }
1239                 btrfs_end_transaction(trans, root);
1240                 mutex_unlock(&root->fs_info->fs_mutex);
1241                 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1242                 if (err)
1243                         return err;
1244         }
1245 out:
1246         err = inode_setattr(inode, attr);
1247 fail:
1248         return err;
1249 }
1250
1251 void btrfs_put_inode(struct inode *inode)
1252 {
1253         int ret;
1254
1255         if (!BTRFS_I(inode)->ordered_trans) {
1256                 return;
1257         }
1258
1259         if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) ||
1260             mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))
1261                 return;
1262
1263         ret = btrfs_del_ordered_inode(inode);
1264         if (ret == 1) {
1265                 atomic_dec(&inode->i_count);
1266         }
1267 }
1268
1269 void btrfs_delete_inode(struct inode *inode)
1270 {
1271         struct btrfs_trans_handle *trans;
1272         struct btrfs_root *root = BTRFS_I(inode)->root;
1273         unsigned long nr;
1274         int ret;
1275
1276         truncate_inode_pages(&inode->i_data, 0);
1277         if (is_bad_inode(inode)) {
1278                 goto no_delete;
1279         }
1280
1281         inode->i_size = 0;
1282         mutex_lock(&root->fs_info->fs_mutex);
1283         trans = btrfs_start_transaction(root, 1);
1284
1285         btrfs_set_trans_block_group(trans, inode);
1286         ret = btrfs_truncate_in_trans(trans, root, inode, 0);
1287         if (ret)
1288                 goto no_delete_lock;
1289
1290         nr = trans->blocks_used;
1291         clear_inode(inode);
1292
1293         btrfs_end_transaction(trans, root);
1294         mutex_unlock(&root->fs_info->fs_mutex);
1295         btrfs_btree_balance_dirty(root, nr);
1296         btrfs_throttle(root);
1297         return;
1298
1299 no_delete_lock:
1300         nr = trans->blocks_used;
1301         btrfs_end_transaction(trans, root);
1302         mutex_unlock(&root->fs_info->fs_mutex);
1303         btrfs_btree_balance_dirty(root, nr);
1304         btrfs_throttle(root);
1305 no_delete:
1306         clear_inode(inode);
1307 }
1308
1309 /*
1310  * this returns the key found in the dir entry in the location pointer.
1311  * If no dir entries were found, location->objectid is 0.
1312  */
1313 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
1314                                struct btrfs_key *location)
1315 {
1316         const char *name = dentry->d_name.name;
1317         int namelen = dentry->d_name.len;
1318         struct btrfs_dir_item *di;
1319         struct btrfs_path *path;
1320         struct btrfs_root *root = BTRFS_I(dir)->root;
1321         int ret = 0;
1322
1323         if (namelen == 1 && strcmp(name, ".") == 0) {
1324                 location->objectid = dir->i_ino;
1325                 location->type = BTRFS_INODE_ITEM_KEY;
1326                 location->offset = 0;
1327                 return 0;
1328         }
1329         path = btrfs_alloc_path();
1330         BUG_ON(!path);
1331
1332         if (namelen == 2 && strcmp(name, "..") == 0) {
1333                 struct btrfs_key key;
1334                 struct extent_buffer *leaf;
1335                 u32 nritems;
1336                 int slot;
1337
1338                 key.objectid = dir->i_ino;
1339                 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1340                 key.offset = 0;
1341                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1342                 BUG_ON(ret == 0);
1343                 ret = 0;
1344
1345                 leaf = path->nodes[0];
1346                 slot = path->slots[0];
1347                 nritems = btrfs_header_nritems(leaf);
1348                 if (slot >= nritems)
1349                         goto out_err;
1350
1351                 btrfs_item_key_to_cpu(leaf, &key, slot);
1352                 if (key.objectid != dir->i_ino ||
1353                     key.type != BTRFS_INODE_REF_KEY) {
1354                         goto out_err;
1355                 }
1356                 location->objectid = key.offset;
1357                 location->type = BTRFS_INODE_ITEM_KEY;
1358                 location->offset = 0;
1359                 goto out;
1360         }
1361
1362         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
1363                                     namelen, 0);
1364         if (IS_ERR(di))
1365                 ret = PTR_ERR(di);
1366         if (!di || IS_ERR(di)) {
1367                 goto out_err;
1368         }
1369         btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
1370 out:
1371         btrfs_free_path(path);
1372         return ret;
1373 out_err:
1374         location->objectid = 0;
1375         goto out;
1376 }
1377
1378 /*
1379  * when we hit a tree root in a directory, the btrfs part of the inode
1380  * needs to be changed to reflect the root directory of the tree root.  This
1381  * is kind of like crossing a mount point.
1382  */
1383 static int fixup_tree_root_location(struct btrfs_root *root,
1384                              struct btrfs_key *location,
1385                              struct btrfs_root **sub_root,
1386                              struct dentry *dentry)
1387 {
1388         struct btrfs_path *path;
1389         struct btrfs_root_item *ri;
1390
1391         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
1392                 return 0;
1393         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
1394                 return 0;
1395
1396         path = btrfs_alloc_path();
1397         BUG_ON(!path);
1398         mutex_lock(&root->fs_info->fs_mutex);
1399
1400         *sub_root = btrfs_read_fs_root(root->fs_info, location,
1401                                         dentry->d_name.name,
1402                                         dentry->d_name.len);
1403         if (IS_ERR(*sub_root))
1404                 return PTR_ERR(*sub_root);
1405
1406         ri = &(*sub_root)->root_item;
1407         location->objectid = btrfs_root_dirid(ri);
1408         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1409         location->offset = 0;
1410
1411         btrfs_free_path(path);
1412         mutex_unlock(&root->fs_info->fs_mutex);
1413         return 0;
1414 }
1415
1416 static int btrfs_init_locked_inode(struct inode *inode, void *p)
1417 {
1418         struct btrfs_iget_args *args = p;
1419         inode->i_ino = args->ino;
1420         BTRFS_I(inode)->root = args->root;
1421         BTRFS_I(inode)->delalloc_bytes = 0;
1422         extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1423         extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1424                              inode->i_mapping, GFP_NOFS);
1425         extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
1426                              inode->i_mapping, GFP_NOFS);
1427         atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
1428         return 0;
1429 }
1430
1431 static int btrfs_find_actor(struct inode *inode, void *opaque)
1432 {
1433         struct btrfs_iget_args *args = opaque;
1434         return (args->ino == inode->i_ino &&
1435                 args->root == BTRFS_I(inode)->root);
1436 }
1437
1438 struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
1439                             u64 root_objectid)
1440 {
1441         struct btrfs_iget_args args;
1442         args.ino = objectid;
1443         args.root = btrfs_lookup_fs_root(btrfs_sb(s)->fs_info, root_objectid);
1444
1445         if (!args.root)
1446                 return NULL;
1447
1448         return ilookup5(s, objectid, btrfs_find_actor, (void *)&args);
1449 }
1450
1451 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
1452                                 struct btrfs_root *root)
1453 {
1454         struct inode *inode;
1455         struct btrfs_iget_args args;
1456         args.ino = objectid;
1457         args.root = root;
1458
1459         inode = iget5_locked(s, objectid, btrfs_find_actor,
1460                              btrfs_init_locked_inode,
1461                              (void *)&args);
1462         return inode;
1463 }
1464
1465 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
1466                                    struct nameidata *nd)
1467 {
1468         struct inode * inode;
1469         struct btrfs_inode *bi = BTRFS_I(dir);
1470         struct btrfs_root *root = bi->root;
1471         struct btrfs_root *sub_root = root;
1472         struct btrfs_key location;
1473         int ret;
1474
1475         if (dentry->d_name.len > BTRFS_NAME_LEN)
1476                 return ERR_PTR(-ENAMETOOLONG);
1477
1478         mutex_lock(&root->fs_info->fs_mutex);
1479         ret = btrfs_inode_by_name(dir, dentry, &location);
1480         mutex_unlock(&root->fs_info->fs_mutex);
1481
1482         if (ret < 0)
1483                 return ERR_PTR(ret);
1484
1485         inode = NULL;
1486         if (location.objectid) {
1487                 ret = fixup_tree_root_location(root, &location, &sub_root,
1488                                                 dentry);
1489                 if (ret < 0)
1490                         return ERR_PTR(ret);
1491                 if (ret > 0)
1492                         return ERR_PTR(-ENOENT);
1493                 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
1494                                           sub_root);
1495                 if (!inode)
1496                         return ERR_PTR(-EACCES);
1497                 if (inode->i_state & I_NEW) {
1498                         /* the inode and parent dir are two different roots */
1499                         if (sub_root != root) {
1500                                 igrab(inode);
1501                                 sub_root->inode = inode;
1502                         }
1503                         BTRFS_I(inode)->root = sub_root;
1504                         memcpy(&BTRFS_I(inode)->location, &location,
1505                                sizeof(location));
1506                         btrfs_read_locked_inode(inode);
1507                         unlock_new_inode(inode);
1508                 }
1509         }
1510         return d_splice_alias(inode, dentry);
1511 }
1512
1513 static unsigned char btrfs_filetype_table[] = {
1514         DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
1515 };
1516
1517 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1518 {
1519         struct inode *inode = filp->f_dentry->d_inode;
1520         struct btrfs_root *root = BTRFS_I(inode)->root;
1521         struct btrfs_item *item;
1522         struct btrfs_dir_item *di;
1523         struct btrfs_key key;
1524         struct btrfs_key found_key;
1525         struct btrfs_path *path;
1526         int ret;
1527         u32 nritems;
1528         struct extent_buffer *leaf;
1529         int slot;
1530         int advance;
1531         unsigned char d_type;
1532         int over = 0;
1533         u32 di_cur;
1534         u32 di_total;
1535         u32 di_len;
1536         int key_type = BTRFS_DIR_INDEX_KEY;
1537         char tmp_name[32];
1538         char *name_ptr;
1539         int name_len;
1540
1541         /* FIXME, use a real flag for deciding about the key type */
1542         if (root->fs_info->tree_root == root)
1543                 key_type = BTRFS_DIR_ITEM_KEY;
1544
1545         /* special case for "." */
1546         if (filp->f_pos == 0) {
1547                 over = filldir(dirent, ".", 1,
1548                                1, inode->i_ino,
1549                                DT_DIR);
1550                 if (over)
1551                         return 0;
1552                 filp->f_pos = 1;
1553         }
1554
1555         mutex_lock(&root->fs_info->fs_mutex);
1556         key.objectid = inode->i_ino;
1557         path = btrfs_alloc_path();
1558         path->reada = 2;
1559
1560         /* special case for .., just use the back ref */
1561         if (filp->f_pos == 1) {
1562                 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1563                 key.offset = 0;
1564                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1565                 BUG_ON(ret == 0);
1566                 leaf = path->nodes[0];
1567                 slot = path->slots[0];
1568                 nritems = btrfs_header_nritems(leaf);
1569                 if (slot >= nritems) {
1570                         btrfs_release_path(root, path);
1571                         goto read_dir_items;
1572                 }
1573                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1574                 btrfs_release_path(root, path);
1575                 if (found_key.objectid != key.objectid ||
1576                     found_key.type != BTRFS_INODE_REF_KEY)
1577                         goto read_dir_items;
1578                 over = filldir(dirent, "..", 2,
1579                                2, found_key.offset, DT_DIR);
1580                 if (over)
1581                         goto nopos;
1582                 filp->f_pos = 2;
1583         }
1584
1585 read_dir_items:
1586         btrfs_set_key_type(&key, key_type);
1587         key.offset = filp->f_pos;
1588
1589         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1590         if (ret < 0)
1591                 goto err;
1592         advance = 0;
1593         while(1) {
1594                 leaf = path->nodes[0];
1595                 nritems = btrfs_header_nritems(leaf);
1596                 slot = path->slots[0];
1597                 if (advance || slot >= nritems) {
1598                         if (slot >= nritems -1) {
1599                                 ret = btrfs_next_leaf(root, path);
1600                                 if (ret)
1601                                         break;
1602                                 leaf = path->nodes[0];
1603                                 nritems = btrfs_header_nritems(leaf);
1604                                 slot = path->slots[0];
1605                         } else {
1606                                 slot++;
1607                                 path->slots[0]++;
1608                         }
1609                 }
1610                 advance = 1;
1611                 item = btrfs_item_nr(leaf, slot);
1612                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1613
1614                 if (found_key.objectid != key.objectid)
1615                         break;
1616                 if (btrfs_key_type(&found_key) != key_type)
1617                         break;
1618                 if (found_key.offset < filp->f_pos)
1619                         continue;
1620
1621                 filp->f_pos = found_key.offset;
1622                 advance = 1;
1623                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
1624                 di_cur = 0;
1625                 di_total = btrfs_item_size(leaf, item);
1626                 while(di_cur < di_total) {
1627                         struct btrfs_key location;
1628
1629                         name_len = btrfs_dir_name_len(leaf, di);
1630                         if (name_len < 32) {
1631                                 name_ptr = tmp_name;
1632                         } else {
1633                                 name_ptr = kmalloc(name_len, GFP_NOFS);
1634                                 BUG_ON(!name_ptr);
1635                         }
1636                         read_extent_buffer(leaf, name_ptr,
1637                                            (unsigned long)(di + 1), name_len);
1638
1639                         d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
1640                         btrfs_dir_item_key_to_cpu(leaf, di, &location);
1641                         over = filldir(dirent, name_ptr, name_len,
1642                                        found_key.offset,
1643                                        location.objectid,
1644                                        d_type);
1645
1646                         if (name_ptr != tmp_name)
1647                                 kfree(name_ptr);
1648
1649                         if (over)
1650                                 goto nopos;
1651                         di_len = btrfs_dir_name_len(leaf, di) +
1652                                 btrfs_dir_data_len(leaf, di) +sizeof(*di);
1653                         di_cur += di_len;
1654                         di = (struct btrfs_dir_item *)((char *)di + di_len);
1655                 }
1656         }
1657         if (key_type == BTRFS_DIR_INDEX_KEY)
1658                 filp->f_pos = INT_LIMIT(typeof(filp->f_pos));
1659         else
1660                 filp->f_pos++;
1661 nopos:
1662         ret = 0;
1663 err:
1664         btrfs_release_path(root, path);
1665         btrfs_free_path(path);
1666         mutex_unlock(&root->fs_info->fs_mutex);
1667         return ret;
1668 }
1669
1670 int btrfs_write_inode(struct inode *inode, int wait)
1671 {
1672         struct btrfs_root *root = BTRFS_I(inode)->root;
1673         struct btrfs_trans_handle *trans;
1674         int ret = 0;
1675
1676         if (wait) {
1677                 mutex_lock(&root->fs_info->fs_mutex);
1678                 trans = btrfs_start_transaction(root, 1);
1679                 btrfs_set_trans_block_group(trans, inode);
1680                 ret = btrfs_commit_transaction(trans, root);
1681                 mutex_unlock(&root->fs_info->fs_mutex);
1682         }
1683         return ret;
1684 }
1685
1686 /*
1687  * This is somewhat expensive, updating the tree every time the
1688  * inode changes.  But, it is most likely to find the inode in cache.
1689  * FIXME, needs more benchmarking...there are no reasons other than performance
1690  * to keep or drop this code.
1691  */
1692 void btrfs_dirty_inode(struct inode *inode)
1693 {
1694         struct btrfs_root *root = BTRFS_I(inode)->root;
1695         struct btrfs_trans_handle *trans;
1696
1697         mutex_lock(&root->fs_info->fs_mutex);
1698         trans = btrfs_start_transaction(root, 1);
1699         btrfs_set_trans_block_group(trans, inode);
1700         btrfs_update_inode(trans, root, inode);
1701         btrfs_end_transaction(trans, root);
1702         mutex_unlock(&root->fs_info->fs_mutex);
1703 }
1704
1705 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
1706                                      struct btrfs_root *root,
1707                                      const char *name, int name_len,
1708                                      u64 ref_objectid,
1709                                      u64 objectid,
1710                                      struct btrfs_block_group_cache *group,
1711                                      int mode)
1712 {
1713         struct inode *inode;
1714         struct btrfs_inode_item *inode_item;
1715         struct btrfs_block_group_cache *new_inode_group;
1716         struct btrfs_key *location;
1717         struct btrfs_path *path;
1718         struct btrfs_inode_ref *ref;
1719         struct btrfs_key key[2];
1720         u32 sizes[2];
1721         unsigned long ptr;
1722         int ret;
1723         int owner;
1724
1725         path = btrfs_alloc_path();
1726         BUG_ON(!path);
1727
1728         inode = new_inode(root->fs_info->sb);
1729         if (!inode)
1730                 return ERR_PTR(-ENOMEM);
1731
1732         extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1733         extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1734                              inode->i_mapping, GFP_NOFS);
1735         extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
1736                              inode->i_mapping, GFP_NOFS);
1737         atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
1738         BTRFS_I(inode)->delalloc_bytes = 0;
1739         BTRFS_I(inode)->root = root;
1740
1741         if (mode & S_IFDIR)
1742                 owner = 0;
1743         else
1744                 owner = 1;
1745         new_inode_group = btrfs_find_block_group(root, group, 0,
1746                                        BTRFS_BLOCK_GROUP_METADATA, owner);
1747         if (!new_inode_group) {
1748                 printk("find_block group failed\n");
1749                 new_inode_group = group;
1750         }
1751         BTRFS_I(inode)->block_group = new_inode_group;
1752         BTRFS_I(inode)->flags = 0;
1753
1754         key[0].objectid = objectid;
1755         btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
1756         key[0].offset = 0;
1757
1758         key[1].objectid = objectid;
1759         btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY);
1760         key[1].offset = ref_objectid;
1761
1762         sizes[0] = sizeof(struct btrfs_inode_item);
1763         sizes[1] = name_len + sizeof(*ref);
1764
1765         ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2);
1766         if (ret != 0)
1767                 goto fail;
1768
1769         if (objectid > root->highest_inode)
1770                 root->highest_inode = objectid;
1771
1772         inode->i_uid = current->fsuid;
1773         inode->i_gid = current->fsgid;
1774         inode->i_mode = mode;
1775         inode->i_ino = objectid;
1776         inode->i_blocks = 0;
1777         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1778         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1779                                   struct btrfs_inode_item);
1780         fill_inode_item(path->nodes[0], inode_item, inode);
1781
1782         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
1783                              struct btrfs_inode_ref);
1784         btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
1785         ptr = (unsigned long)(ref + 1);
1786         write_extent_buffer(path->nodes[0], name, ptr, name_len);
1787
1788         btrfs_mark_buffer_dirty(path->nodes[0]);
1789         btrfs_free_path(path);
1790
1791         location = &BTRFS_I(inode)->location;
1792         location->objectid = objectid;
1793         location->offset = 0;
1794         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1795
1796         insert_inode_hash(inode);
1797         return inode;
1798 fail:
1799         btrfs_free_path(path);
1800         return ERR_PTR(ret);
1801 }
1802
1803 static inline u8 btrfs_inode_type(struct inode *inode)
1804 {
1805         return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
1806 }
1807
1808 static int btrfs_add_link(struct btrfs_trans_handle *trans,
1809                             struct dentry *dentry, struct inode *inode,
1810                             int add_backref)
1811 {
1812         int ret;
1813         struct btrfs_key key;
1814         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
1815         struct inode *parent_inode;
1816
1817         key.objectid = inode->i_ino;
1818         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1819         key.offset = 0;
1820
1821         ret = btrfs_insert_dir_item(trans, root,
1822                                     dentry->d_name.name, dentry->d_name.len,
1823                                     dentry->d_parent->d_inode->i_ino,
1824                                     &key, btrfs_inode_type(inode));
1825         if (ret == 0) {
1826                 if (add_backref) {
1827                         ret = btrfs_insert_inode_ref(trans, root,
1828                                              dentry->d_name.name,
1829                                              dentry->d_name.len,
1830                                              inode->i_ino,
1831                                              dentry->d_parent->d_inode->i_ino);
1832                 }
1833                 parent_inode = dentry->d_parent->d_inode;
1834                 parent_inode->i_size += dentry->d_name.len * 2;
1835                 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
1836                 ret = btrfs_update_inode(trans, root,
1837                                          dentry->d_parent->d_inode);
1838         }
1839         return ret;
1840 }
1841
1842 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
1843                             struct dentry *dentry, struct inode *inode,
1844                             int backref)
1845 {
1846         int err = btrfs_add_link(trans, dentry, inode, backref);
1847         if (!err) {
1848                 d_instantiate(dentry, inode);
1849                 return 0;
1850         }
1851         if (err > 0)
1852                 err = -EEXIST;
1853         return err;
1854 }
1855
1856 static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
1857                         int mode, dev_t rdev)
1858 {
1859         struct btrfs_trans_handle *trans;
1860         struct btrfs_root *root = BTRFS_I(dir)->root;
1861         struct inode *inode = NULL;
1862         int err;
1863         int drop_inode = 0;
1864         u64 objectid;
1865         unsigned long nr = 0;
1866
1867         if (!new_valid_dev(rdev))
1868                 return -EINVAL;
1869
1870         mutex_lock(&root->fs_info->fs_mutex);
1871         err = btrfs_check_free_space(root, 1, 0);
1872         if (err)
1873                 goto fail;
1874
1875         trans = btrfs_start_transaction(root, 1);
1876         btrfs_set_trans_block_group(trans, dir);
1877
1878         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1879         if (err) {
1880                 err = -ENOSPC;
1881                 goto out_unlock;
1882         }
1883
1884         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
1885                                 dentry->d_name.len,
1886                                 dentry->d_parent->d_inode->i_ino, objectid,
1887                                 BTRFS_I(dir)->block_group, mode);
1888         err = PTR_ERR(inode);
1889         if (IS_ERR(inode))
1890                 goto out_unlock;
1891
1892         btrfs_set_trans_block_group(trans, inode);
1893         err = btrfs_add_nondir(trans, dentry, inode, 0);
1894         if (err)
1895                 drop_inode = 1;
1896         else {
1897                 inode->i_op = &btrfs_special_inode_operations;
1898                 init_special_inode(inode, inode->i_mode, rdev);
1899                 btrfs_update_inode(trans, root, inode);
1900         }
1901         dir->i_sb->s_dirt = 1;
1902         btrfs_update_inode_block_group(trans, inode);
1903         btrfs_update_inode_block_group(trans, dir);
1904 out_unlock:
1905         nr = trans->blocks_used;
1906         btrfs_end_transaction(trans, root);
1907 fail:
1908         mutex_unlock(&root->fs_info->fs_mutex);
1909
1910         if (drop_inode) {
1911                 inode_dec_link_count(inode);
1912                 iput(inode);
1913         }
1914         btrfs_btree_balance_dirty(root, nr);
1915         btrfs_throttle(root);
1916         return err;
1917 }
1918
1919 static int btrfs_create(struct inode *dir, struct dentry *dentry,
1920                         int mode, struct nameidata *nd)
1921 {
1922         struct btrfs_trans_handle *trans;
1923         struct btrfs_root *root = BTRFS_I(dir)->root;
1924         struct inode *inode = NULL;
1925         int err;
1926         int drop_inode = 0;
1927         unsigned long nr = 0;
1928         u64 objectid;
1929
1930         mutex_lock(&root->fs_info->fs_mutex);
1931         err = btrfs_check_free_space(root, 1, 0);
1932         if (err)
1933                 goto fail;
1934         trans = btrfs_start_transaction(root, 1);
1935         btrfs_set_trans_block_group(trans, dir);
1936
1937         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1938         if (err) {
1939                 err = -ENOSPC;
1940                 goto out_unlock;
1941         }
1942
1943         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
1944                                 dentry->d_name.len,
1945                                 dentry->d_parent->d_inode->i_ino,
1946                                 objectid, BTRFS_I(dir)->block_group, mode);
1947         err = PTR_ERR(inode);
1948         if (IS_ERR(inode))
1949                 goto out_unlock;
1950
1951         btrfs_set_trans_block_group(trans, inode);
1952         err = btrfs_add_nondir(trans, dentry, inode, 0);
1953         if (err)
1954                 drop_inode = 1;
1955         else {
1956                 inode->i_mapping->a_ops = &btrfs_aops;
1957                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
1958                 inode->i_fop = &btrfs_file_operations;
1959                 inode->i_op = &btrfs_file_inode_operations;
1960                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1961                 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1962                                      inode->i_mapping, GFP_NOFS);
1963                 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
1964                                      inode->i_mapping, GFP_NOFS);
1965                 BTRFS_I(inode)->delalloc_bytes = 0;
1966                 atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
1967                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
1968         }
1969         dir->i_sb->s_dirt = 1;
1970         btrfs_update_inode_block_group(trans, inode);
1971         btrfs_update_inode_block_group(trans, dir);
1972 out_unlock:
1973         nr = trans->blocks_used;
1974         btrfs_end_transaction(trans, root);
1975 fail:
1976         mutex_unlock(&root->fs_info->fs_mutex);
1977
1978         if (drop_inode) {
1979                 inode_dec_link_count(inode);
1980                 iput(inode);
1981         }
1982         btrfs_btree_balance_dirty(root, nr);
1983         btrfs_throttle(root);
1984         return err;
1985 }
1986
1987 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
1988                       struct dentry *dentry)
1989 {
1990         struct btrfs_trans_handle *trans;
1991         struct btrfs_root *root = BTRFS_I(dir)->root;
1992         struct inode *inode = old_dentry->d_inode;
1993         unsigned long nr = 0;
1994         int err;
1995         int drop_inode = 0;
1996
1997         if (inode->i_nlink == 0)
1998                 return -ENOENT;
1999
2000 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
2001         inode->i_nlink++;
2002 #else
2003         inc_nlink(inode);
2004 #endif
2005         mutex_lock(&root->fs_info->fs_mutex);
2006         err = btrfs_check_free_space(root, 1, 0);
2007         if (err)
2008                 goto fail;
2009         trans = btrfs_start_transaction(root, 1);
2010
2011         btrfs_set_trans_block_group(trans, dir);
2012         atomic_inc(&inode->i_count);
2013         err = btrfs_add_nondir(trans, dentry, inode, 1);
2014
2015         if (err)
2016                 drop_inode = 1;
2017
2018         dir->i_sb->s_dirt = 1;
2019         btrfs_update_inode_block_group(trans, dir);
2020         err = btrfs_update_inode(trans, root, inode);
2021
2022         if (err)
2023                 drop_inode = 1;
2024
2025         nr = trans->blocks_used;
2026         btrfs_end_transaction(trans, root);
2027 fail:
2028         mutex_unlock(&root->fs_info->fs_mutex);
2029
2030         if (drop_inode) {
2031                 inode_dec_link_count(inode);
2032                 iput(inode);
2033         }
2034         btrfs_btree_balance_dirty(root, nr);
2035         btrfs_throttle(root);
2036         return err;
2037 }
2038
2039 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
2040 {
2041         struct inode *inode = NULL;
2042         struct btrfs_trans_handle *trans;
2043         struct btrfs_root *root = BTRFS_I(dir)->root;
2044         int err = 0;
2045         int drop_on_err = 0;
2046         u64 objectid = 0;
2047         unsigned long nr = 1;
2048
2049         mutex_lock(&root->fs_info->fs_mutex);
2050         err = btrfs_check_free_space(root, 1, 0);
2051         if (err)
2052                 goto out_unlock;
2053
2054         trans = btrfs_start_transaction(root, 1);
2055         btrfs_set_trans_block_group(trans, dir);
2056
2057         if (IS_ERR(trans)) {
2058                 err = PTR_ERR(trans);
2059                 goto out_unlock;
2060         }
2061
2062         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2063         if (err) {
2064                 err = -ENOSPC;
2065                 goto out_unlock;
2066         }
2067
2068         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
2069                                 dentry->d_name.len,
2070                                 dentry->d_parent->d_inode->i_ino, objectid,
2071                                 BTRFS_I(dir)->block_group, S_IFDIR | mode);
2072         if (IS_ERR(inode)) {
2073                 err = PTR_ERR(inode);
2074                 goto out_fail;
2075         }
2076
2077         drop_on_err = 1;
2078         inode->i_op = &btrfs_dir_inode_operations;
2079         inode->i_fop = &btrfs_dir_file_operations;
2080         btrfs_set_trans_block_group(trans, inode);
2081
2082         inode->i_size = 0;
2083         err = btrfs_update_inode(trans, root, inode);
2084         if (err)
2085                 goto out_fail;
2086
2087         err = btrfs_add_link(trans, dentry, inode, 0);
2088         if (err)
2089                 goto out_fail;
2090
2091         d_instantiate(dentry, inode);
2092         drop_on_err = 0;
2093         dir->i_sb->s_dirt = 1;
2094         btrfs_update_inode_block_group(trans, inode);
2095         btrfs_update_inode_block_group(trans, dir);
2096
2097 out_fail:
2098         nr = trans->blocks_used;
2099         btrfs_end_transaction(trans, root);
2100
2101 out_unlock:
2102         mutex_unlock(&root->fs_info->fs_mutex);
2103         if (drop_on_err)
2104                 iput(inode);
2105         btrfs_btree_balance_dirty(root, nr);
2106         btrfs_throttle(root);
2107         return err;
2108 }
2109
2110 static int merge_extent_mapping(struct extent_map_tree *em_tree,
2111                                 struct extent_map *existing,
2112                                 struct extent_map *em)
2113 {
2114         u64 start_diff;
2115         u64 new_end;
2116         int ret = 0;
2117         int real_blocks = existing->block_start < EXTENT_MAP_LAST_BYTE;
2118
2119         if (real_blocks && em->block_start >= EXTENT_MAP_LAST_BYTE)
2120                 goto invalid;
2121
2122         if (!real_blocks && em->block_start != existing->block_start)
2123                 goto invalid;
2124
2125         new_end = max(existing->start + existing->len, em->start + em->len);
2126
2127         if (existing->start >= em->start) {
2128                 if (em->start + em->len < existing->start)
2129                         goto invalid;
2130
2131                 start_diff = existing->start - em->start;
2132                 if (real_blocks && em->block_start + start_diff !=
2133                     existing->block_start)
2134                         goto invalid;
2135
2136                 em->len = new_end - em->start;
2137
2138                 remove_extent_mapping(em_tree, existing);
2139                 /* free for the tree */
2140                 free_extent_map(existing);
2141                 ret = add_extent_mapping(em_tree, em);
2142
2143         } else if (em->start > existing->start) {
2144
2145                 if (existing->start + existing->len < em->start)
2146                         goto invalid;
2147
2148                 start_diff = em->start - existing->start;
2149                 if (real_blocks && existing->block_start + start_diff !=
2150                     em->block_start)
2151                         goto invalid;
2152
2153                 remove_extent_mapping(em_tree, existing);
2154                 em->block_start = existing->block_start;
2155                 em->start = existing->start;
2156                 em->len = new_end - existing->start;
2157                 free_extent_map(existing);
2158
2159                 ret = add_extent_mapping(em_tree, em);
2160         } else {
2161                 goto invalid;
2162         }
2163         return ret;
2164
2165 invalid:
2166         printk("invalid extent map merge [%Lu %Lu %Lu] [%Lu %Lu %Lu]\n",
2167                existing->start, existing->len, existing->block_start,
2168                em->start, em->len, em->block_start);
2169         return -EIO;
2170 }
2171
2172 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
2173                                     size_t pg_offset, u64 start, u64 len,
2174                                     int create)
2175 {
2176         int ret;
2177         int err = 0;
2178         u64 bytenr;
2179         u64 extent_start = 0;
2180         u64 extent_end = 0;
2181         u64 objectid = inode->i_ino;
2182         u32 found_type;
2183         struct btrfs_path *path;
2184         struct btrfs_root *root = BTRFS_I(inode)->root;
2185         struct btrfs_file_extent_item *item;
2186         struct extent_buffer *leaf;
2187         struct btrfs_key found_key;
2188         struct extent_map *em = NULL;
2189         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2190         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2191         struct btrfs_trans_handle *trans = NULL;
2192
2193         path = btrfs_alloc_path();
2194         BUG_ON(!path);
2195         mutex_lock(&root->fs_info->fs_mutex);
2196
2197 again:
2198         spin_lock(&em_tree->lock);
2199         em = lookup_extent_mapping(em_tree, start, len);
2200         if (em)
2201                 em->bdev = root->fs_info->fs_devices->latest_bdev;
2202         spin_unlock(&em_tree->lock);
2203
2204         if (em) {
2205                 if (em->start > start || em->start + em->len <= start)
2206                         free_extent_map(em);
2207                 else if (em->block_start == EXTENT_MAP_INLINE && page)
2208                         free_extent_map(em);
2209                 else
2210                         goto out;
2211         }
2212         em = alloc_extent_map(GFP_NOFS);
2213         if (!em) {
2214                 err = -ENOMEM;
2215                 goto out;
2216         }
2217
2218         em->start = EXTENT_MAP_HOLE;
2219         em->len = (u64)-1;
2220         em->bdev = root->fs_info->fs_devices->latest_bdev;
2221         ret = btrfs_lookup_file_extent(trans, root, path,
2222                                        objectid, start, trans != NULL);
2223         if (ret < 0) {
2224                 err = ret;
2225                 goto out;
2226         }
2227
2228         if (ret != 0) {
2229                 if (path->slots[0] == 0)
2230                         goto not_found;
2231                 path->slots[0]--;
2232         }
2233
2234         leaf = path->nodes[0];
2235         item = btrfs_item_ptr(leaf, path->slots[0],
2236                               struct btrfs_file_extent_item);
2237         /* are we inside the extent that was found? */
2238         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2239         found_type = btrfs_key_type(&found_key);
2240         if (found_key.objectid != objectid ||
2241             found_type != BTRFS_EXTENT_DATA_KEY) {
2242                 goto not_found;
2243         }
2244
2245         found_type = btrfs_file_extent_type(leaf, item);
2246         extent_start = found_key.offset;
2247         if (found_type == BTRFS_FILE_EXTENT_REG) {
2248                 extent_end = extent_start +
2249                        btrfs_file_extent_num_bytes(leaf, item);
2250                 err = 0;
2251                 if (start < extent_start || start >= extent_end) {
2252                         em->start = start;
2253                         if (start < extent_start) {
2254                                 if (start + len <= extent_start)
2255                                         goto not_found;
2256                                 em->len = extent_end - extent_start;
2257                         } else {
2258                                 em->len = len;
2259                         }
2260                         goto not_found_em;
2261                 }
2262                 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
2263                 if (bytenr == 0) {
2264                         em->start = extent_start;
2265                         em->len = extent_end - extent_start;
2266                         em->block_start = EXTENT_MAP_HOLE;
2267                         goto insert;
2268                 }
2269                 bytenr += btrfs_file_extent_offset(leaf, item);
2270                 em->block_start = bytenr;
2271                 em->start = extent_start;
2272                 em->len = extent_end - extent_start;
2273                 goto insert;
2274         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
2275                 u64 page_start;
2276                 unsigned long ptr;
2277                 char *map;
2278                 size_t size;
2279                 size_t extent_offset;
2280                 size_t copy_size;
2281
2282                 size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf,
2283                                                     path->slots[0]));
2284                 extent_end = (extent_start + size + root->sectorsize - 1) &
2285                         ~((u64)root->sectorsize - 1);
2286                 if (start < extent_start || start >= extent_end) {
2287                         em->start = start;
2288                         if (start < extent_start) {
2289                                 if (start + len <= extent_start)
2290                                         goto not_found;
2291                                 em->len = extent_end - extent_start;
2292                         } else {
2293                                 em->len = len;
2294                         }
2295                         goto not_found_em;
2296                 }
2297                 em->block_start = EXTENT_MAP_INLINE;
2298
2299                 if (!page) {
2300                         em->start = extent_start;
2301                         em->len = size;
2302                         goto out;
2303                 }
2304
2305                 page_start = page_offset(page) + pg_offset;
2306                 extent_offset = page_start - extent_start;
2307                 copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
2308                                 size - extent_offset);
2309                 em->start = extent_start + extent_offset;
2310                 em->len = (copy_size + root->sectorsize - 1) &
2311                         ~((u64)root->sectorsize - 1);
2312                 map = kmap(page);
2313                 ptr = btrfs_file_extent_inline_start(item) + extent_offset;
2314                 if (create == 0 && !PageUptodate(page)) {
2315                         read_extent_buffer(leaf, map + pg_offset, ptr,
2316                                            copy_size);
2317                         flush_dcache_page(page);
2318                 } else if (create && PageUptodate(page)) {
2319                         if (!trans) {
2320                                 kunmap(page);
2321                                 free_extent_map(em);
2322                                 em = NULL;
2323                                 btrfs_release_path(root, path);
2324                                 trans = btrfs_start_transaction(root, 1);
2325                                 goto again;
2326                         }
2327                         write_extent_buffer(leaf, map + pg_offset, ptr,
2328                                             copy_size);
2329                         btrfs_mark_buffer_dirty(leaf);
2330                 }
2331                 kunmap(page);
2332                 set_extent_uptodate(io_tree, em->start,
2333                                     extent_map_end(em) - 1, GFP_NOFS);
2334                 goto insert;
2335         } else {
2336                 printk("unkknown found_type %d\n", found_type);
2337                 WARN_ON(1);
2338         }
2339 not_found:
2340         em->start = start;
2341         em->len = len;
2342 not_found_em:
2343         em->block_start = EXTENT_MAP_HOLE;
2344 insert:
2345         btrfs_release_path(root, path);
2346         if (em->start > start || extent_map_end(em) <= start) {
2347                 printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->len, start, len);
2348                 err = -EIO;
2349                 goto out;
2350         }
2351
2352         err = 0;
2353         spin_lock(&em_tree->lock);
2354         ret = add_extent_mapping(em_tree, em);
2355         /* it is possible that someone inserted the extent into the tree
2356          * while we had the lock dropped.  It is also possible that
2357          * an overlapping map exists in the tree
2358          */
2359         if (ret == -EEXIST) {
2360                 struct extent_map *existing;
2361                 existing = lookup_extent_mapping(em_tree, start, len);
2362                 if (existing && (existing->start > start ||
2363                     existing->start + existing->len <= start)) {
2364                         free_extent_map(existing);
2365                         existing = NULL;
2366                 }
2367                 if (!existing) {
2368                         existing = lookup_extent_mapping(em_tree, em->start,
2369                                                          em->len);
2370                         if (existing) {
2371                                 err = merge_extent_mapping(em_tree, existing,
2372                                                            em);
2373                                 free_extent_map(existing);
2374                                 if (err) {
2375                                         free_extent_map(em);
2376                                         em = NULL;
2377                                 }
2378                         } else {
2379                                 err = -EIO;
2380                                 printk("failing to insert %Lu %Lu\n",
2381                                        start, len);
2382                                 free_extent_map(em);
2383                                 em = NULL;
2384                         }
2385                 } else {
2386                         free_extent_map(em);
2387                         em = existing;
2388                 }
2389         }
2390         spin_unlock(&em_tree->lock);
2391 out:
2392         btrfs_free_path(path);
2393         if (trans) {
2394                 ret = btrfs_end_transaction(trans, root);
2395                 if (!err)
2396                         err = ret;
2397         }
2398         mutex_unlock(&root->fs_info->fs_mutex);
2399         if (err) {
2400                 free_extent_map(em);
2401                 WARN_ON(1);
2402                 return ERR_PTR(err);
2403         }
2404         return em;
2405 }
2406
2407 #if 0 /* waiting for O_DIRECT reads */
2408 static int btrfs_get_block(struct inode *inode, sector_t iblock,
2409                         struct buffer_head *bh_result, int create)
2410 {
2411         struct extent_map *em;
2412         u64 start = (u64)iblock << inode->i_blkbits;
2413         struct btrfs_multi_bio *multi = NULL;
2414         struct btrfs_root *root = BTRFS_I(inode)->root;
2415         u64 len;
2416         u64 logical;
2417         u64 map_length;
2418         int ret = 0;
2419
2420         em = btrfs_get_extent(inode, NULL, 0, start, bh_result->b_size, 0);
2421
2422         if (!em || IS_ERR(em))
2423                 goto out;
2424
2425         if (em->start > start || em->start + em->len <= start) {
2426             goto out;
2427         }
2428
2429         if (em->block_start == EXTENT_MAP_INLINE) {
2430                 ret = -EINVAL;
2431                 goto out;
2432         }
2433
2434         len = em->start + em->len - start;
2435         len = min_t(u64, len, INT_LIMIT(typeof(bh_result->b_size)));
2436
2437         if (em->block_start == EXTENT_MAP_HOLE ||
2438             em->block_start == EXTENT_MAP_DELALLOC) {
2439                 bh_result->b_size = len;
2440                 goto out;
2441         }
2442
2443         logical = start - em->start;
2444         logical = em->block_start + logical;
2445
2446         map_length = len;
2447         ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
2448                               logical, &map_length, &multi, 0);
2449         BUG_ON(ret);
2450         bh_result->b_blocknr = multi->stripes[0].physical >> inode->i_blkbits;
2451         bh_result->b_size = min(map_length, len);
2452
2453         bh_result->b_bdev = multi->stripes[0].dev->bdev;
2454         set_buffer_mapped(bh_result);
2455         kfree(multi);
2456 out:
2457         free_extent_map(em);
2458         return ret;
2459 }
2460 #endif
2461
2462 static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
2463                         const struct iovec *iov, loff_t offset,
2464                         unsigned long nr_segs)
2465 {
2466         return -EINVAL;
2467 #if 0
2468         struct file *file = iocb->ki_filp;
2469         struct inode *inode = file->f_mapping->host;
2470
2471         if (rw == WRITE)
2472                 return -EINVAL;
2473
2474         return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
2475                                   offset, nr_segs, btrfs_get_block, NULL);
2476 #endif
2477 }
2478
2479 static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock)
2480 {
2481         return extent_bmap(mapping, iblock, btrfs_get_extent);
2482 }
2483
2484 int btrfs_readpage(struct file *file, struct page *page)
2485 {
2486         struct extent_io_tree *tree;
2487         tree = &BTRFS_I(page->mapping->host)->io_tree;
2488         return extent_read_full_page(tree, page, btrfs_get_extent);
2489 }
2490
2491 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
2492 {
2493         struct extent_io_tree *tree;
2494
2495
2496         if (current->flags & PF_MEMALLOC) {
2497                 redirty_page_for_writepage(wbc, page);
2498                 unlock_page(page);
2499                 return 0;
2500         }
2501         tree = &BTRFS_I(page->mapping->host)->io_tree;
2502         return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
2503 }
2504
2505 static int btrfs_writepages(struct address_space *mapping,
2506                             struct writeback_control *wbc)
2507 {
2508         struct extent_io_tree *tree;
2509         tree = &BTRFS_I(mapping->host)->io_tree;
2510         return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
2511 }
2512
2513 static int
2514 btrfs_readpages(struct file *file, struct address_space *mapping,
2515                 struct list_head *pages, unsigned nr_pages)
2516 {
2517         struct extent_io_tree *tree;
2518         tree = &BTRFS_I(mapping->host)->io_tree;
2519         return extent_readpages(tree, mapping, pages, nr_pages,
2520                                 btrfs_get_extent);
2521 }
2522
2523 static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
2524 {
2525         struct extent_io_tree *tree;
2526         struct extent_map_tree *map;
2527         int ret;
2528
2529         tree = &BTRFS_I(page->mapping->host)->io_tree;
2530         map = &BTRFS_I(page->mapping->host)->extent_tree;
2531         ret = try_release_extent_mapping(map, tree, page, gfp_flags);
2532         if (ret == 1) {
2533                 invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE);
2534                 ClearPagePrivate(page);
2535                 set_page_private(page, 0);
2536                 page_cache_release(page);
2537         }
2538         return ret;
2539 }
2540
2541 static void btrfs_invalidatepage(struct page *page, unsigned long offset)
2542 {
2543         struct extent_io_tree *tree;
2544
2545         tree = &BTRFS_I(page->mapping->host)->io_tree;
2546         extent_invalidatepage(tree, page, offset);
2547         btrfs_releasepage(page, GFP_NOFS);
2548         if (PagePrivate(page)) {
2549                 invalidate_extent_lru(tree, page_offset(page), PAGE_CACHE_SIZE);
2550                 ClearPagePrivate(page);
2551                 set_page_private(page, 0);
2552                 page_cache_release(page);
2553         }
2554 }
2555
2556 /*
2557  * btrfs_page_mkwrite() is not allowed to change the file size as it gets
2558  * called from a page fault handler when a page is first dirtied. Hence we must
2559  * be careful to check for EOF conditions here. We set the page up correctly
2560  * for a written page which means we get ENOSPC checking when writing into
2561  * holes and correct delalloc and unwritten extent mapping on filesystems that
2562  * support these features.
2563  *
2564  * We are not allowed to take the i_mutex here so we have to play games to
2565  * protect against truncate races as the page could now be beyond EOF.  Because
2566  * vmtruncate() writes the inode size before removing pages, once we have the
2567  * page lock we can determine safely if the page is beyond EOF. If it is not
2568  * beyond EOF, then the page is guaranteed safe against truncation until we
2569  * unlock the page.
2570  */
2571 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
2572 {
2573         struct inode *inode = fdentry(vma->vm_file)->d_inode;
2574         struct btrfs_root *root = BTRFS_I(inode)->root;
2575         unsigned long end;
2576         loff_t size;
2577         int ret;
2578         u64 page_start;
2579
2580         mutex_lock(&root->fs_info->fs_mutex);
2581         ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0);
2582         mutex_unlock(&root->fs_info->fs_mutex);
2583         if (ret)
2584                 goto out;
2585
2586         ret = -EINVAL;
2587
2588         lock_page(page);
2589         wait_on_page_writeback(page);
2590         size = i_size_read(inode);
2591         page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2592
2593         if ((page->mapping != inode->i_mapping) ||
2594             (page_start > size)) {
2595                 /* page got truncated out from underneath us */
2596                 goto out_unlock;
2597         }
2598
2599         /* page is wholly or partially inside EOF */
2600         if (page_start + PAGE_CACHE_SIZE > size)
2601                 end = size & ~PAGE_CACHE_MASK;
2602         else
2603                 end = PAGE_CACHE_SIZE;
2604
2605         ret = btrfs_cow_one_page(inode, page, end);
2606
2607 out_unlock:
2608         unlock_page(page);
2609 out:
2610         return ret;
2611 }
2612
2613 static void btrfs_truncate(struct inode *inode)
2614 {
2615         struct btrfs_root *root = BTRFS_I(inode)->root;
2616         int ret;
2617         struct btrfs_trans_handle *trans;
2618         unsigned long nr;
2619
2620         if (!S_ISREG(inode->i_mode))
2621                 return;
2622         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2623                 return;
2624
2625         btrfs_truncate_page(inode->i_mapping, inode->i_size);
2626
2627         mutex_lock(&root->fs_info->fs_mutex);
2628         trans = btrfs_start_transaction(root, 1);
2629         btrfs_set_trans_block_group(trans, inode);
2630
2631         /* FIXME, add redo link to tree so we don't leak on crash */
2632         ret = btrfs_truncate_in_trans(trans, root, inode,
2633                                       BTRFS_EXTENT_DATA_KEY);
2634         btrfs_update_inode(trans, root, inode);
2635         nr = trans->blocks_used;
2636
2637         ret = btrfs_end_transaction(trans, root);
2638         BUG_ON(ret);
2639         mutex_unlock(&root->fs_info->fs_mutex);
2640         btrfs_btree_balance_dirty(root, nr);
2641         btrfs_throttle(root);
2642 }
2643
2644 static int noinline create_subvol(struct btrfs_root *root, char *name,
2645                                   int namelen)
2646 {
2647         struct btrfs_trans_handle *trans;
2648         struct btrfs_key key;
2649         struct btrfs_root_item root_item;
2650         struct btrfs_inode_item *inode_item;
2651         struct extent_buffer *leaf;
2652         struct btrfs_root *new_root = root;
2653         struct inode *inode;
2654         struct inode *dir;
2655         int ret;
2656         int err;
2657         u64 objectid;
2658         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2659         unsigned long nr = 1;
2660
2661         mutex_lock(&root->fs_info->fs_mutex);
2662         ret = btrfs_check_free_space(root, 1, 0);
2663         if (ret)
2664                 goto fail_commit;
2665
2666         trans = btrfs_start_transaction(root, 1);
2667         BUG_ON(!trans);
2668
2669         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2670                                        0, &objectid);
2671         if (ret)
2672                 goto fail;
2673
2674         leaf = __btrfs_alloc_free_block(trans, root, root->leafsize,
2675                                         objectid, trans->transid, 0, 0,
2676                                         0, 0);
2677         if (IS_ERR(leaf))
2678                 return PTR_ERR(leaf);
2679
2680         btrfs_set_header_nritems(leaf, 0);
2681         btrfs_set_header_level(leaf, 0);
2682         btrfs_set_header_bytenr(leaf, leaf->start);
2683         btrfs_set_header_generation(leaf, trans->transid);
2684         btrfs_set_header_owner(leaf, objectid);
2685
2686         write_extent_buffer(leaf, root->fs_info->fsid,
2687                             (unsigned long)btrfs_header_fsid(leaf),
2688                             BTRFS_FSID_SIZE);
2689         btrfs_mark_buffer_dirty(leaf);
2690
2691         inode_item = &root_item.inode;
2692         memset(inode_item, 0, sizeof(*inode_item));
2693         inode_item->generation = cpu_to_le64(1);
2694         inode_item->size = cpu_to_le64(3);
2695         inode_item->nlink = cpu_to_le32(1);
2696         inode_item->nblocks = cpu_to_le64(1);
2697         inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
2698
2699         btrfs_set_root_bytenr(&root_item, leaf->start);
2700         btrfs_set_root_level(&root_item, 0);
2701         btrfs_set_root_refs(&root_item, 1);
2702         btrfs_set_root_used(&root_item, 0);
2703
2704         memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
2705         root_item.drop_level = 0;
2706
2707         free_extent_buffer(leaf);
2708         leaf = NULL;
2709
2710         btrfs_set_root_dirid(&root_item, new_dirid);
2711
2712         key.objectid = objectid;
2713         key.offset = 1;
2714         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2715         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2716                                 &root_item);
2717         if (ret)
2718                 goto fail;
2719
2720         /*
2721          * insert the directory item
2722          */
2723         key.offset = (u64)-1;
2724         dir = root->fs_info->sb->s_root->d_inode;
2725         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2726                                     name, namelen, dir->i_ino, &key,
2727                                     BTRFS_FT_DIR);
2728         if (ret)
2729                 goto fail;
2730
2731         ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
2732                              name, namelen, objectid,
2733                              root->fs_info->sb->s_root->d_inode->i_ino);
2734         if (ret)
2735                 goto fail;
2736
2737         ret = btrfs_commit_transaction(trans, root);
2738         if (ret)
2739                 goto fail_commit;
2740
2741         new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen);
2742         BUG_ON(!new_root);
2743
2744         trans = btrfs_start_transaction(new_root, 1);
2745         BUG_ON(!trans);
2746
2747         inode = btrfs_new_inode(trans, new_root, "..", 2, new_dirid,
2748                                 new_dirid,
2749                                 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2750         if (IS_ERR(inode))
2751                 goto fail;
2752         inode->i_op = &btrfs_dir_inode_operations;
2753         inode->i_fop = &btrfs_dir_file_operations;
2754         new_root->inode = inode;
2755
2756         ret = btrfs_insert_inode_ref(trans, new_root, "..", 2, new_dirid,
2757                                      new_dirid);
2758         inode->i_nlink = 1;
2759         inode->i_size = 0;
2760         ret = btrfs_update_inode(trans, new_root, inode);
2761         if (ret)
2762                 goto fail;
2763 fail:
2764         nr = trans->blocks_used;
2765         err = btrfs_commit_transaction(trans, new_root);
2766         if (err && !ret)
2767                 ret = err;
2768 fail_commit:
2769         mutex_unlock(&root->fs_info->fs_mutex);
2770         btrfs_btree_balance_dirty(root, nr);
2771         btrfs_throttle(root);
2772         return ret;
2773 }
2774
2775 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2776 {
2777         struct btrfs_pending_snapshot *pending_snapshot;
2778         struct btrfs_trans_handle *trans;
2779         int ret;
2780         int err;
2781         unsigned long nr = 0;
2782
2783         if (!root->ref_cows)
2784                 return -EINVAL;
2785
2786         mutex_lock(&root->fs_info->fs_mutex);
2787         ret = btrfs_check_free_space(root, 1, 0);
2788         if (ret)
2789                 goto fail_unlock;
2790
2791         pending_snapshot = kmalloc(sizeof(*pending_snapshot), GFP_NOFS);
2792         if (!pending_snapshot) {
2793                 ret = -ENOMEM;
2794                 goto fail_unlock;
2795         }
2796         pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
2797         if (!pending_snapshot->name) {
2798                 ret = -ENOMEM;
2799                 kfree(pending_snapshot);
2800                 goto fail_unlock;
2801         }
2802         memcpy(pending_snapshot->name, name, namelen);
2803         pending_snapshot->name[namelen] = '\0';
2804         trans = btrfs_start_transaction(root, 1);
2805         BUG_ON(!trans);
2806         pending_snapshot->root = root;
2807         list_add(&pending_snapshot->list,
2808                  &trans->transaction->pending_snapshots);
2809         ret = btrfs_update_inode(trans, root, root->inode);
2810         err = btrfs_commit_transaction(trans, root);
2811
2812 fail_unlock:
2813         mutex_unlock(&root->fs_info->fs_mutex);
2814         btrfs_btree_balance_dirty(root, nr);
2815         btrfs_throttle(root);
2816         return ret;
2817 }
2818
2819 unsigned long btrfs_force_ra(struct address_space *mapping,
2820                               struct file_ra_state *ra, struct file *file,
2821                               pgoff_t offset, pgoff_t last_index)
2822 {
2823         pgoff_t req_size = last_index - offset + 1;
2824
2825 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
2826         offset = page_cache_readahead(mapping, ra, file, offset, req_size);
2827         return offset;
2828 #else
2829         page_cache_sync_readahead(mapping, ra, file, offset, req_size);
2830         return offset + req_size;
2831 #endif
2832 }
2833
2834 int btrfs_defrag_file(struct file *file) {
2835         struct inode *inode = fdentry(file)->d_inode;
2836         struct btrfs_root *root = BTRFS_I(inode)->root;
2837         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2838         struct page *page;
2839         unsigned long last_index;
2840         unsigned long ra_pages = root->fs_info->bdi.ra_pages;
2841         unsigned long total_read = 0;
2842         u64 page_start;
2843         u64 page_end;
2844         unsigned long i;
2845         int ret;
2846
2847         mutex_lock(&root->fs_info->fs_mutex);
2848         ret = btrfs_check_free_space(root, inode->i_size, 0);
2849         mutex_unlock(&root->fs_info->fs_mutex);
2850         if (ret)
2851                 return -ENOSPC;
2852
2853         mutex_lock(&inode->i_mutex);
2854         last_index = inode->i_size >> PAGE_CACHE_SHIFT;
2855         for (i = 0; i <= last_index; i++) {
2856                 if (total_read % ra_pages == 0) {
2857                         btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i,
2858                                        min(last_index, i + ra_pages - 1));
2859                 }
2860                 total_read++;
2861                 page = grab_cache_page(inode->i_mapping, i);
2862                 if (!page)
2863                         goto out_unlock;
2864                 if (!PageUptodate(page)) {
2865                         btrfs_readpage(NULL, page);
2866                         lock_page(page);
2867                         if (!PageUptodate(page)) {
2868                                 unlock_page(page);
2869                                 page_cache_release(page);
2870                                 goto out_unlock;
2871                         }
2872                 }
2873
2874 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
2875                 ClearPageDirty(page);
2876 #else
2877                 cancel_dirty_page(page, PAGE_CACHE_SIZE);
2878 #endif
2879                 wait_on_page_writeback(page);
2880                 set_page_extent_mapped(page);
2881
2882                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2883                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2884
2885                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2886                 set_extent_delalloc(io_tree, page_start,
2887                                     page_end, GFP_NOFS);
2888
2889                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2890                 set_page_dirty(page);
2891                 unlock_page(page);
2892                 page_cache_release(page);
2893                 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
2894         }
2895
2896 out_unlock:
2897         mutex_unlock(&inode->i_mutex);
2898         return 0;
2899 }
2900
2901 static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
2902 {
2903         u64 new_size;
2904         u64 old_size;
2905         u64 devid = 1;
2906         struct btrfs_ioctl_vol_args *vol_args;
2907         struct btrfs_trans_handle *trans;
2908         struct btrfs_device *device = NULL;
2909         char *sizestr;
2910         char *devstr = NULL;
2911         int ret = 0;
2912         int namelen;
2913         int mod = 0;
2914
2915         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
2916
2917         if (!vol_args)
2918                 return -ENOMEM;
2919
2920         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
2921                 ret = -EFAULT;
2922                 goto out;
2923         }
2924         namelen = strlen(vol_args->name);
2925         if (namelen > BTRFS_VOL_NAME_MAX) {
2926                 ret = -EINVAL;
2927                 goto out;
2928         }
2929
2930         mutex_lock(&root->fs_info->fs_mutex);
2931         sizestr = vol_args->name;
2932         devstr = strchr(sizestr, ':');
2933         if (devstr) {
2934                 char *end;
2935                 sizestr = devstr + 1;
2936                 *devstr = '\0';
2937                 devstr = vol_args->name;
2938                 devid = simple_strtoull(devstr, &end, 10);
2939 printk("resizing devid %Lu\n", devid);
2940         }
2941         device = btrfs_find_device(root, devid, NULL);
2942         if (!device) {
2943                 printk("resizer unable to find device %Lu\n", devid);
2944                 ret = -EINVAL;
2945                 goto out_unlock;
2946         }
2947         if (!strcmp(sizestr, "max"))
2948                 new_size = device->bdev->bd_inode->i_size;
2949         else {
2950                 if (sizestr[0] == '-') {
2951                         mod = -1;
2952                         sizestr++;
2953                 } else if (sizestr[0] == '+') {
2954                         mod = 1;
2955                         sizestr++;
2956                 }
2957                 new_size = btrfs_parse_size(sizestr);
2958                 if (new_size == 0) {
2959                         ret = -EINVAL;
2960                         goto out_unlock;
2961                 }
2962         }
2963
2964         old_size = device->total_bytes;
2965
2966         if (mod < 0) {
2967                 if (new_size > old_size) {
2968                         ret = -EINVAL;
2969                         goto out_unlock;
2970                 }
2971                 new_size = old_size - new_size;
2972         } else if (mod > 0) {
2973                 new_size = old_size + new_size;
2974         }
2975
2976         if (new_size < 256 * 1024 * 1024) {
2977                 ret = -EINVAL;
2978                 goto out_unlock;
2979         }
2980         if (new_size > device->bdev->bd_inode->i_size) {
2981                 ret = -EFBIG;
2982                 goto out_unlock;
2983         }
2984
2985         do_div(new_size, root->sectorsize);
2986         new_size *= root->sectorsize;
2987
2988 printk("new size for %s is %llu\n", device->name, (unsigned long long)new_size);
2989
2990         if (new_size > old_size) {
2991                 trans = btrfs_start_transaction(root, 1);
2992                 ret = btrfs_grow_device(trans, device, new_size);
2993                 btrfs_commit_transaction(trans, root);
2994         } else {
2995                 ret = btrfs_shrink_device(device, new_size);
2996         }
2997
2998 out_unlock:
2999         mutex_unlock(&root->fs_info->fs_mutex);
3000 out:
3001         kfree(vol_args);
3002         return ret;
3003 }
3004
3005 static int noinline btrfs_ioctl_snap_create(struct btrfs_root *root,
3006                                             void __user *arg)
3007 {
3008         struct btrfs_ioctl_vol_args *vol_args;
3009         struct btrfs_dir_item *di;
3010         struct btrfs_path *path;
3011         u64 root_dirid;
3012         int namelen;
3013         int ret;
3014
3015         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
3016
3017         if (!vol_args)
3018                 return -ENOMEM;
3019
3020         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
3021                 ret = -EFAULT;
3022                 goto out;
3023         }
3024
3025         namelen = strlen(vol_args->name);
3026         if (namelen > BTRFS_VOL_NAME_MAX) {
3027                 ret = -EINVAL;
3028                 goto out;
3029         }
3030         if (strchr(vol_args->name, '/')) {
3031                 ret = -EINVAL;
3032                 goto out;
3033         }
3034
3035         path = btrfs_alloc_path();
3036         if (!path) {
3037                 ret = -ENOMEM;
3038                 goto out;
3039         }
3040
3041         root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
3042         mutex_lock(&root->fs_info->fs_mutex);
3043         di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
3044                             path, root_dirid,
3045                             vol_args->name, namelen, 0);
3046         mutex_unlock(&root->fs_info->fs_mutex);
3047         btrfs_free_path(path);
3048
3049         if (di && !IS_ERR(di)) {
3050                 ret = -EEXIST;
3051                 goto out;
3052         }
3053
3054         if (IS_ERR(di)) {
3055                 ret = PTR_ERR(di);
3056                 goto out;
3057         }
3058
3059         if (root == root->fs_info->tree_root)
3060                 ret = create_subvol(root, vol_args->name, namelen);
3061         else
3062                 ret = create_snapshot(root, vol_args->name, namelen);
3063 out:
3064         kfree(vol_args);
3065         return ret;
3066 }
3067
3068 static int btrfs_ioctl_defrag(struct file *file)
3069 {
3070         struct inode *inode = fdentry(file)->d_inode;
3071         struct btrfs_root *root = BTRFS_I(inode)->root;
3072
3073         switch (inode->i_mode & S_IFMT) {
3074         case S_IFDIR:
3075                 mutex_lock(&root->fs_info->fs_mutex);
3076                 btrfs_defrag_root(root, 0);
3077                 btrfs_defrag_root(root->fs_info->extent_root, 0);
3078                 mutex_unlock(&root->fs_info->fs_mutex);
3079                 break;
3080         case S_IFREG:
3081                 btrfs_defrag_file(file);
3082                 break;
3083         }
3084
3085         return 0;
3086 }
3087
3088 long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
3089 {
3090         struct btrfs_ioctl_vol_args *vol_args;
3091         int ret;
3092
3093         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
3094
3095         if (!vol_args)
3096                 return -ENOMEM;
3097
3098         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
3099                 ret = -EFAULT;
3100                 goto out;
3101         }
3102         ret = btrfs_init_new_device(root, vol_args->name);
3103
3104 out:
3105         kfree(vol_args);
3106         return ret;
3107 }
3108
3109 long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
3110 {
3111         struct btrfs_ioctl_vol_args *vol_args;
3112         int ret;
3113
3114         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
3115
3116         if (!vol_args)
3117                 return -ENOMEM;
3118
3119         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
3120                 ret = -EFAULT;
3121                 goto out;
3122         }
3123         ret = btrfs_rm_device(root, vol_args->name);
3124
3125 out:
3126         kfree(vol_args);
3127         return ret;
3128 }
3129
3130 int dup_item_to_inode(struct btrfs_trans_handle *trans,
3131                        struct btrfs_root *root,
3132                        struct btrfs_path *path,
3133                        struct extent_buffer *leaf,
3134                        int slot,
3135                        struct btrfs_key *key,
3136                        u64 destino)
3137 {
3138         char *dup;
3139         int len = btrfs_item_size_nr(leaf, slot);
3140         struct btrfs_key ckey = *key;
3141         int ret = 0;
3142
3143         dup = kmalloc(len, GFP_NOFS);
3144         if (!dup)
3145                 return -ENOMEM;
3146
3147         read_extent_buffer(leaf, dup, btrfs_item_ptr_offset(leaf, slot), len);
3148         btrfs_release_path(root, path);
3149
3150         ckey.objectid = destino;
3151         ret = btrfs_insert_item(trans, root, &ckey, dup, len);
3152         kfree(dup);
3153         return ret;
3154 }
3155
3156 long btrfs_ioctl_clone(struct file *file, unsigned long src_fd)
3157 {
3158         struct inode *inode = fdentry(file)->d_inode;
3159         struct btrfs_root *root = BTRFS_I(inode)->root;
3160         struct file *src_file;
3161         struct inode *src;
3162         struct btrfs_trans_handle *trans;
3163         int ret;
3164         u64 pos;
3165         struct btrfs_path *path;
3166         struct btrfs_key key;
3167         struct extent_buffer *leaf;
3168         u32 nritems;
3169         int slot;
3170
3171         src_file = fget(src_fd);
3172         if (!src_file)
3173                 return -EBADF;
3174         src = src_file->f_dentry->d_inode;
3175
3176         ret = -EXDEV;
3177         if (src->i_sb != inode->i_sb)
3178                 goto out_fput;
3179
3180         if (inode < src) {
3181                 mutex_lock(&inode->i_mutex);
3182                 mutex_lock(&src->i_mutex);
3183         } else {
3184                 mutex_lock(&src->i_mutex);
3185                 mutex_lock(&inode->i_mutex);
3186         }
3187
3188         ret = -ENOTEMPTY;
3189         if (inode->i_size)
3190                 goto out_unlock;
3191
3192         /* do any pending delalloc/csum calc on src, one way or
3193            another, and lock file content */
3194         while (1) {
3195                 filemap_write_and_wait(src->i_mapping);
3196                 lock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS);
3197                 if (BTRFS_I(src)->delalloc_bytes == 0)
3198                         break;
3199                 unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS);
3200         }
3201
3202         mutex_lock(&root->fs_info->fs_mutex);
3203         trans = btrfs_start_transaction(root, 0);
3204         path = btrfs_alloc_path();
3205         if (!path) {
3206                 ret = -ENOMEM;
3207                 goto out;
3208         }
3209         key.offset = 0;
3210         key.type = BTRFS_EXTENT_DATA_KEY;
3211         key.objectid = src->i_ino;
3212         pos = 0;
3213         path->reada = 2;
3214
3215         while (1) {
3216                 /*
3217                  * note the key will change type as we walk through the
3218                  * tree.
3219                  */
3220                 ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
3221                 if (ret < 0)
3222                         goto out;
3223
3224                 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
3225                         ret = btrfs_next_leaf(root, path);
3226                         if (ret < 0)
3227                                 goto out;
3228                         if (ret > 0)
3229                                 break;
3230                 }
3231                 leaf = path->nodes[0];
3232                 slot = path->slots[0];
3233                 btrfs_item_key_to_cpu(leaf, &key, slot);
3234                 nritems = btrfs_header_nritems(leaf);
3235
3236                 if (btrfs_key_type(&key) > BTRFS_CSUM_ITEM_KEY ||
3237                     key.objectid != src->i_ino)
3238                         break;
3239
3240                 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
3241                         struct btrfs_file_extent_item *extent;
3242                         int found_type;
3243                         pos = key.offset;
3244                         extent = btrfs_item_ptr(leaf, slot,
3245                                                 struct btrfs_file_extent_item);
3246                         found_type = btrfs_file_extent_type(leaf, extent);
3247                         if (found_type == BTRFS_FILE_EXTENT_REG) {
3248                                 u64 len = btrfs_file_extent_num_bytes(leaf,
3249                                                                       extent);
3250                                 u64 ds = btrfs_file_extent_disk_bytenr(leaf,
3251                                                                        extent);
3252                                 u64 dl = btrfs_file_extent_disk_num_bytes(leaf,
3253                                                                  extent);
3254                                 u64 off = btrfs_file_extent_offset(leaf,
3255                                                                    extent);
3256                                 btrfs_insert_file_extent(trans, root,
3257                                                          inode->i_ino, pos,
3258                                                          ds, dl, len, off);
3259                                 /* ds == 0 means there's a hole */
3260                                 if (ds != 0) {
3261                                         btrfs_inc_extent_ref(trans, root,
3262                                                      ds, dl,
3263                                                      root->root_key.objectid,
3264                                                      trans->transid,
3265                                                      inode->i_ino, pos);
3266                                 }
3267                                 pos = key.offset + len;
3268                         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
3269                                 ret = dup_item_to_inode(trans, root, path,
3270                                                         leaf, slot, &key,
3271                                                         inode->i_ino);
3272                                 if (ret)
3273                                         goto out;
3274                                 pos = key.offset + btrfs_item_size_nr(leaf,
3275                                                                       slot);
3276                         }
3277                 } else if (btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) {
3278                         ret = dup_item_to_inode(trans, root, path, leaf,
3279                                                 slot, &key, inode->i_ino);
3280
3281                         if (ret)
3282                                 goto out;
3283                 }
3284                 key.offset++;
3285                 btrfs_release_path(root, path);
3286         }
3287
3288         ret = 0;
3289 out:
3290         btrfs_free_path(path);
3291
3292         inode->i_blocks = src->i_blocks;
3293         i_size_write(inode, src->i_size);
3294         btrfs_update_inode(trans, root, inode);
3295
3296         unlock_extent(&BTRFS_I(src)->io_tree, 0, (u64)-1, GFP_NOFS);
3297
3298         btrfs_end_transaction(trans, root);
3299         mutex_unlock(&root->fs_info->fs_mutex);
3300
3301 out_unlock:
3302         mutex_unlock(&src->i_mutex);
3303         mutex_unlock(&inode->i_mutex);
3304 out_fput:
3305         fput(src_file);
3306         return ret;
3307 }
3308
3309 long btrfs_ioctl(struct file *file, unsigned int
3310                 cmd, unsigned long arg)
3311 {
3312         struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
3313
3314         switch (cmd) {
3315         case BTRFS_IOC_SNAP_CREATE:
3316                 return btrfs_ioctl_snap_create(root, (void __user *)arg);
3317         case BTRFS_IOC_DEFRAG:
3318                 return btrfs_ioctl_defrag(file);
3319         case BTRFS_IOC_RESIZE:
3320                 return btrfs_ioctl_resize(root, (void __user *)arg);
3321         case BTRFS_IOC_ADD_DEV:
3322                 return btrfs_ioctl_add_dev(root, (void __user *)arg);
3323         case BTRFS_IOC_RM_DEV:
3324                 return btrfs_ioctl_rm_dev(root, (void __user *)arg);
3325         case BTRFS_IOC_BALANCE:
3326                 return btrfs_balance(root->fs_info->dev_root);
3327         case BTRFS_IOC_CLONE:
3328                 return btrfs_ioctl_clone(file, arg);
3329         }
3330
3331         return -ENOTTY;
3332 }
3333
3334 /*
3335  * Called inside transaction, so use GFP_NOFS
3336  */
3337 struct inode *btrfs_alloc_inode(struct super_block *sb)
3338 {
3339         struct btrfs_inode *ei;
3340
3341         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
3342         if (!ei)
3343                 return NULL;
3344         ei->last_trans = 0;
3345         ei->ordered_trans = 0;
3346         return &ei->vfs_inode;
3347 }
3348
3349 void btrfs_destroy_inode(struct inode *inode)
3350 {
3351         WARN_ON(!list_empty(&inode->i_dentry));
3352         WARN_ON(inode->i_data.nrpages);
3353
3354         btrfs_drop_extent_cache(inode, 0, (u64)-1);
3355         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
3356 }
3357
3358 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
3359 static void init_once(struct kmem_cache * cachep, void *foo)
3360 #else
3361 static void init_once(void * foo, struct kmem_cache * cachep,
3362                       unsigned long flags)
3363 #endif
3364 {
3365         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
3366
3367         inode_init_once(&ei->vfs_inode);
3368 }
3369
3370 void btrfs_destroy_cachep(void)
3371 {
3372         if (btrfs_inode_cachep)
3373                 kmem_cache_destroy(btrfs_inode_cachep);
3374         if (btrfs_trans_handle_cachep)
3375                 kmem_cache_destroy(btrfs_trans_handle_cachep);
3376         if (btrfs_transaction_cachep)
3377                 kmem_cache_destroy(btrfs_transaction_cachep);
3378         if (btrfs_bit_radix_cachep)
3379                 kmem_cache_destroy(btrfs_bit_radix_cachep);
3380         if (btrfs_path_cachep)
3381                 kmem_cache_destroy(btrfs_path_cachep);
3382 }
3383
3384 struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
3385                                        unsigned long extra_flags,
3386 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
3387                                        void (*ctor)(struct kmem_cache *, void *)
3388 #else
3389                                        void (*ctor)(void *, struct kmem_cache *,
3390                                                     unsigned long)
3391 #endif
3392                                      )
3393 {
3394         return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
3395                                  SLAB_MEM_SPREAD | extra_flags), ctor
3396 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
3397                                  ,NULL
3398 #endif
3399                                 );
3400 }
3401
3402 int btrfs_init_cachep(void)
3403 {
3404         btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache",
3405                                           sizeof(struct btrfs_inode),
3406                                           0, init_once);
3407         if (!btrfs_inode_cachep)
3408                 goto fail;
3409         btrfs_trans_handle_cachep =
3410                         btrfs_cache_create("btrfs_trans_handle_cache",
3411                                            sizeof(struct btrfs_trans_handle),
3412                                            0, NULL);
3413         if (!btrfs_trans_handle_cachep)
3414                 goto fail;
3415         btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache",
3416                                              sizeof(struct btrfs_transaction),
3417                                              0, NULL);
3418         if (!btrfs_transaction_cachep)
3419                 goto fail;
3420         btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache",
3421                                          sizeof(struct btrfs_path),
3422                                          0, NULL);
3423         if (!btrfs_path_cachep)
3424                 goto fail;
3425         btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256,
3426                                               SLAB_DESTROY_BY_RCU, NULL);
3427         if (!btrfs_bit_radix_cachep)
3428                 goto fail;
3429         return 0;
3430 fail:
3431         btrfs_destroy_cachep();
3432         return -ENOMEM;
3433 }
3434
3435 static int btrfs_getattr(struct vfsmount *mnt,
3436                          struct dentry *dentry, struct kstat *stat)
3437 {
3438         struct inode *inode = dentry->d_inode;
3439         generic_fillattr(inode, stat);
3440         stat->blksize = PAGE_CACHE_SIZE;
3441         stat->blocks = inode->i_blocks + (BTRFS_I(inode)->delalloc_bytes >> 9);
3442         return 0;
3443 }
3444
3445 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
3446                            struct inode * new_dir,struct dentry *new_dentry)
3447 {
3448         struct btrfs_trans_handle *trans;
3449         struct btrfs_root *root = BTRFS_I(old_dir)->root;
3450         struct inode *new_inode = new_dentry->d_inode;
3451         struct inode *old_inode = old_dentry->d_inode;
3452         struct timespec ctime = CURRENT_TIME;
3453         struct btrfs_path *path;
3454         int ret;
3455
3456         if (S_ISDIR(old_inode->i_mode) && new_inode &&
3457             new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
3458                 return -ENOTEMPTY;
3459         }
3460
3461         mutex_lock(&root->fs_info->fs_mutex);
3462         ret = btrfs_check_free_space(root, 1, 0);
3463         if (ret)
3464                 goto out_unlock;
3465
3466         trans = btrfs_start_transaction(root, 1);
3467
3468         btrfs_set_trans_block_group(trans, new_dir);
3469         path = btrfs_alloc_path();
3470         if (!path) {
3471                 ret = -ENOMEM;
3472                 goto out_fail;
3473         }
3474
3475         old_dentry->d_inode->i_nlink++;
3476         old_dir->i_ctime = old_dir->i_mtime = ctime;
3477         new_dir->i_ctime = new_dir->i_mtime = ctime;
3478         old_inode->i_ctime = ctime;
3479
3480         ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
3481         if (ret)
3482                 goto out_fail;
3483
3484         if (new_inode) {
3485                 new_inode->i_ctime = CURRENT_TIME;
3486                 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
3487                 if (ret)
3488                         goto out_fail;
3489         }
3490         ret = btrfs_add_link(trans, new_dentry, old_inode, 1);
3491         if (ret)
3492                 goto out_fail;
3493
3494 out_fail:
3495         btrfs_free_path(path);
3496         btrfs_end_transaction(trans, root);
3497 out_unlock:
3498         mutex_unlock(&root->fs_info->fs_mutex);
3499         return ret;
3500 }
3501
3502 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
3503                          const char *symname)
3504 {
3505         struct btrfs_trans_handle *trans;
3506         struct btrfs_root *root = BTRFS_I(dir)->root;
3507         struct btrfs_path *path;
3508         struct btrfs_key key;
3509         struct inode *inode = NULL;
3510         int err;
3511         int drop_inode = 0;
3512         u64 objectid;
3513         int name_len;
3514         int datasize;
3515         unsigned long ptr;
3516         struct btrfs_file_extent_item *ei;
3517         struct extent_buffer *leaf;
3518         unsigned long nr = 0;
3519
3520         name_len = strlen(symname) + 1;
3521         if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
3522                 return -ENAMETOOLONG;
3523
3524         mutex_lock(&root->fs_info->fs_mutex);
3525         err = btrfs_check_free_space(root, 1, 0);
3526         if (err)
3527                 goto out_fail;
3528
3529         trans = btrfs_start_transaction(root, 1);
3530         btrfs_set_trans_block_group(trans, dir);
3531
3532         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
3533         if (err) {
3534                 err = -ENOSPC;
3535                 goto out_unlock;
3536         }
3537
3538         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
3539                                 dentry->d_name.len,
3540                                 dentry->d_parent->d_inode->i_ino, objectid,
3541                                 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
3542         err = PTR_ERR(inode);
3543         if (IS_ERR(inode))
3544                 goto out_unlock;
3545
3546         btrfs_set_trans_block_group(trans, inode);
3547         err = btrfs_add_nondir(trans, dentry, inode, 0);
3548         if (err)
3549                 drop_inode = 1;
3550         else {
3551                 inode->i_mapping->a_ops = &btrfs_aops;
3552                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
3553                 inode->i_fop = &btrfs_file_operations;
3554                 inode->i_op = &btrfs_file_inode_operations;
3555                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
3556                 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
3557                                      inode->i_mapping, GFP_NOFS);
3558                 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
3559                                      inode->i_mapping, GFP_NOFS);
3560                 BTRFS_I(inode)->delalloc_bytes = 0;
3561                 atomic_set(&BTRFS_I(inode)->ordered_writeback, 0);
3562                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
3563         }
3564         dir->i_sb->s_dirt = 1;
3565         btrfs_update_inode_block_group(trans, inode);
3566         btrfs_update_inode_block_group(trans, dir);
3567         if (drop_inode)
3568                 goto out_unlock;
3569
3570         path = btrfs_alloc_path();
3571         BUG_ON(!path);
3572         key.objectid = inode->i_ino;
3573         key.offset = 0;
3574         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
3575         datasize = btrfs_file_extent_calc_inline_size(name_len);
3576         err = btrfs_insert_empty_item(trans, root, path, &key,
3577                                       datasize);
3578         if (err) {
3579                 drop_inode = 1;
3580                 goto out_unlock;
3581         }
3582         leaf = path->nodes[0];
3583         ei = btrfs_item_ptr(leaf, path->slots[0],
3584                             struct btrfs_file_extent_item);
3585         btrfs_set_file_extent_generation(leaf, ei, trans->transid);
3586         btrfs_set_file_extent_type(leaf, ei,
3587                                    BTRFS_FILE_EXTENT_INLINE);
3588         ptr = btrfs_file_extent_inline_start(ei);
3589         write_extent_buffer(leaf, symname, ptr, name_len);
3590         btrfs_mark_buffer_dirty(leaf);
3591         btrfs_free_path(path);
3592
3593         inode->i_op = &btrfs_symlink_inode_operations;
3594         inode->i_mapping->a_ops = &btrfs_symlink_aops;
3595         inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
3596         inode->i_size = name_len - 1;
3597         err = btrfs_update_inode(trans, root, inode);
3598         if (err)
3599                 drop_inode = 1;
3600
3601 out_unlock:
3602         nr = trans->blocks_used;
3603         btrfs_end_transaction(trans, root);
3604 out_fail:
3605         mutex_unlock(&root->fs_info->fs_mutex);
3606         if (drop_inode) {
3607                 inode_dec_link_count(inode);
3608                 iput(inode);
3609         }
3610         btrfs_btree_balance_dirty(root, nr);
3611         btrfs_throttle(root);
3612         return err;
3613 }
3614
3615 static int btrfs_permission(struct inode *inode, int mask,
3616                             struct nameidata *nd)
3617 {
3618         if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE))
3619                 return -EACCES;
3620         return generic_permission(inode, mask, NULL);
3621 }
3622
3623 static struct inode_operations btrfs_dir_inode_operations = {
3624         .lookup         = btrfs_lookup,
3625         .create         = btrfs_create,
3626         .unlink         = btrfs_unlink,
3627         .link           = btrfs_link,
3628         .mkdir          = btrfs_mkdir,
3629         .rmdir          = btrfs_rmdir,
3630         .rename         = btrfs_rename,
3631         .symlink        = btrfs_symlink,
3632         .setattr        = btrfs_setattr,
3633         .mknod          = btrfs_mknod,
3634         .setxattr       = generic_setxattr,
3635         .getxattr       = generic_getxattr,
3636         .listxattr      = btrfs_listxattr,
3637         .removexattr    = generic_removexattr,
3638         .permission     = btrfs_permission,
3639 };
3640 static struct inode_operations btrfs_dir_ro_inode_operations = {
3641         .lookup         = btrfs_lookup,
3642         .permission     = btrfs_permission,
3643 };
3644 static struct file_operations btrfs_dir_file_operations = {
3645         .llseek         = generic_file_llseek,
3646         .read           = generic_read_dir,
3647         .readdir        = btrfs_readdir,
3648         .unlocked_ioctl = btrfs_ioctl,
3649 #ifdef CONFIG_COMPAT
3650         .compat_ioctl   = btrfs_ioctl,
3651 #endif
3652 };
3653
3654 static struct extent_io_ops btrfs_extent_io_ops = {
3655         .fill_delalloc = run_delalloc_range,
3656         .submit_bio_hook = btrfs_submit_bio_hook,
3657         .merge_bio_hook = btrfs_merge_bio_hook,
3658         .readpage_io_hook = btrfs_readpage_io_hook,
3659         .readpage_end_io_hook = btrfs_readpage_end_io_hook,
3660         .readpage_io_failed_hook = btrfs_readpage_io_failed_hook,
3661         .set_bit_hook = btrfs_set_bit_hook,
3662         .clear_bit_hook = btrfs_clear_bit_hook,
3663 };
3664
3665 static struct address_space_operations btrfs_aops = {
3666         .readpage       = btrfs_readpage,
3667         .writepage      = btrfs_writepage,
3668         .writepages     = btrfs_writepages,
3669         .readpages      = btrfs_readpages,
3670         .sync_page      = block_sync_page,
3671         .bmap           = btrfs_bmap,
3672         .direct_IO      = btrfs_direct_IO,
3673         .invalidatepage = btrfs_invalidatepage,
3674         .releasepage    = btrfs_releasepage,
3675         .set_page_dirty = __set_page_dirty_nobuffers,
3676 };
3677
3678 static struct address_space_operations btrfs_symlink_aops = {
3679         .readpage       = btrfs_readpage,
3680         .writepage      = btrfs_writepage,
3681         .invalidatepage = btrfs_invalidatepage,
3682         .releasepage    = btrfs_releasepage,
3683 };
3684
3685 static struct inode_operations btrfs_file_inode_operations = {
3686         .truncate       = btrfs_truncate,
3687         .getattr        = btrfs_getattr,
3688         .setattr        = btrfs_setattr,
3689         .setxattr       = generic_setxattr,
3690         .getxattr       = generic_getxattr,
3691         .listxattr      = btrfs_listxattr,
3692         .removexattr    = generic_removexattr,
3693         .permission     = btrfs_permission,
3694 };
3695 static struct inode_operations btrfs_special_inode_operations = {
3696         .getattr        = btrfs_getattr,
3697         .setattr        = btrfs_setattr,
3698         .permission     = btrfs_permission,
3699 };
3700 static struct inode_operations btrfs_symlink_inode_operations = {
3701         .readlink       = generic_readlink,
3702         .follow_link    = page_follow_link_light,
3703         .put_link       = page_put_link,
3704         .permission     = btrfs_permission,
3705 };