/*
 * Copyright (C) 2008 Red Hat.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"

#define BITS_PER_BITMAP         (PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)

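/*
 * A note on the numbers above (illustrative, assuming 4K pages and a
 * 4K sectorsize): one bitmap carries PAGE_CACHE_SIZE * 8 = 32768 bits,
 * so a single bitmap page describes 128MB of the block group.  Covering
 * a full 1GB therefore takes 8 bitmap pages, i.e. 32K of memory, which
 * is exactly the MAX_CACHE_BYTES_PER_GIG budget that
 * recalculate_thresholds() below enforces.
 */
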
static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
                                          u64 offset)
{
        BUG_ON(offset < bitmap_start);
        offset -= bitmap_start;
        return (unsigned long)(div64_u64(offset, sectorsize));
}

static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
{
        return (unsigned long)(div64_u64(bytes, sectorsize));
}

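/*
 * Round an offset down to the start of the bitmap region that covers it.
 * A worked example with made-up numbers, assuming a 4K sectorsize: each
 * bitmap spans 32768 * 4K = 128MB, so an offset 200MB into the block
 * group maps to the bitmap that starts 128MB in.
 */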
static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
                                   u64 offset)
{
        u64 bitmap_start;
        u64 bytes_per_bitmap;

        bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
        bitmap_start = offset - block_group->key.objectid;
        bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
        bitmap_start *= bytes_per_bitmap;
        bitmap_start += block_group->key.objectid;

        return bitmap_start;
}

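/*
 * The free space tree holds both plain extent entries and bitmap
 * entries, keyed by offset.  A bitmap entry may share its offset with
 * the extent entry covering the front of the same region; the tie-break
 * below keeps the extent entry logically first so linear searches hit
 * the cheaper allocation path.
 */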
static int tree_insert_offset(struct rb_root *root, u64 offset,
                              struct rb_node *node, int bitmap)
{
        struct rb_node **p = &root->rb_node;
        struct rb_node *parent = NULL;
        struct btrfs_free_space *info;

        while (*p) {
                parent = *p;
                info = rb_entry(parent, struct btrfs_free_space, offset_index);

                if (offset < info->offset) {
                        p = &(*p)->rb_left;
                } else if (offset > info->offset) {
                        p = &(*p)->rb_right;
                } else {
                        /*
                         * we could have a bitmap entry and an extent entry
                         * share the same offset.  If this is the case, we want
                         * the extent entry to always be found first if we do a
                         * linear search through the tree, since we want to have
                         * the quickest allocation time, and allocating from an
                         * extent is faster than allocating from a bitmap.  So
                         * if we're inserting a bitmap and we find an entry at
                         * this offset, we want to go right, or after this entry
                         * logically.  If we are inserting an extent and we've
                         * found a bitmap, we want to go left, or before
                         * logically.
                         */
                        if (bitmap) {
                                WARN_ON(info->bitmap);
                                p = &(*p)->rb_right;
                        } else {
                                WARN_ON(!info->bitmap);
                                p = &(*p)->rb_left;
                        }
                }
        }

        rb_link_node(node, parent, p);
        rb_insert_color(node, root);

        return 0;
}

/*
 * searches the tree for the given offset.
 *
 * bitmap_only - If set, only a bitmap entry stored at exactly the given
 * offset is returned.
 *
 * fuzzy - If set, then we are trying to make an allocation: when no entry
 * contains the given offset, return the closest entry that ends after it
 * instead of NULL.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_block_group_cache *block_group,
                   u64 offset, int bitmap_only, int fuzzy)
{
        struct rb_node *n = block_group->free_space_offset.rb_node;
        struct btrfs_free_space *entry, *prev = NULL;

        /* find entry that is closest to the 'offset' */
        while (1) {
                if (!n) {
                        entry = NULL;
                        break;
                }

                entry = rb_entry(n, struct btrfs_free_space, offset_index);
                prev = entry;

                if (offset < entry->offset)
                        n = n->rb_left;
                else if (offset > entry->offset)
                        n = n->rb_right;
                else
                        break;
        }

        if (bitmap_only) {
                if (!entry)
                        return NULL;
                if (entry->bitmap)
                        return entry;

                /*
                 * bitmap entry and extent entry may share same offset,
                 * in that case, bitmap entry comes after extent entry.
                 */
                n = rb_next(n);
                if (!n)
                        return NULL;
                entry = rb_entry(n, struct btrfs_free_space, offset_index);
                if (entry->offset != offset)
                        return NULL;

                WARN_ON(!entry->bitmap);
                return entry;
        } else if (entry) {
                if (entry->bitmap) {
                        /*
                         * if previous extent entry covers the offset,
                         * we should return it instead of the bitmap entry
                         */
                        n = &entry->offset_index;
                        while (1) {
                                n = rb_prev(n);
                                if (!n)
                                        break;
                                prev = rb_entry(n, struct btrfs_free_space,
                                                offset_index);
                                if (!prev->bitmap) {
                                        if (prev->offset + prev->bytes > offset)
                                                entry = prev;
                                        break;
                                }
                        }
                }
                return entry;
        }

        if (!prev)
                return NULL;

        /* find last entry before the 'offset' */
        entry = prev;
        if (entry->offset > offset) {
                n = rb_prev(&entry->offset_index);
                if (n) {
                        entry = rb_entry(n, struct btrfs_free_space,
                                        offset_index);
                        BUG_ON(entry->offset > offset);
                } else {
                        if (fuzzy)
                                return entry;
                        else
                                return NULL;
                }
        }

        if (entry->bitmap) {
                n = &entry->offset_index;
                while (1) {
                        n = rb_prev(n);
                        if (!n)
                                break;
                        prev = rb_entry(n, struct btrfs_free_space,
                                        offset_index);
                        if (!prev->bitmap) {
                                if (prev->offset + prev->bytes > offset)
                                        return prev;
                                break;
                        }
                }
                if (entry->offset + BITS_PER_BITMAP *
                    block_group->sectorsize > offset)
                        return entry;
        } else if (entry->offset + entry->bytes > offset)
                return entry;

        if (!fuzzy)
                return NULL;

        while (1) {
                if (entry->bitmap) {
                        if (entry->offset + BITS_PER_BITMAP *
                            block_group->sectorsize > offset)
                                break;
                } else {
                        if (entry->offset + entry->bytes > offset)
                                break;
                }

                n = rb_next(&entry->offset_index);
                if (!n)
                        return NULL;
                entry = rb_entry(n, struct btrfs_free_space, offset_index);
        }
        return entry;
}

static void unlink_free_space(struct btrfs_block_group_cache *block_group,
                              struct btrfs_free_space *info)
{
        rb_erase(&info->offset_index, &block_group->free_space_offset);
        block_group->free_extents--;
        block_group->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_block_group_cache *block_group,
                           struct btrfs_free_space *info)
{
        int ret = 0;

        BUG_ON(!info->bitmap && !info->bytes);
        ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
                                 &info->offset_index, (info->bitmap != NULL));
        if (ret)
                return ret;

        block_group->free_space += info->bytes;
        block_group->free_extents++;
        return ret;
}

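/*
 * Worked example with illustrative numbers, assuming 4K pages: a 1GB
 * block group gets a 32K cache budget.  If two bitmaps are in use they
 * pin two pages (8K), leaving 24K; dividing that remainder by
 * sizeof(struct btrfs_free_space) gives the new extents_thresh.
 */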
static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
{
        u64 max_bytes, possible_bytes;

        /*
         * The goal is to keep the total amount of memory used per 1GB of
         * space at or below 32K, so we need to adjust how much memory we
         * allow to be used by extent based free space tracking.
         */
        max_bytes = MAX_CACHE_BYTES_PER_GIG *
                (div64_u64(block_group->key.offset, 1024 * 1024 * 1024));

        possible_bytes = (block_group->total_bitmaps * PAGE_CACHE_SIZE) +
                (sizeof(struct btrfs_free_space) *
                 block_group->extents_thresh);

        if (possible_bytes > max_bytes) {
                int extent_bytes = max_bytes -
                        (block_group->total_bitmaps * PAGE_CACHE_SIZE);

                if (extent_bytes <= 0) {
                        block_group->extents_thresh = 0;
                        return;
                }

                block_group->extents_thresh = extent_bytes /
                        (sizeof(struct btrfs_free_space));
        }
}

static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
                              struct btrfs_free_space *info, u64 offset,
                              u64 bytes)
{
        unsigned long start, end;
        unsigned long i;

        start = offset_to_bit(info->offset, block_group->sectorsize, offset);
        end = start + bytes_to_bits(bytes, block_group->sectorsize);
        BUG_ON(end > BITS_PER_BITMAP);

        for (i = start; i < end; i++)
                clear_bit(i, info->bitmap);

        info->bytes -= bytes;
        block_group->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
                            struct btrfs_free_space *info, u64 offset,
                            u64 bytes)
{
        unsigned long start, end;
        unsigned long i;

        start = offset_to_bit(info->offset, block_group->sectorsize, offset);
        end = start + bytes_to_bits(bytes, block_group->sectorsize);
        BUG_ON(end > BITS_PER_BITMAP);

        for (i = start; i < end; i++)
                set_bit(i, info->bitmap);

        info->bytes += bytes;
        block_group->free_space += bytes;
}

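/*
 * First-fit scan of a single bitmap: starting at *offset, find the first
 * run of set bits at least *bytes long.  On success *offset and *bytes
 * are updated to describe the run that was found and 0 is returned; -1
 * means no run that large exists in this bitmap.
 */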
static int search_bitmap(struct btrfs_block_group_cache *block_group,
                         struct btrfs_free_space *bitmap_info, u64 *offset,
                         u64 *bytes)
{
        unsigned long found_bits = 0;
        unsigned long bits, i;
        unsigned long next_zero;

        i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
                          max_t(u64, *offset, bitmap_info->offset));
        bits = bytes_to_bits(*bytes, block_group->sectorsize);

        for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
             i < BITS_PER_BITMAP;
             i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
                next_zero = find_next_zero_bit(bitmap_info->bitmap,
                                               BITS_PER_BITMAP, i);
                if ((next_zero - i) >= bits) {
                        found_bits = next_zero - i;
                        break;
                }
                i = next_zero;
        }

        if (found_bits) {
                *offset = (u64)(i * block_group->sectorsize) +
                        bitmap_info->offset;
                *bytes = (u64)(found_bits) * block_group->sectorsize;
                return 0;
        }

        return -1;
}

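/*
 * Walk the tree in offset order, starting from the bitmap region that
 * would cover *offset, and return the first entry (extent or bitmap)
 * with at least *bytes of space, updating *offset and *bytes to the
 * space actually found.
 */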
static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
                                                *block_group, u64 *offset,
                                                u64 *bytes, int debug)
{
        struct btrfs_free_space *entry;
        struct rb_node *node;
        int ret;

        if (!block_group->free_space_offset.rb_node)
                return NULL;

        entry = tree_search_offset(block_group,
                                   offset_to_bitmap(block_group, *offset),
                                   0, 1);
        if (!entry)
                return NULL;

        for (node = &entry->offset_index; node; node = rb_next(node)) {
                entry = rb_entry(node, struct btrfs_free_space, offset_index);
                if (entry->bytes < *bytes)
                        continue;

                if (entry->bitmap) {
                        ret = search_bitmap(block_group, entry, offset, bytes);
                        if (!ret)
                                return entry;
                        continue;
                }

                *offset = entry->offset;
                *bytes = entry->bytes;
                return entry;
        }

        return NULL;
}

static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
                           struct btrfs_free_space *info, u64 offset)
{
        u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
        int max_bitmaps = (int)div64_u64(block_group->key.offset +
                                         bytes_per_bg - 1, bytes_per_bg);
        BUG_ON(block_group->total_bitmaps >= max_bitmaps);

        info->offset = offset_to_bitmap(block_group, offset);
        link_free_space(block_group, info);
        block_group->total_bitmaps++;

        recalculate_thresholds(block_group);
}

static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
                              struct btrfs_free_space *bitmap_info,
                              u64 *offset, u64 *bytes)
{
        u64 end;
        u64 search_start, search_bytes;
        int ret;

again:
        end = bitmap_info->offset +
                (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;

        /*
         * XXX - this can go away after a few releases.
         *
         * since the only user of btrfs_remove_free_space is the tree logging
         * stuff, and the only way to test that is under crash conditions, we
         * want to have this debug stuff here just in case something's not
         * working.  Search the bitmap for the space we are trying to use to
         * make sure it's actually there.  If it's not there then we need to
         * stop because something has gone wrong.
         */
        search_start = *offset;
        search_bytes = *bytes;
        ret = search_bitmap(block_group, bitmap_info, &search_start,
                            &search_bytes);
        BUG_ON(ret < 0 || search_start != *offset);

        if (*offset > bitmap_info->offset && *offset + *bytes > end) {
                bitmap_clear_bits(block_group, bitmap_info, *offset,
                                  end - *offset + 1);
                *bytes -= end - *offset + 1;
                *offset = end + 1;
        } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
                bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
                *bytes = 0;
        }

        if (*bytes) {
                struct rb_node *next = rb_next(&bitmap_info->offset_index);
                if (!bitmap_info->bytes) {
                        unlink_free_space(block_group, bitmap_info);
                        kfree(bitmap_info->bitmap);
                        kfree(bitmap_info);
                        block_group->total_bitmaps--;
                        recalculate_thresholds(block_group);
                }

                /*
                 * no entry after this bitmap, but we still have bytes to
                 * remove, so something has gone wrong.
                 */
                if (!next)
                        return -EINVAL;

                bitmap_info = rb_entry(next, struct btrfs_free_space,
                                       offset_index);

                /*
                 * if the next entry isn't a bitmap we need to return to let the
                 * extent stuff do its work.
                 */
                if (!bitmap_info->bitmap)
                        return -EAGAIN;

                /*
                 * Ok the next item is a bitmap, but it may not actually hold
                 * the information for the rest of this free space stuff, so
                 * look for it, and if we don't find it return so we can try
                 * everything over again.
                 */
                search_start = *offset;
                search_bytes = *bytes;
                ret = search_bitmap(block_group, bitmap_info, &search_start,
                                    &search_bytes);
                if (ret < 0 || search_start != *offset)
                        return -EAGAIN;

                goto again;
        } else if (!bitmap_info->bytes) {
                unlink_free_space(block_group, bitmap_info);
                kfree(bitmap_info->bitmap);
                kfree(bitmap_info);
                block_group->total_bitmaps--;
                recalculate_thresholds(block_group);
        }

        return 0;
}

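/*
 * Try to record the free space described by 'info' in one or more
 * bitmaps.  Returns 0 if the space should be kept as an extent entry
 * instead, 1 once the space has been fully accounted in bitmaps, or
 * -ENOMEM if allocating a new bitmap failed.
 */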
static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
                              struct btrfs_free_space *info)
{
        struct btrfs_free_space *bitmap_info;
        int added = 0;
        u64 bytes, offset, end;
        int ret;

        /*
         * If we are below the extents threshold then we can add this as an
         * extent, and don't have to deal with the bitmap
         */
        if (block_group->free_extents < block_group->extents_thresh &&
            info->bytes > block_group->sectorsize * 4)
                return 0;

        /*
         * some block groups are so tiny they can't be enveloped by a bitmap, so
         * don't even bother to create a bitmap for this
         */
        if (BITS_PER_BITMAP * block_group->sectorsize >
            block_group->key.offset)
                return 0;

        bytes = info->bytes;
        offset = info->offset;

again:
        bitmap_info = tree_search_offset(block_group,
                                         offset_to_bitmap(block_group, offset),
                                         1, 0);
        if (!bitmap_info) {
                BUG_ON(added);
                goto new_bitmap;
        }

        end = bitmap_info->offset +
                (u64)(BITS_PER_BITMAP * block_group->sectorsize);

        if (offset >= bitmap_info->offset && offset + bytes > end) {
                bitmap_set_bits(block_group, bitmap_info, offset,
                                end - offset);
                bytes -= end - offset;
                offset = end;
                added = 0;
        } else if (offset >= bitmap_info->offset && offset + bytes <= end) {
                bitmap_set_bits(block_group, bitmap_info, offset, bytes);
                bytes = 0;
        } else {
                BUG();
        }

        if (!bytes) {
                ret = 1;
                goto out;
        } else
                goto again;

new_bitmap:
        if (info && info->bitmap) {
                add_new_bitmap(block_group, info, offset);
                added = 1;
                info = NULL;
                goto again;
        } else {
                spin_unlock(&block_group->tree_lock);

                /* no pre-allocated info, allocate a new one */
                if (!info) {
                        info = kzalloc(sizeof(struct btrfs_free_space),
                                       GFP_NOFS);
                        if (!info) {
                                spin_lock(&block_group->tree_lock);
                                ret = -ENOMEM;
                                goto out;
                        }
                }

                /* allocate the bitmap */
                info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
                spin_lock(&block_group->tree_lock);
                if (!info->bitmap) {
                        ret = -ENOMEM;
                        goto out;
                }
                goto again;
        }

out:
        if (info) {
                if (info->bitmap)
                        kfree(info->bitmap);
                kfree(info);
        }

        return ret;
}

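/*
 * Record [offset, offset + bytes) as free space in the block group,
 * merging with any adjacent extent entries, or falling back to a bitmap
 * when both neighbours are bitmaps (or absent) and the extent threshold
 * has been reached.
 */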
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
                         u64 offset, u64 bytes)
{
        struct btrfs_free_space *right_info = NULL;
        struct btrfs_free_space *left_info = NULL;
        struct btrfs_free_space *info = NULL;
        int ret = 0;

        info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
        if (!info)
                return -ENOMEM;

        info->offset = offset;
        info->bytes = bytes;

        spin_lock(&block_group->tree_lock);

        /*
         * first we want to see if there is free space adjacent to the range we
         * are adding.  If there is, remove that struct and add a new one to
         * cover the entire range.
         */
        right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
        if (right_info && rb_prev(&right_info->offset_index))
                left_info = rb_entry(rb_prev(&right_info->offset_index),
                                     struct btrfs_free_space, offset_index);
        else
                left_info = tree_search_offset(block_group, offset - 1, 0, 0);

        /*
         * If there was no extent directly to the left or right of this new
         * extent then we know we're going to have to allocate a new extent, so
         * before we do that see if we need to drop this into a bitmap
         */
        if ((!left_info || left_info->bitmap) &&
            (!right_info || right_info->bitmap)) {
                ret = insert_into_bitmap(block_group, info);

                if (ret < 0) {
                        goto out;
                } else if (ret) {
                        ret = 0;
                        goto out;
                }
        }

        if (right_info && !right_info->bitmap) {
                unlink_free_space(block_group, right_info);
                info->bytes += right_info->bytes;
                kfree(right_info);
        }

        if (left_info && !left_info->bitmap &&
            left_info->offset + left_info->bytes == offset) {
                unlink_free_space(block_group, left_info);
                info->offset = left_info->offset;
                info->bytes += left_info->bytes;
                kfree(left_info);
        }

        ret = link_free_space(block_group, info);
        if (ret)
                kfree(info);
out:
        spin_unlock(&block_group->tree_lock);

        if (ret) {
                printk(KERN_CRIT "btrfs: unable to add free space: %d\n", ret);
                BUG_ON(ret == -EEXIST);
        }

        return ret;
}

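/*
 * Remove [offset, offset + bytes) from the free space cache, splitting
 * extent entries and clearing bitmap bits as needed.  The range may span
 * multiple entries; the caller here is the tree log replay code.
 */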
int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
                            u64 offset, u64 bytes)
{
        struct btrfs_free_space *info;
        struct btrfs_free_space *next_info = NULL;
        int ret = 0;

        spin_lock(&block_group->tree_lock);

again:
        info = tree_search_offset(block_group, offset, 0, 0);
        if (!info) {
                /*
                 * we didn't find an extent that matched the space we wanted
                 * to remove; look for a bitmap instead
                 */
                info = tree_search_offset(block_group,
                                          offset_to_bitmap(block_group, offset),
                                          1, 0);
                if (!info) {
                        WARN_ON(1);
                        goto out_lock;
                }
        }

        if (info->bytes < bytes && rb_next(&info->offset_index)) {
                u64 end;
                next_info = rb_entry(rb_next(&info->offset_index),
                                             struct btrfs_free_space,
                                             offset_index);

                if (next_info->bitmap)
                        end = next_info->offset + BITS_PER_BITMAP *
                                block_group->sectorsize - 1;
                else
                        end = next_info->offset + next_info->bytes;

                if (next_info->bytes < bytes ||
                    next_info->offset > offset || offset > end) {
                        printk(KERN_CRIT "Found free space at %llu, size %llu,"
                              " trying to use %llu\n",
                              (unsigned long long)info->offset,
                              (unsigned long long)info->bytes,
                              (unsigned long long)bytes);
                        WARN_ON(1);
                        ret = -EINVAL;
                        goto out_lock;
                }

                info = next_info;
        }

        if (info->bytes == bytes) {
                unlink_free_space(block_group, info);
                if (info->bitmap) {
                        kfree(info->bitmap);
                        block_group->total_bitmaps--;
                }
                kfree(info);
                goto out_lock;
        }

        if (!info->bitmap && info->offset == offset) {
                unlink_free_space(block_group, info);
                info->offset += bytes;
                info->bytes -= bytes;
                link_free_space(block_group, info);
                goto out_lock;
        }

        if (!info->bitmap && info->offset <= offset &&
            info->offset + info->bytes >= offset + bytes) {
                u64 old_start = info->offset;
                /*
                 * we're freeing space in the middle of the info,
                 * this can happen during tree log replay
                 *
                 * first unlink the old info and then
                 * insert it again after the hole we're creating
                 */
                unlink_free_space(block_group, info);
                if (offset + bytes < info->offset + info->bytes) {
                        u64 old_end = info->offset + info->bytes;

                        info->offset = offset + bytes;
                        info->bytes = old_end - info->offset;
                        ret = link_free_space(block_group, info);
                        WARN_ON(ret);
                        if (ret)
                                goto out_lock;
                } else {
                        /* the hole we're creating ends at the end
                         * of the info struct, just free the info
                         */
                        kfree(info);
                }
                spin_unlock(&block_group->tree_lock);

                /* step two, insert a new info struct to cover
                 * anything before the hole
                 */
                ret = btrfs_add_free_space(block_group, old_start,
                                           offset - old_start);
                WARN_ON(ret);
                goto out;
        }

        ret = remove_from_bitmap(block_group, info, &offset, &bytes);
        if (ret == -EAGAIN)
                goto again;
        BUG_ON(ret);
out_lock:
        spin_unlock(&block_group->tree_lock);
out:
        return ret;
}

void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
                           u64 bytes)
{
        struct btrfs_free_space *info;
        struct rb_node *n;
        int count = 0;

        for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
                info = rb_entry(n, struct btrfs_free_space, offset_index);
                if (info->bytes >= bytes)
                        count++;
                printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
                       (unsigned long long)info->offset,
                       (unsigned long long)info->bytes,
                       (info->bitmap) ? "yes" : "no");
        }
        printk(KERN_INFO "block group has cluster?: %s\n",
               list_empty(&block_group->cluster_list) ? "no" : "yes");
        printk(KERN_INFO "%d blocks of free space at or bigger than %llu bytes\n",
               count, (unsigned long long)bytes);
}

u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
{
        struct btrfs_free_space *info;
        struct rb_node *n;
        u64 ret = 0;

        for (n = rb_first(&block_group->free_space_offset); n;
             n = rb_next(n)) {
                info = rb_entry(n, struct btrfs_free_space, offset_index);
                ret += info->bytes;
        }

        return ret;
}

/*
 * for a given cluster, put all of its extents back into the free
 * space cache.  If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already.  In that case, we just return without changing anything.
 */
static int
__btrfs_return_cluster_to_free_space(
                             struct btrfs_block_group_cache *block_group,
                             struct btrfs_free_cluster *cluster)
{
        struct btrfs_free_space *entry;
        struct rb_node *node;
        bool bitmap;

        spin_lock(&cluster->lock);
        if (cluster->block_group != block_group)
                goto out;

        bitmap = cluster->points_to_bitmap;
        cluster->block_group = NULL;
        cluster->window_start = 0;
        list_del_init(&cluster->block_group_list);
        cluster->points_to_bitmap = false;

        if (bitmap)
                goto out;

        node = rb_first(&cluster->root);
        while (node) {
                entry = rb_entry(node, struct btrfs_free_space, offset_index);
                node = rb_next(&entry->offset_index);
                rb_erase(&entry->offset_index, &cluster->root);
                BUG_ON(entry->bitmap);
                tree_insert_offset(&block_group->free_space_offset,
                                   entry->offset, &entry->offset_index, 0);
        }
        cluster->root.rb_node = NULL;

out:
        spin_unlock(&cluster->lock);
        btrfs_put_block_group(block_group);
        return 0;
}

void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
        struct btrfs_free_space *info;
        struct rb_node *node;
        struct btrfs_free_cluster *cluster;
        struct list_head *head;

        spin_lock(&block_group->tree_lock);
        while ((head = block_group->cluster_list.next) !=
               &block_group->cluster_list) {
                cluster = list_entry(head, struct btrfs_free_cluster,
                                     block_group_list);

                WARN_ON(cluster->block_group != block_group);
                __btrfs_return_cluster_to_free_space(block_group, cluster);
                if (need_resched()) {
                        spin_unlock(&block_group->tree_lock);
                        cond_resched();
                        spin_lock(&block_group->tree_lock);
                }
        }

        while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
                info = rb_entry(node, struct btrfs_free_space, offset_index);
                unlink_free_space(block_group, info);
                if (info->bitmap)
                        kfree(info->bitmap);
                kfree(info);
                if (need_resched()) {
                        spin_unlock(&block_group->tree_lock);
                        cond_resched();
                        spin_lock(&block_group->tree_lock);
                }
        }

        spin_unlock(&block_group->tree_lock);
}

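/*
 * Search for at least bytes + empty_size of contiguous free space,
 * remove 'bytes' of it from the cache, and return its start offset.
 * A return of 0 means nothing suitable was found.
 */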
u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
                               u64 offset, u64 bytes, u64 empty_size)
{
        struct btrfs_free_space *entry = NULL;
        u64 bytes_search = bytes + empty_size;
        u64 ret = 0;

        spin_lock(&block_group->tree_lock);
        entry = find_free_space(block_group, &offset, &bytes_search, 0);
        if (!entry)
                goto out;

        ret = offset;
        if (entry->bitmap) {
                bitmap_clear_bits(block_group, entry, offset, bytes);
                if (!entry->bytes) {
                        unlink_free_space(block_group, entry);
                        kfree(entry->bitmap);
                        kfree(entry);
                        block_group->total_bitmaps--;
                        recalculate_thresholds(block_group);
                }
        } else {
                unlink_free_space(block_group, entry);
                entry->offset += bytes;
                entry->bytes -= bytes;
                if (!entry->bytes)
                        kfree(entry);
                else
                        link_free_space(block_group, entry);
        }

out:
        spin_unlock(&block_group->tree_lock);

        return ret;
}

/*
 * given a cluster, put all of its extents back into the free space
 * cache.  If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
                               struct btrfs_block_group_cache *block_group,
                               struct btrfs_free_cluster *cluster)
{
        int ret;

        /* first, get a safe pointer to the block group */
        spin_lock(&cluster->lock);
        if (!block_group) {
                block_group = cluster->block_group;
                if (!block_group) {
                        spin_unlock(&cluster->lock);
                        return 0;
                }
        } else if (cluster->block_group != block_group) {
                /* someone else has already freed it, don't redo their work */
                spin_unlock(&cluster->lock);
                return 0;
        }
        atomic_inc(&block_group->count);
        spin_unlock(&cluster->lock);

        /* now return any extents the cluster had on it */
        spin_lock(&block_group->tree_lock);
        ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
        spin_unlock(&block_group->tree_lock);

        /* finally drop our ref */
        btrfs_put_block_group(block_group);
        return ret;
}

static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
                                   struct btrfs_free_cluster *cluster,
                                   u64 bytes, u64 min_start)
{
        struct btrfs_free_space *entry;
        int err;
        u64 search_start = cluster->window_start;
        u64 search_bytes = bytes;
        u64 ret = 0;

        spin_lock(&block_group->tree_lock);
        spin_lock(&cluster->lock);

        if (!cluster->points_to_bitmap)
                goto out;

        if (cluster->block_group != block_group)
                goto out;

        /*
         * search_start is the beginning of the bitmap, but at some point it may
         * be a good idea to point to the actual start of the free area in the
         * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
         * to 1 to make sure we get the bitmap entry
         */
        entry = tree_search_offset(block_group,
                                   offset_to_bitmap(block_group, search_start),
                                   1, 0);
        if (!entry || !entry->bitmap)
                goto out;

        search_start = min_start;
        search_bytes = bytes;

        err = search_bitmap(block_group, entry, &search_start,
                            &search_bytes);
        if (err)
                goto out;

        ret = search_start;
        bitmap_clear_bits(block_group, entry, ret, bytes);
out:
        spin_unlock(&cluster->lock);
        spin_unlock(&block_group->tree_lock);

        return ret;
}

/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
                             struct btrfs_free_cluster *cluster, u64 bytes,
                             u64 min_start)
{
        struct btrfs_free_space *entry = NULL;
        struct rb_node *node;
        u64 ret = 0;

        if (cluster->points_to_bitmap)
                return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
                                               min_start);

        spin_lock(&cluster->lock);
        if (bytes > cluster->max_size)
                goto out;

        if (cluster->block_group != block_group)
                goto out;

        node = rb_first(&cluster->root);
        if (!node)
                goto out;

        entry = rb_entry(node, struct btrfs_free_space, offset_index);

        while (1) {
                if (entry->bytes < bytes || entry->offset < min_start) {
                        struct rb_node *node;

                        node = rb_next(&entry->offset_index);
                        if (!node)
                                break;
                        entry = rb_entry(node, struct btrfs_free_space,
                                         offset_index);
                        continue;
                }
                ret = entry->offset;

                entry->offset += bytes;
                entry->bytes -= bytes;

                if (entry->bytes == 0) {
                        rb_erase(&entry->offset_index, &cluster->root);
                        kfree(entry);
                }
                break;
        }
out:
        spin_unlock(&cluster->lock);

        return ret;
}

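/*
 * Try to build a cluster out of a single bitmap entry: scan for runs of
 * set bits of at least min_bytes each until 'bytes' worth of bits have
 * been found, resetting the window whenever it spreads out too far.  On
 * success the cluster is pointed at the bitmap via points_to_bitmap.
 */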
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
                                struct btrfs_free_space *entry,
                                struct btrfs_free_cluster *cluster,
                                u64 offset, u64 bytes, u64 min_bytes)
{
        unsigned long next_zero;
        unsigned long i;
        unsigned long search_bits;
        unsigned long total_bits;
        unsigned long found_bits;
        unsigned long start = 0;
        unsigned long total_found = 0;
        bool found = false;

        i = offset_to_bit(entry->offset, block_group->sectorsize,
                          max_t(u64, offset, entry->offset));
        search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
        total_bits = bytes_to_bits(bytes, block_group->sectorsize);

again:
        found_bits = 0;
        for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
             i < BITS_PER_BITMAP;
             i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
                next_zero = find_next_zero_bit(entry->bitmap,
                                               BITS_PER_BITMAP, i);
                if (next_zero - i >= search_bits) {
                        found_bits = next_zero - i;
                        break;
                }
                i = next_zero;
        }

        if (!found_bits)
                return -1;

        if (!found) {
                start = i;
                found = true;
        }

        total_found += found_bits;

        if (cluster->max_size < found_bits * block_group->sectorsize)
                cluster->max_size = found_bits * block_group->sectorsize;

        if (total_found < total_bits) {
                i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
                if (i - start > total_bits * 2) {
                        total_found = 0;
                        cluster->max_size = 0;
                        found = false;
                }
                goto again;
        }

        cluster->window_start = start * block_group->sectorsize +
                entry->offset;
        cluster->points_to_bitmap = true;

        return 0;
}

/*
 * here we try to find a cluster of blocks in a block group.  The goal
 * is to find at least bytes free and up to empty_size + bytes free.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -ENOSPC
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
                             struct btrfs_block_group_cache *block_group,
                             struct btrfs_free_cluster *cluster,
                             u64 offset, u64 bytes, u64 empty_size)
{
        struct btrfs_free_space *entry = NULL;
        struct rb_node *node;
        struct btrfs_free_space *next;
        struct btrfs_free_space *last = NULL;
        u64 min_bytes;
        u64 window_start;
        u64 window_free;
        u64 max_extent = 0;
        bool found_bitmap = false;
        int ret;

        /* for metadata, allow allocates with more holes */
        if (btrfs_test_opt(root, SSD_SPREAD)) {
                min_bytes = bytes + empty_size;
        } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
                /*
                 * we want to do larger allocations when we are
                 * flushing out the delayed refs, it helps prevent
                 * making more work as we go along.
                 */
                if (trans->transaction->delayed_refs.flushing)
                        min_bytes = max(bytes, (bytes + empty_size) >> 1);
                else
                        min_bytes = max(bytes, (bytes + empty_size) >> 4);
        } else
                min_bytes = max(bytes, (bytes + empty_size) >> 2);

        spin_lock(&block_group->tree_lock);
        spin_lock(&cluster->lock);

        /* someone already found a cluster, hooray */
        if (cluster->block_group) {
                ret = 0;
                goto out;
        }
again:
        entry = tree_search_offset(block_group, offset, found_bitmap, 1);
        if (!entry) {
                ret = -ENOSPC;
                goto out;
        }

        /*
         * If found_bitmap is true, we exhausted our search for extent entries,
         * and we just want to search all of the bitmaps that we can find, and
         * ignore any extent entries we find.
         */
        while (entry->bitmap || found_bitmap ||
               (!entry->bitmap && entry->bytes < min_bytes)) {
                struct rb_node *node = rb_next(&entry->offset_index);

                if (entry->bitmap && entry->bytes > bytes + empty_size) {
                        ret = btrfs_bitmap_cluster(block_group, entry, cluster,
                                                   offset, bytes + empty_size,
                                                   min_bytes);
                        if (!ret)
                                goto got_it;
                }

                if (!node) {
                        ret = -ENOSPC;
                        goto out;
                }
                entry = rb_entry(node, struct btrfs_free_space, offset_index);
        }

        /*
         * We already searched all the extent entries from the passed in offset
         * to the end and didn't find enough space for the cluster, and we also
         * didn't find any bitmaps that met our criteria, just go ahead and exit
         */
        if (found_bitmap) {
                ret = -ENOSPC;
                goto out;
        }

        cluster->points_to_bitmap = false;
        window_start = entry->offset;
        window_free = entry->bytes;
        last = entry;
        max_extent = entry->bytes;

        while (1) {
                /* our window is just right, let's fill it */
                if (window_free >= bytes + empty_size)
                        break;

                node = rb_next(&last->offset_index);
                if (!node) {
                        if (found_bitmap)
                                goto again;
                        ret = -ENOSPC;
                        goto out;
                }
                next = rb_entry(node, struct btrfs_free_space, offset_index);

                /*
                 * we found a bitmap, so if this search doesn't result in a
                 * cluster, we know to go and search again for the bitmaps and
                 * start looking for space there
                 */
                if (next->bitmap) {
                        if (!found_bitmap)
                                offset = next->offset;
                        found_bitmap = true;
                        last = next;
                        continue;
                }

                /*
                 * we haven't filled the empty size and the window is
                 * very large.  reset and try again
                 */
                if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
                    next->offset - window_start > (bytes + empty_size) * 2) {
                        entry = next;
                        window_start = entry->offset;
                        window_free = entry->bytes;
                        last = entry;
                        max_extent = entry->bytes;
                } else {
                        last = next;
                        window_free += next->bytes;
                        if (next->bytes > max_extent)
                                max_extent = next->bytes;
                }
        }

        cluster->window_start = entry->offset;

        /*
         * now we've found our entries, pull them out of the free space
         * cache and put them into the cluster rbtree
         *
         * The cluster includes an rbtree, but only uses the offset index
         * of each free space cache entry.
         */
        while (1) {
                node = rb_next(&entry->offset_index);
                if (entry->bitmap && node) {
                        entry = rb_entry(node, struct btrfs_free_space,
                                         offset_index);
                        continue;
                } else if (entry->bitmap && !node) {
                        break;
                }

                rb_erase(&entry->offset_index, &block_group->free_space_offset);
                ret = tree_insert_offset(&cluster->root, entry->offset,
                                         &entry->offset_index, 0);
                BUG_ON(ret);

                if (!node || entry == last)
                        break;

                entry = rb_entry(node, struct btrfs_free_space, offset_index);
        }

        cluster->max_size = max_extent;
got_it:
        ret = 0;
        atomic_inc(&block_group->count);
        list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
        cluster->block_group = block_group;
out:
        spin_unlock(&cluster->lock);
        spin_unlock(&block_group->tree_lock);

        return ret;
}

/*
 * simple code to zero out a cluster
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
        spin_lock_init(&cluster->lock);
        spin_lock_init(&cluster->refill_lock);
        cluster->root.rb_node = NULL;
        cluster->max_size = 0;
        cluster->points_to_bitmap = false;
        INIT_LIST_HEAD(&cluster->block_group_list);
        cluster->block_group = NULL;
}