Merge branch 'upstream-fixes' into upstream
[linux-2.6] / fs / ext2 / balloc.c
1 /*
2  *  linux/fs/ext2/balloc.c
3  *
4  * Copyright (C) 1992, 1993, 1994, 1995
5  * Remy Card (card@masi.ibp.fr)
6  * Laboratoire MASI - Institut Blaise Pascal
7  * Universite Pierre et Marie Curie (Paris VI)
8  *
9  *  Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
10  *  Big-endian to little-endian byte-swapping/bitmaps by
11  *        David S. Miller (davem@caip.rutgers.edu), 1995
12  */
13
14 #include <linux/config.h>
15 #include "ext2.h"
16 #include <linux/quotaops.h>
17 #include <linux/sched.h>
18 #include <linux/buffer_head.h>
19 #include <linux/capability.h>
20
21 /*
22  * balloc.c contains the blocks allocation and deallocation routines
23  */
24
/*
 * The free blocks are managed by bitmaps.  A file system contains several
 * block groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
 * block for inodes, N blocks for the inode table and data blocks.
 *
 * The file system contains group descriptors which are located after the
 * super block.  Each descriptor contains the number of the bitmap block and
 * the free blocks count of its group.  The descriptors are loaded in memory
 * when a file system is mounted (see ext2_read_super).
 */
35
36
/*
 * True when b lies in the closed interval [first, first + len - 1].
 * Note that b and first are each evaluated twice.
 */
#define in_range(b, first, len) \
        (((first) <= (b)) && ((first) + (len) - 1 >= (b)))
38
39 struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
40                                              unsigned int block_group,
41                                              struct buffer_head ** bh)
42 {
43         unsigned long group_desc;
44         unsigned long offset;
45         struct ext2_group_desc * desc;
46         struct ext2_sb_info *sbi = EXT2_SB(sb);
47
48         if (block_group >= sbi->s_groups_count) {
49                 ext2_error (sb, "ext2_get_group_desc",
50                             "block_group >= groups_count - "
51                             "block_group = %d, groups_count = %lu",
52                             block_group, sbi->s_groups_count);
53
54                 return NULL;
55         }
56
57         group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb);
58         offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1);
59         if (!sbi->s_group_desc[group_desc]) {
60                 ext2_error (sb, "ext2_get_group_desc",
61                             "Group descriptor not loaded - "
62                             "block_group = %d, group_desc = %lu, desc = %lu",
63                              block_group, group_desc, offset);
64                 return NULL;
65         }
66
67         desc = (struct ext2_group_desc *) sbi->s_group_desc[group_desc]->b_data;
68         if (bh)
69                 *bh = sbi->s_group_desc[group_desc];
70         return desc + offset;
71 }
72
73 /*
74  * Read the bitmap for a given block_group, reading into the specified 
75  * slot in the superblock's bitmap cache.
76  *
77  * Return buffer_head on success or NULL in case of failure.
78  */
79 static struct buffer_head *
80 read_block_bitmap(struct super_block *sb, unsigned int block_group)
81 {
82         struct ext2_group_desc * desc;
83         struct buffer_head * bh = NULL;
84         
85         desc = ext2_get_group_desc (sb, block_group, NULL);
86         if (!desc)
87                 goto error_out;
88         bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
89         if (!bh)
90                 ext2_error (sb, "read_block_bitmap",
91                             "Cannot read block bitmap - "
92                             "block_group = %d, block_bitmap = %u",
93                             block_group, le32_to_cpu(desc->bg_block_bitmap));
94 error_out:
95         return bh;
96 }
97
98 /*
99  * Set sb->s_dirt here because the superblock was "logically" altered.  We
100  * need to recalculate its free blocks count and flush it out.
101  */
102 static int reserve_blocks(struct super_block *sb, int count)
103 {
104         struct ext2_sb_info *sbi = EXT2_SB(sb);
105         struct ext2_super_block *es = sbi->s_es;
106         unsigned free_blocks;
107         unsigned root_blocks;
108
109         free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
110         root_blocks = le32_to_cpu(es->s_r_blocks_count);
111
112         if (free_blocks < count)
113                 count = free_blocks;
114
115         if (free_blocks < root_blocks + count && !capable(CAP_SYS_RESOURCE) &&
116             sbi->s_resuid != current->fsuid &&
117             (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
118                 /*
119                  * We are too close to reserve and we are not privileged.
120                  * Can we allocate anything at all?
121                  */
122                 if (free_blocks > root_blocks)
123                         count = free_blocks - root_blocks;
124                 else
125                         return 0;
126         }
127
128         percpu_counter_mod(&sbi->s_freeblocks_counter, -count);
129         sb->s_dirt = 1;
130         return count;
131 }
132
133 static void release_blocks(struct super_block *sb, int count)
134 {
135         if (count) {
136                 struct ext2_sb_info *sbi = EXT2_SB(sb);
137
138                 percpu_counter_mod(&sbi->s_freeblocks_counter, count);
139                 sb->s_dirt = 1;
140         }
141 }
142
143 static int group_reserve_blocks(struct ext2_sb_info *sbi, int group_no,
144         struct ext2_group_desc *desc, struct buffer_head *bh, int count)
145 {
146         unsigned free_blocks;
147
148         if (!desc->bg_free_blocks_count)
149                 return 0;
150
151         spin_lock(sb_bgl_lock(sbi, group_no));
152         free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
153         if (free_blocks < count)
154                 count = free_blocks;
155         desc->bg_free_blocks_count = cpu_to_le16(free_blocks - count);
156         spin_unlock(sb_bgl_lock(sbi, group_no));
157         mark_buffer_dirty(bh);
158         return count;
159 }
160
161 static void group_release_blocks(struct super_block *sb, int group_no,
162         struct ext2_group_desc *desc, struct buffer_head *bh, int count)
163 {
164         if (count) {
165                 struct ext2_sb_info *sbi = EXT2_SB(sb);
166                 unsigned free_blocks;
167
168                 spin_lock(sb_bgl_lock(sbi, group_no));
169                 free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
170                 desc->bg_free_blocks_count = cpu_to_le16(free_blocks + count);
171                 spin_unlock(sb_bgl_lock(sbi, group_no));
172                 sb->s_dirt = 1;
173                 mark_buffer_dirty(bh);
174         }
175 }
176
177 /* Free given blocks, update quota and i_blocks field */
178 void ext2_free_blocks (struct inode * inode, unsigned long block,
179                        unsigned long count)
180 {
181         struct buffer_head *bitmap_bh = NULL;
182         struct buffer_head * bh2;
183         unsigned long block_group;
184         unsigned long bit;
185         unsigned long i;
186         unsigned long overflow;
187         struct super_block * sb = inode->i_sb;
188         struct ext2_sb_info * sbi = EXT2_SB(sb);
189         struct ext2_group_desc * desc;
190         struct ext2_super_block * es = sbi->s_es;
191         unsigned freed = 0, group_freed;
192
193         if (block < le32_to_cpu(es->s_first_data_block) ||
194             block + count < block ||
195             block + count > le32_to_cpu(es->s_blocks_count)) {
196                 ext2_error (sb, "ext2_free_blocks",
197                             "Freeing blocks not in datazone - "
198                             "block = %lu, count = %lu", block, count);
199                 goto error_return;
200         }
201
202         ext2_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1);
203
204 do_more:
205         overflow = 0;
206         block_group = (block - le32_to_cpu(es->s_first_data_block)) /
207                       EXT2_BLOCKS_PER_GROUP(sb);
208         bit = (block - le32_to_cpu(es->s_first_data_block)) %
209                       EXT2_BLOCKS_PER_GROUP(sb);
210         /*
211          * Check to see if we are freeing blocks across a group
212          * boundary.
213          */
214         if (bit + count > EXT2_BLOCKS_PER_GROUP(sb)) {
215                 overflow = bit + count - EXT2_BLOCKS_PER_GROUP(sb);
216                 count -= overflow;
217         }
218         brelse(bitmap_bh);
219         bitmap_bh = read_block_bitmap(sb, block_group);
220         if (!bitmap_bh)
221                 goto error_return;
222
223         desc = ext2_get_group_desc (sb, block_group, &bh2);
224         if (!desc)
225                 goto error_return;
226
227         if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) ||
228             in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) ||
229             in_range (block, le32_to_cpu(desc->bg_inode_table),
230                       sbi->s_itb_per_group) ||
231             in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table),
232                       sbi->s_itb_per_group))
233                 ext2_error (sb, "ext2_free_blocks",
234                             "Freeing blocks in system zones - "
235                             "Block = %lu, count = %lu",
236                             block, count);
237
238         for (i = 0, group_freed = 0; i < count; i++) {
239                 if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
240                                                 bit + i, bitmap_bh->b_data)) {
241                         ext2_error(sb, __FUNCTION__,
242                                 "bit already cleared for block %lu", block + i);
243                 } else {
244                         group_freed++;
245                 }
246         }
247
248         mark_buffer_dirty(bitmap_bh);
249         if (sb->s_flags & MS_SYNCHRONOUS)
250                 sync_dirty_buffer(bitmap_bh);
251
252         group_release_blocks(sb, block_group, desc, bh2, group_freed);
253         freed += group_freed;
254
255         if (overflow) {
256                 block += count;
257                 count = overflow;
258                 goto do_more;
259         }
260 error_return:
261         brelse(bitmap_bh);
262         release_blocks(sb, freed);
263         DQUOT_FREE_BLOCK(inode, freed);
264 }
265
266 static int grab_block(spinlock_t *lock, char *map, unsigned size, int goal)
267 {
268         int k;
269         char *p, *r;
270
271         if (!ext2_test_bit(goal, map))
272                 goto got_it;
273
274 repeat:
275         if (goal) {
276                 /*
277                  * The goal was occupied; search forward for a free 
278                  * block within the next XX blocks.
279                  *
280                  * end_goal is more or less random, but it has to be
281                  * less than EXT2_BLOCKS_PER_GROUP. Aligning up to the
282                  * next 64-bit boundary is simple..
283                  */
284                 k = (goal + 63) & ~63;
285                 goal = ext2_find_next_zero_bit(map, k, goal);
286                 if (goal < k)
287                         goto got_it;
288                 /*
289                  * Search in the remainder of the current group.
290                  */
291         }
292
293         p = map + (goal >> 3);
294         r = memscan(p, 0, (size - goal + 7) >> 3);
295         k = (r - map) << 3;
296         if (k < size) {
297                 /* 
298                  * We have succeeded in finding a free byte in the block
299                  * bitmap.  Now search backwards to find the start of this
300                  * group of free blocks - won't take more than 7 iterations.
301                  */
302                 for (goal = k; goal && !ext2_test_bit (goal - 1, map); goal--)
303                         ;
304                 goto got_it;
305         }
306
307         k = ext2_find_next_zero_bit ((u32 *)map, size, goal);
308         if (k < size) {
309                 goal = k;
310                 goto got_it;
311         }
312         return -1;
313 got_it:
314         if (ext2_set_bit_atomic(lock, goal, (void *) map)) 
315                 goto repeat;    
316         return goal;
317 }
318
/*
 * ext2_new_block - allocate one block, optionally preallocating followers
 * @inode:          inode the block is allocated for (quota is charged to it
 *                  through DQUOT_ALLOC_BLOCK/DQUOT_PREALLOC_BLOCK)
 * @goal:           preferred block number; replaced by s_first_data_block
 *                  when it lies outside the filesystem
 * @prealloc_count: may be NULL; preallocation is only attempted when it is
 *                  non-NULL and currently holds zero.  On preallocation it
 *                  receives the number of extra blocks that were grabbed.
 * @prealloc_block: receives the first preallocated block (returned block + 1)
 *                  when preallocation is performed
 * @err:            receives 0 on success, or -EDQUOT, -ENOSPC or -EIO
 *
 * The goal's group is tried first via grab_block(): the goal bit itself,
 * then a free bit before the next 64-bit boundary above the goal, then an
 * entirely free byte in the bitmap, then any free bit.  If that group has
 * nothing, the other groups are scanned in order, wrapping around.
 *
 * Three reservations are tracked and consumed as blocks are really taken:
 * dq_alloc (quota), es_alloc (filesystem-wide free counter) and group_alloc
 * (group descriptor free count).  Whatever is left over is handed back on
 * the way out.
 *
 * Returns the allocated block number, or 0 on failure.
 */
int ext2_new_block(struct inode *inode, unsigned long goal,
                        u32 *prealloc_count, u32 *prealloc_block, int *err)
{
        struct buffer_head *bitmap_bh = NULL;
        struct buffer_head *gdp_bh;     /* buffer holding desc */
        struct ext2_group_desc *desc;
        int group_no;                   /* group currently being tried */
        int ret_block;                  /* bit offset inside the group */
        int group_idx;                  /* loop counter for the group scan */
        int target_block;               /* absolute number of the grabbed block */
        int block = 0;
        struct super_block *sb = inode->i_sb;
        struct ext2_sb_info *sbi = EXT2_SB(sb);
        struct ext2_super_block *es = sbi->s_es;
        unsigned group_size = EXT2_BLOCKS_PER_GROUP(sb);
        unsigned prealloc_goal = es->s_prealloc_blocks;
        unsigned group_alloc = 0, es_alloc, dq_alloc;
        int nr_scanned_groups;

        /*
         * prealloc_goal counts the blocks wanted in addition to the one
         * returned, hence the decrement.  A zero s_prealloc_blocks selects
         * the default.
         */
        if (!prealloc_goal--)
                prealloc_goal = EXT2_DEFAULT_PREALLOC_BLOCKS - 1;
        /* No preallocation without a slot, or while the slot is in use. */
        if (!prealloc_count || *prealloc_count)
                prealloc_goal = 0;

        if (DQUOT_ALLOC_BLOCK(inode, 1)) {
                *err = -EDQUOT;
                goto out;
        }

        /* Shrink the preallocation request while DQUOT_PREALLOC_BLOCK fails. */
        while (prealloc_goal && DQUOT_PREALLOC_BLOCK(inode, prealloc_goal))
                prealloc_goal--;

        dq_alloc = prealloc_goal + 1;
        /* reserve_blocks() may grant fewer blocks than were asked for. */
        es_alloc = reserve_blocks(sb, dq_alloc);
        if (!es_alloc) {
                *err = -ENOSPC;
                goto out_dquot;
        }

        ext2_debug ("goal=%lu.\n", goal);

        if (goal < le32_to_cpu(es->s_first_data_block) ||
            goal >= le32_to_cpu(es->s_blocks_count))
                goal = le32_to_cpu(es->s_first_data_block);
        group_no = (goal - le32_to_cpu(es->s_first_data_block)) / group_size;
        desc = ext2_get_group_desc (sb, group_no, &gdp_bh);
        if (!desc) {
                /*
                 * gdp_bh may still be uninitialised.  But group_release_blocks
                 * will not touch it because group_alloc is zero.
                 */
                goto io_error;
        }

        group_alloc = group_reserve_blocks(sbi, group_no, desc,
                                        gdp_bh, es_alloc);
        if (group_alloc) {
                ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
                                        group_size);
                brelse(bitmap_bh);
                bitmap_bh = read_block_bitmap(sb, group_no);
                if (!bitmap_bh)
                        goto io_error;

                ext2_debug("goal is at %d:%d.\n", group_no, ret_block);

                ret_block = grab_block(sb_bgl_lock(sbi, group_no),
                                bitmap_bh->b_data, group_size, ret_block);
                if (ret_block >= 0)
                        goto got_block;
                /* Nothing grabbed here: undo the group reservation. */
                group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
                group_alloc = 0;
        }

        ext2_debug ("Bit not found in block group %d.\n", group_no);

        /*
         * Now search the rest of the groups.  We assume that
         * group_no and desc correctly point to the last group visited.
         */
        nr_scanned_groups = 0;
retry:
        /* Stop at the first group that lets us reserve something. */
        for (group_idx = 0; !group_alloc &&
                        group_idx < sbi->s_groups_count; group_idx++) {
                group_no++;
                if (group_no >= sbi->s_groups_count)
                        group_no = 0;
                desc = ext2_get_group_desc(sb, group_no, &gdp_bh);
                if (!desc)
                        goto io_error;
                group_alloc = group_reserve_blocks(sbi, group_no, desc,
                                                gdp_bh, es_alloc);
        }
        if (!group_alloc) {
                *err = -ENOSPC;
                goto out_release;
        }
        brelse(bitmap_bh);
        bitmap_bh = read_block_bitmap(sb, group_no);
        if (!bitmap_bh)
                goto io_error;

        ret_block = grab_block(sb_bgl_lock(sbi, group_no), bitmap_bh->b_data,
                                group_size, 0);
        if (ret_block < 0) {
                /*
                 * If a free block counter is corrupted we can loop infinitely.
                 * Detect that here.
                 */
                nr_scanned_groups++;
                if (nr_scanned_groups > 2 * sbi->s_groups_count) {
                        ext2_error(sb, "ext2_new_block",
                                "corrupted free blocks counters");
                        goto io_error;
                }
                /*
                 * Someone else grabbed the last free block in this blockgroup
                 * before us.  Retry the scan.
                 */
                group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
                group_alloc = 0;
                goto retry;
        }

got_block:
        ext2_debug("using block group %d(%d)\n",
                group_no, desc->bg_free_blocks_count);

        /* Convert the bit offset in the group to an absolute block number. */
        target_block = ret_block + group_no * group_size +
                        le32_to_cpu(es->s_first_data_block);

        /* Reported only; the allocation still proceeds with this block. */
        if (target_block == le32_to_cpu(desc->bg_block_bitmap) ||
            target_block == le32_to_cpu(desc->bg_inode_bitmap) ||
            in_range(target_block, le32_to_cpu(desc->bg_inode_table),
                      sbi->s_itb_per_group))
                ext2_error (sb, "ext2_new_block",
                            "Allocating block in system zone - "
                            "block = %u", target_block);

        if (target_block >= le32_to_cpu(es->s_blocks_count)) {
                ext2_error (sb, "ext2_new_block",
                            "block(%d) >= blocks count(%d) - "
                            "block_group = %d, es == %p ", ret_block,
                        le32_to_cpu(es->s_blocks_count), group_no, es);
                goto io_error;
        }
        block = target_block;

        /* OK, we _had_ allocated something */
        ext2_debug("found bit %d\n", ret_block);

        /* One unit of each reservation is consumed by the returned block. */
        dq_alloc--;
        es_alloc--;
        group_alloc--;

        /*
         * Do block preallocation now if required.
         * group_alloc is zero when prealloc_count is NULL (only one block was
         * ever requested), so the dereference below is not reached then.
         */
        write_lock(&EXT2_I(inode)->i_meta_lock);
        if (group_alloc && !*prealloc_count) {
                unsigned n;

                /*
                 * Grab the bits directly following the allocated one; stop
                 * at the first bit already in use or at the end of the group.
                 */
                for (n = 0; n < group_alloc && ++ret_block < group_size; n++) {
                        if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group_no),
                                                ret_block,
                                                (void*) bitmap_bh->b_data))
                                break;
                }
                *prealloc_block = block + 1;
                *prealloc_count = n;
                es_alloc -= n;
                dq_alloc -= n;
                group_alloc -= n;
        }
        write_unlock(&EXT2_I(inode)->i_meta_lock);

        mark_buffer_dirty(bitmap_bh);
        if (sb->s_flags & MS_SYNCHRONOUS)
                sync_dirty_buffer(bitmap_bh);

        ext2_debug ("allocating block %d. ", block);

        *err = 0;
out_release:
        /* Hand back whatever part of each reservation was not consumed. */
        group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
        release_blocks(sb, es_alloc);
out_dquot:
        DQUOT_FREE_BLOCK(inode, dq_alloc);
out:
        brelse(bitmap_bh);
        return block;

io_error:
        *err = -EIO;
        goto out_release;
}
523
524 unsigned long ext2_count_free_blocks (struct super_block * sb)
525 {
526         struct ext2_group_desc * desc;
527         unsigned long desc_count = 0;
528         int i;
529 #ifdef EXT2FS_DEBUG
530         unsigned long bitmap_count, x;
531         struct ext2_super_block *es;
532
533         lock_super (sb);
534         es = EXT2_SB(sb)->s_es;
535         desc_count = 0;
536         bitmap_count = 0;
537         desc = NULL;
538         for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
539                 struct buffer_head *bitmap_bh;
540                 desc = ext2_get_group_desc (sb, i, NULL);
541                 if (!desc)
542                         continue;
543                 desc_count += le16_to_cpu(desc->bg_free_blocks_count);
544                 bitmap_bh = read_block_bitmap(sb, i);
545                 if (!bitmap_bh)
546                         continue;
547                 
548                 x = ext2_count_free(bitmap_bh, sb->s_blocksize);
549                 printk ("group %d: stored = %d, counted = %lu\n",
550                         i, le16_to_cpu(desc->bg_free_blocks_count), x);
551                 bitmap_count += x;
552                 brelse(bitmap_bh);
553         }
554         printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n",
555                 (long)le32_to_cpu(es->s_free_blocks_count),
556                 desc_count, bitmap_count);
557         unlock_super (sb);
558         return bitmap_count;
559 #else
560         for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
561                 desc = ext2_get_group_desc (sb, i, NULL);
562                 if (!desc)
563                         continue;
564                 desc_count += le16_to_cpu(desc->bg_free_blocks_count);
565         }
566         return desc_count;
567 #endif
568 }
569
570 static inline int
571 block_in_use(unsigned long block, struct super_block *sb, unsigned char *map)
572 {
573         return ext2_test_bit ((block -
574                 le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block)) %
575                          EXT2_BLOCKS_PER_GROUP(sb), map);
576 }
577
/*
 * Return 1 if @a is a positive power of @b (b, b*b, b*b*b, ...), else 0.
 *
 * Callers in this file pass b = 3, 5 or 7.  The power is built up by
 * repeated multiplication; before each step we check that the product
 * cannot exceed @a, so the multiplication never overflows int even when @a
 * is close to the largest representable value.
 */
static inline int test_root(int a, int b)
{
        int num = b;

        while (a > num) {
                /*
                 * num > a / b implies num * b > a: @a is not a power of
                 * @b, and multiplying further could overflow.  The b > 1
                 * test keeps the division well defined.
                 */
                if (b > 1 && num > a / b)
                        return 0;
                num *= b;
        }
        return num == a;
}
586
/*
 * Return non-zero if @group is one of the groups selected by the sparse
 * layout: any group number <= 1, or a power of 3, 5 or 7.
 */
static int ext2_group_sparse(int group)
{
        if (group > 1)
                return test_root(group, 3) ||
                       test_root(group, 5) ||
                       test_root(group, 7);
        return 1;
}
594
595 /**
596  *      ext2_bg_has_super - number of blocks used by the superblock in group
597  *      @sb: superblock for filesystem
598  *      @group: group number to check
599  *
600  *      Return the number of blocks used by the superblock (primary or backup)
601  *      in this group.  Currently this will be only 0 or 1.
602  */
603 int ext2_bg_has_super(struct super_block *sb, int group)
604 {
605         if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
606             !ext2_group_sparse(group))
607                 return 0;
608         return 1;
609 }
610
611 /**
612  *      ext2_bg_num_gdb - number of blocks used by the group table in group
613  *      @sb: superblock for filesystem
614  *      @group: group number to check
615  *
616  *      Return the number of blocks used by the group descriptor table
617  *      (primary or backup) in this group.  In the future there may be a
618  *      different number of descriptor blocks in each group.
619  */
620 unsigned long ext2_bg_num_gdb(struct super_block *sb, int group)
621 {
622         if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
623             !ext2_group_sparse(group))
624                 return 0;
625         return EXT2_SB(sb)->s_gdb_count;
626 }
627