Pull percpu-dtc into release branch
[linux-2.6] / fs / ext2 / balloc.c
1 /*
2  *  linux/fs/ext2/balloc.c
3  *
4  * Copyright (C) 1992, 1993, 1994, 1995
5  * Remy Card (card@masi.ibp.fr)
6  * Laboratoire MASI - Institut Blaise Pascal
7  * Universite Pierre et Marie Curie (Paris VI)
8  *
9  *  Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993
10  *  Big-endian to little-endian byte-swapping/bitmaps by
11  *        David S. Miller (davem@caip.rutgers.edu), 1995
12  */
13
14 #include "ext2.h"
15 #include <linux/quotaops.h>
16 #include <linux/sched.h>
17 #include <linux/buffer_head.h>
18 #include <linux/capability.h>
19
20 /*
21  * balloc.c contains the blocks allocation and deallocation routines
22  */
23
24 /*
25  * The free blocks are managed by bitmaps.  A file system contains several
26  * blocks groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
27  * block for inodes, N blocks for the inode table and data blocks.
28  *
29  * The file system contains group descriptors which are located after the
30  * super block.  Each descriptor contains the number of the bitmap block and
31  * the free blocks count in the group.  The descriptors are loaded in memory
32  * when a file system is mounted (see ext2_fill_super).
33  */
34
35
/*
 * in_range - true when @b lies in the closed interval
 * [@first, @first + @len - 1].  Arguments may be evaluated more than once,
 * so they must be free of side effects.
 */
#define in_range(b, first, len)	((first) <= (b) && (b) <= (first) + (len) - 1)
37
38 struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
39                                              unsigned int block_group,
40                                              struct buffer_head ** bh)
41 {
42         unsigned long group_desc;
43         unsigned long offset;
44         struct ext2_group_desc * desc;
45         struct ext2_sb_info *sbi = EXT2_SB(sb);
46
47         if (block_group >= sbi->s_groups_count) {
48                 ext2_error (sb, "ext2_get_group_desc",
49                             "block_group >= groups_count - "
50                             "block_group = %d, groups_count = %lu",
51                             block_group, sbi->s_groups_count);
52
53                 return NULL;
54         }
55
56         group_desc = block_group >> EXT2_DESC_PER_BLOCK_BITS(sb);
57         offset = block_group & (EXT2_DESC_PER_BLOCK(sb) - 1);
58         if (!sbi->s_group_desc[group_desc]) {
59                 ext2_error (sb, "ext2_get_group_desc",
60                             "Group descriptor not loaded - "
61                             "block_group = %d, group_desc = %lu, desc = %lu",
62                              block_group, group_desc, offset);
63                 return NULL;
64         }
65
66         desc = (struct ext2_group_desc *) sbi->s_group_desc[group_desc]->b_data;
67         if (bh)
68                 *bh = sbi->s_group_desc[group_desc];
69         return desc + offset;
70 }
71
72 /*
73  * Read the bitmap for a given block_group, reading into the specified 
74  * slot in the superblock's bitmap cache.
75  *
76  * Return buffer_head on success or NULL in case of failure.
77  */
78 static struct buffer_head *
79 read_block_bitmap(struct super_block *sb, unsigned int block_group)
80 {
81         struct ext2_group_desc * desc;
82         struct buffer_head * bh = NULL;
83         
84         desc = ext2_get_group_desc (sb, block_group, NULL);
85         if (!desc)
86                 goto error_out;
87         bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
88         if (!bh)
89                 ext2_error (sb, "read_block_bitmap",
90                             "Cannot read block bitmap - "
91                             "block_group = %d, block_bitmap = %u",
92                             block_group, le32_to_cpu(desc->bg_block_bitmap));
93 error_out:
94         return bh;
95 }
96
97 /*
98  * Set sb->s_dirt here because the superblock was "logically" altered.  We
99  * need to recalculate its free blocks count and flush it out.
100  */
101 static int reserve_blocks(struct super_block *sb, int count)
102 {
103         struct ext2_sb_info *sbi = EXT2_SB(sb);
104         struct ext2_super_block *es = sbi->s_es;
105         unsigned free_blocks;
106         unsigned root_blocks;
107
108         free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
109         root_blocks = le32_to_cpu(es->s_r_blocks_count);
110
111         if (free_blocks < count)
112                 count = free_blocks;
113
114         if (free_blocks < root_blocks + count && !capable(CAP_SYS_RESOURCE) &&
115             sbi->s_resuid != current->fsuid &&
116             (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
117                 /*
118                  * We are too close to reserve and we are not privileged.
119                  * Can we allocate anything at all?
120                  */
121                 if (free_blocks > root_blocks)
122                         count = free_blocks - root_blocks;
123                 else
124                         return 0;
125         }
126
127         percpu_counter_mod(&sbi->s_freeblocks_counter, -count);
128         sb->s_dirt = 1;
129         return count;
130 }
131
132 static void release_blocks(struct super_block *sb, int count)
133 {
134         if (count) {
135                 struct ext2_sb_info *sbi = EXT2_SB(sb);
136
137                 percpu_counter_mod(&sbi->s_freeblocks_counter, count);
138                 sb->s_dirt = 1;
139         }
140 }
141
142 static int group_reserve_blocks(struct ext2_sb_info *sbi, int group_no,
143         struct ext2_group_desc *desc, struct buffer_head *bh, int count)
144 {
145         unsigned free_blocks;
146
147         if (!desc->bg_free_blocks_count)
148                 return 0;
149
150         spin_lock(sb_bgl_lock(sbi, group_no));
151         free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
152         if (free_blocks < count)
153                 count = free_blocks;
154         desc->bg_free_blocks_count = cpu_to_le16(free_blocks - count);
155         spin_unlock(sb_bgl_lock(sbi, group_no));
156         mark_buffer_dirty(bh);
157         return count;
158 }
159
160 static void group_release_blocks(struct super_block *sb, int group_no,
161         struct ext2_group_desc *desc, struct buffer_head *bh, int count)
162 {
163         if (count) {
164                 struct ext2_sb_info *sbi = EXT2_SB(sb);
165                 unsigned free_blocks;
166
167                 spin_lock(sb_bgl_lock(sbi, group_no));
168                 free_blocks = le16_to_cpu(desc->bg_free_blocks_count);
169                 desc->bg_free_blocks_count = cpu_to_le16(free_blocks + count);
170                 spin_unlock(sb_bgl_lock(sbi, group_no));
171                 sb->s_dirt = 1;
172                 mark_buffer_dirty(bh);
173         }
174 }
175
176 /* Free given blocks, update quota and i_blocks field */
177 void ext2_free_blocks (struct inode * inode, unsigned long block,
178                        unsigned long count)
179 {
180         struct buffer_head *bitmap_bh = NULL;
181         struct buffer_head * bh2;
182         unsigned long block_group;
183         unsigned long bit;
184         unsigned long i;
185         unsigned long overflow;
186         struct super_block * sb = inode->i_sb;
187         struct ext2_sb_info * sbi = EXT2_SB(sb);
188         struct ext2_group_desc * desc;
189         struct ext2_super_block * es = sbi->s_es;
190         unsigned freed = 0, group_freed;
191
192         if (block < le32_to_cpu(es->s_first_data_block) ||
193             block + count < block ||
194             block + count > le32_to_cpu(es->s_blocks_count)) {
195                 ext2_error (sb, "ext2_free_blocks",
196                             "Freeing blocks not in datazone - "
197                             "block = %lu, count = %lu", block, count);
198                 goto error_return;
199         }
200
201         ext2_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1);
202
203 do_more:
204         overflow = 0;
205         block_group = (block - le32_to_cpu(es->s_first_data_block)) /
206                       EXT2_BLOCKS_PER_GROUP(sb);
207         bit = (block - le32_to_cpu(es->s_first_data_block)) %
208                       EXT2_BLOCKS_PER_GROUP(sb);
209         /*
210          * Check to see if we are freeing blocks across a group
211          * boundary.
212          */
213         if (bit + count > EXT2_BLOCKS_PER_GROUP(sb)) {
214                 overflow = bit + count - EXT2_BLOCKS_PER_GROUP(sb);
215                 count -= overflow;
216         }
217         brelse(bitmap_bh);
218         bitmap_bh = read_block_bitmap(sb, block_group);
219         if (!bitmap_bh)
220                 goto error_return;
221
222         desc = ext2_get_group_desc (sb, block_group, &bh2);
223         if (!desc)
224                 goto error_return;
225
226         if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) ||
227             in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) ||
228             in_range (block, le32_to_cpu(desc->bg_inode_table),
229                       sbi->s_itb_per_group) ||
230             in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table),
231                       sbi->s_itb_per_group))
232                 ext2_error (sb, "ext2_free_blocks",
233                             "Freeing blocks in system zones - "
234                             "Block = %lu, count = %lu",
235                             block, count);
236
237         for (i = 0, group_freed = 0; i < count; i++) {
238                 if (!ext2_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
239                                                 bit + i, bitmap_bh->b_data)) {
240                         ext2_error(sb, __FUNCTION__,
241                                 "bit already cleared for block %lu", block + i);
242                 } else {
243                         group_freed++;
244                 }
245         }
246
247         mark_buffer_dirty(bitmap_bh);
248         if (sb->s_flags & MS_SYNCHRONOUS)
249                 sync_dirty_buffer(bitmap_bh);
250
251         group_release_blocks(sb, block_group, desc, bh2, group_freed);
252         freed += group_freed;
253
254         if (overflow) {
255                 block += count;
256                 count = overflow;
257                 goto do_more;
258         }
259 error_return:
260         brelse(bitmap_bh);
261         release_blocks(sb, freed);
262         DQUOT_FREE_BLOCK(inode, freed);
263 }
264
265 static int grab_block(spinlock_t *lock, char *map, unsigned size, int goal)
266 {
267         int k;
268         char *p, *r;
269
270         if (!ext2_test_bit(goal, map))
271                 goto got_it;
272
273 repeat:
274         if (goal) {
275                 /*
276                  * The goal was occupied; search forward for a free 
277                  * block within the next XX blocks.
278                  *
279                  * end_goal is more or less random, but it has to be
280                  * less than EXT2_BLOCKS_PER_GROUP. Aligning up to the
281                  * next 64-bit boundary is simple..
282                  */
283                 k = (goal + 63) & ~63;
284                 goal = ext2_find_next_zero_bit(map, k, goal);
285                 if (goal < k)
286                         goto got_it;
287                 /*
288                  * Search in the remainder of the current group.
289                  */
290         }
291
292         p = map + (goal >> 3);
293         r = memscan(p, 0, (size - goal + 7) >> 3);
294         k = (r - map) << 3;
295         if (k < size) {
296                 /* 
297                  * We have succeeded in finding a free byte in the block
298                  * bitmap.  Now search backwards to find the start of this
299                  * group of free blocks - won't take more than 7 iterations.
300                  */
301                 for (goal = k; goal && !ext2_test_bit (goal - 1, map); goal--)
302                         ;
303                 goto got_it;
304         }
305
306         k = ext2_find_next_zero_bit ((u32 *)map, size, goal);
307         if (k < size) {
308                 goal = k;
309                 goto got_it;
310         }
311         return -1;
312 got_it:
313         if (ext2_set_bit_atomic(lock, goal, (void *) map)) 
314                 goto repeat;    
315         return goal;
316 }
317
318 /*
319  * ext2_new_block uses a goal block to assist allocation.  If the goal is
320  * free, or there is a free block within 32 blocks of the goal, that block
321  * is allocated.  Otherwise a forward search is made for a free block; within 
322  * each block group the search first looks for an entire free byte in the block
323  * bitmap, and then for any free bit if that fails.
324  * This function also updates quota and i_blocks field.
325  */
326 int ext2_new_block(struct inode *inode, unsigned long goal,
327                         u32 *prealloc_count, u32 *prealloc_block, int *err)
328 {
329         struct buffer_head *bitmap_bh = NULL;
330         struct buffer_head *gdp_bh;     /* bh2 */
331         struct ext2_group_desc *desc;
332         int group_no;                   /* i */
333         int ret_block;                  /* j */
334         int group_idx;                  /* k */
335         int target_block;               /* tmp */
336         int block = 0;
337         struct super_block *sb = inode->i_sb;
338         struct ext2_sb_info *sbi = EXT2_SB(sb);
339         struct ext2_super_block *es = sbi->s_es;
340         unsigned group_size = EXT2_BLOCKS_PER_GROUP(sb);
341         unsigned prealloc_goal = es->s_prealloc_blocks;
342         unsigned group_alloc = 0, es_alloc, dq_alloc;
343         int nr_scanned_groups;
344
345         if (!prealloc_goal--)
346                 prealloc_goal = EXT2_DEFAULT_PREALLOC_BLOCKS - 1;
347         if (!prealloc_count || *prealloc_count)
348                 prealloc_goal = 0;
349
350         if (DQUOT_ALLOC_BLOCK(inode, 1)) {
351                 *err = -EDQUOT;
352                 goto out;
353         }
354
355         while (prealloc_goal && DQUOT_PREALLOC_BLOCK(inode, prealloc_goal))
356                 prealloc_goal--;
357
358         dq_alloc = prealloc_goal + 1;
359         es_alloc = reserve_blocks(sb, dq_alloc);
360         if (!es_alloc) {
361                 *err = -ENOSPC;
362                 goto out_dquot;
363         }
364
365         ext2_debug ("goal=%lu.\n", goal);
366
367         if (goal < le32_to_cpu(es->s_first_data_block) ||
368             goal >= le32_to_cpu(es->s_blocks_count))
369                 goal = le32_to_cpu(es->s_first_data_block);
370         group_no = (goal - le32_to_cpu(es->s_first_data_block)) / group_size;
371         desc = ext2_get_group_desc (sb, group_no, &gdp_bh);
372         if (!desc) {
373                 /*
374                  * gdp_bh may still be uninitialised.  But group_release_blocks
375                  * will not touch it because group_alloc is zero.
376                  */
377                 goto io_error;
378         }
379
380         group_alloc = group_reserve_blocks(sbi, group_no, desc,
381                                         gdp_bh, es_alloc);
382         if (group_alloc) {
383                 ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
384                                         group_size);
385                 brelse(bitmap_bh);
386                 bitmap_bh = read_block_bitmap(sb, group_no);
387                 if (!bitmap_bh)
388                         goto io_error;
389                 
390                 ext2_debug("goal is at %d:%d.\n", group_no, ret_block);
391
392                 ret_block = grab_block(sb_bgl_lock(sbi, group_no),
393                                 bitmap_bh->b_data, group_size, ret_block);
394                 if (ret_block >= 0)
395                         goto got_block;
396                 group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
397                 group_alloc = 0;
398         }
399
400         ext2_debug ("Bit not found in block group %d.\n", group_no);
401
402         /*
403          * Now search the rest of the groups.  We assume that 
404          * i and desc correctly point to the last group visited.
405          */
406         nr_scanned_groups = 0;
407 retry:
408         for (group_idx = 0; !group_alloc &&
409                         group_idx < sbi->s_groups_count; group_idx++) {
410                 group_no++;
411                 if (group_no >= sbi->s_groups_count)
412                         group_no = 0;
413                 desc = ext2_get_group_desc(sb, group_no, &gdp_bh);
414                 if (!desc)
415                         goto io_error;
416                 group_alloc = group_reserve_blocks(sbi, group_no, desc,
417                                                 gdp_bh, es_alloc);
418         }
419         if (!group_alloc) {
420                 *err = -ENOSPC;
421                 goto out_release;
422         }
423         brelse(bitmap_bh);
424         bitmap_bh = read_block_bitmap(sb, group_no);
425         if (!bitmap_bh)
426                 goto io_error;
427
428         ret_block = grab_block(sb_bgl_lock(sbi, group_no), bitmap_bh->b_data,
429                                 group_size, 0);
430         if (ret_block < 0) {
431                 /*
432                  * If a free block counter is corrupted we can loop inifintely.
433                  * Detect that here.
434                  */
435                 nr_scanned_groups++;
436                 if (nr_scanned_groups > 2 * sbi->s_groups_count) {
437                         ext2_error(sb, "ext2_new_block",
438                                 "corrupted free blocks counters");
439                         goto io_error;
440                 }
441                 /*
442                  * Someone else grabbed the last free block in this blockgroup
443                  * before us.  Retry the scan.
444                  */
445                 group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
446                 group_alloc = 0;
447                 goto retry;
448         }
449
450 got_block:
451         ext2_debug("using block group %d(%d)\n",
452                 group_no, desc->bg_free_blocks_count);
453
454         target_block = ret_block + group_no * group_size +
455                         le32_to_cpu(es->s_first_data_block);
456
457         if (target_block == le32_to_cpu(desc->bg_block_bitmap) ||
458             target_block == le32_to_cpu(desc->bg_inode_bitmap) ||
459             in_range(target_block, le32_to_cpu(desc->bg_inode_table),
460                       sbi->s_itb_per_group))
461                 ext2_error (sb, "ext2_new_block",
462                             "Allocating block in system zone - "
463                             "block = %u", target_block);
464
465         if (target_block >= le32_to_cpu(es->s_blocks_count)) {
466                 ext2_error (sb, "ext2_new_block",
467                             "block(%d) >= blocks count(%d) - "
468                             "block_group = %d, es == %p ", ret_block,
469                         le32_to_cpu(es->s_blocks_count), group_no, es);
470                 goto io_error;
471         }
472         block = target_block;
473
474         /* OK, we _had_ allocated something */
475         ext2_debug("found bit %d\n", ret_block);
476
477         dq_alloc--;
478         es_alloc--;
479         group_alloc--;
480
481         /*
482          * Do block preallocation now if required.
483          */
484         write_lock(&EXT2_I(inode)->i_meta_lock);
485         if (group_alloc && !*prealloc_count) {
486                 unsigned n;
487
488                 for (n = 0; n < group_alloc && ++ret_block < group_size; n++) {
489                         if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group_no),
490                                                 ret_block,
491                                                 (void*) bitmap_bh->b_data))
492                                 break;
493                 }
494                 *prealloc_block = block + 1;
495                 *prealloc_count = n;
496                 es_alloc -= n;
497                 dq_alloc -= n;
498                 group_alloc -= n;
499         }
500         write_unlock(&EXT2_I(inode)->i_meta_lock);
501
502         mark_buffer_dirty(bitmap_bh);
503         if (sb->s_flags & MS_SYNCHRONOUS)
504                 sync_dirty_buffer(bitmap_bh);
505
506         ext2_debug ("allocating block %d. ", block);
507
508         *err = 0;
509 out_release:
510         group_release_blocks(sb, group_no, desc, gdp_bh, group_alloc);
511         release_blocks(sb, es_alloc);
512 out_dquot:
513         DQUOT_FREE_BLOCK(inode, dq_alloc);
514 out:
515         brelse(bitmap_bh);
516         return block;
517
518 io_error:
519         *err = -EIO;
520         goto out_release;
521 }
522
#ifdef EXT2FS_DEBUG

/* Number of zero bits in each possible 4-bit value. */
static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};

/*
 * ext2_count_free - count the zero bits in the first @numchars bytes of a
 * bitmap buffer.  Returns 0 when @map is NULL.
 */
unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars)
{
	unsigned long zero_bits = 0;
	unsigned int pos;

	if (!map)
		return 0;

	for (pos = 0; pos < numchars; pos++) {
		int byte = map->b_data[pos];

		zero_bits += nibblemap[byte & 0xf];		/* low nibble */
		zero_bits += nibblemap[(byte >> 4) & 0xf];	/* high nibble */
	}
	return zero_bits;
}

#endif  /*  EXT2FS_DEBUG  */
541
542 unsigned long ext2_count_free_blocks (struct super_block * sb)
543 {
544         struct ext2_group_desc * desc;
545         unsigned long desc_count = 0;
546         int i;
547 #ifdef EXT2FS_DEBUG
548         unsigned long bitmap_count, x;
549         struct ext2_super_block *es;
550
551         es = EXT2_SB(sb)->s_es;
552         desc_count = 0;
553         bitmap_count = 0;
554         desc = NULL;
555         for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
556                 struct buffer_head *bitmap_bh;
557                 desc = ext2_get_group_desc (sb, i, NULL);
558                 if (!desc)
559                         continue;
560                 desc_count += le16_to_cpu(desc->bg_free_blocks_count);
561                 bitmap_bh = read_block_bitmap(sb, i);
562                 if (!bitmap_bh)
563                         continue;
564                 
565                 x = ext2_count_free(bitmap_bh, sb->s_blocksize);
566                 printk ("group %d: stored = %d, counted = %lu\n",
567                         i, le16_to_cpu(desc->bg_free_blocks_count), x);
568                 bitmap_count += x;
569                 brelse(bitmap_bh);
570         }
571         printk("ext2_count_free_blocks: stored = %lu, computed = %lu, %lu\n",
572                 (long)le32_to_cpu(es->s_free_blocks_count),
573                 desc_count, bitmap_count);
574         return bitmap_count;
575 #else
576         for (i = 0; i < EXT2_SB(sb)->s_groups_count; i++) {
577                 desc = ext2_get_group_desc (sb, i, NULL);
578                 if (!desc)
579                         continue;
580                 desc_count += le16_to_cpu(desc->bg_free_blocks_count);
581         }
582         return desc_count;
583 #endif
584 }
585
586 static inline int
587 block_in_use(unsigned long block, struct super_block *sb, unsigned char *map)
588 {
589         return ext2_test_bit ((block -
590                 le32_to_cpu(EXT2_SB(sb)->s_es->s_first_data_block)) %
591                          EXT2_BLOCKS_PER_GROUP(sb), map);
592 }
593
/*
 * test_root - is @a a power of @b with exponent >= 1?
 *
 * Multiplies @b up until it reaches or passes @a.  Callers in this file
 * use bases 3, 5 and 7 only.
 */
static inline int test_root(int a, int b)
{
	int power;

	for (power = b; power < a; power *= b)
		;
	return a == power;
}
602
/*
 * ext2_group_sparse - returns 1 for groups 0 and 1 and for any group
 * number that is a power of 3, 5 or 7; otherwise 0.
 */
static int ext2_group_sparse(int group)
{
	if (group <= 1)
		return 1;
	if (test_root(group, 3))
		return 1;
	if (test_root(group, 5))
		return 1;
	return test_root(group, 7) != 0;
}
610
611 /**
612  *      ext2_bg_has_super - number of blocks used by the superblock in group
613  *      @sb: superblock for filesystem
614  *      @group: group number to check
615  *
616  *      Return the number of blocks used by the superblock (primary or backup)
617  *      in this group.  Currently this will be only 0 or 1.
618  */
619 int ext2_bg_has_super(struct super_block *sb, int group)
620 {
621         if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
622             !ext2_group_sparse(group))
623                 return 0;
624         return 1;
625 }
626
627 /**
628  *      ext2_bg_num_gdb - number of blocks used by the group table in group
629  *      @sb: superblock for filesystem
630  *      @group: group number to check
631  *
632  *      Return the number of blocks used by the group descriptor table
633  *      (primary or backup) in this group.  In the future there may be a
634  *      different number of descriptor blocks in each group.
635  */
636 unsigned long ext2_bg_num_gdb(struct super_block *sb, int group)
637 {
638         if (EXT2_HAS_RO_COMPAT_FEATURE(sb,EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)&&
639             !ext2_group_sparse(group))
640                 return 0;
641         return EXT2_SB(sb)->s_gdb_count;
642 }
643