2 * This file is part of UBIFS.
4 * Copyright (C) 2006-2008 Nokia Corporation.
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
24 * This file implements the budgeting sub-system which is responsible for UBIFS
27 * Factors such as compression, wasted space at the ends of LEBs, space in other
28 * journal heads, the effect of updates on the index, and so on, make it
29 * impossible to accurately predict the amount of space needed. Consequently
30 * approximations are used.
34 #include <linux/writeback.h>
35 #include <asm/div64.h>
38 * When pessimistic budget calculations say that there is no enough space,
39 * UBIFS starts writing back dirty inodes and pages, doing garbage collection,
40 * or committing. The below constants define maximum number of times UBIFS
41 * repeats the operations.
43 #define MAX_SHRINK_RETRIES 8
44 #define MAX_GC_RETRIES 4
45 #define MAX_CMT_RETRIES 2
46 #define MAX_NOSPC_RETRIES 1
49 * The below constant defines amount of dirty pages which should be written
50 * back at when trying to shrink the liability.
52 #define NR_TO_WRITE 16
55 * struct retries_info - information about re-tries while making free space.
56 * @prev_liability: previous liability
57 * @shrink_cnt: how many times the liability was shrinked
58 * @shrink_retries: count of liability shrink re-tries (increased when
59 * liability does not shrink)
60 * @try_gc: GC should be tried first
61 * @gc_retries: how many times GC was run
62 * @cmt_retries: how many times commit has been done
63 * @nospc_retries: how many times GC returned %-ENOSPC
65 * Since we consider budgeting to be the fast-path, and this structure has to
66 * be allocated on stack and zeroed out, we make it smaller using bit-fields.
69 long long prev_liability;
70 unsigned int shrink_cnt;
71 unsigned int shrink_retries:5;
72 unsigned int try_gc:1;
73 unsigned int gc_retries:4;
74 unsigned int cmt_retries:3;
75 unsigned int nospc_retries:1;
79 * shrink_liability - write-back some dirty pages/inodes.
80 * @c: UBIFS file-system description object
81 * @nr_to_write: how many dirty pages to write-back
83 * This function shrinks UBIFS liability by means of writing back some amount
84 * of dirty inodes and their pages. Returns the amount of pages which were
85 * written back. The returned value does not include dirty inodes which were
88 * Note, this function synchronizes even VFS inodes which are locked
89 * (@i_mutex) by the caller of the budgeting function, because write-back does
92 static int shrink_liability(struct ubifs_info *c, int nr_to_write)
95 struct writeback_control wbc = {
96 .sync_mode = WB_SYNC_NONE,
97 .range_end = LLONG_MAX,
98 .nr_to_write = nr_to_write,
101 generic_sync_sb_inodes(c->vfs_sb, &wbc);
102 nr_written = nr_to_write - wbc.nr_to_write;
106 * Re-try again but wait on pages/inodes which are being
107 * written-back concurrently (e.g., by pdflush).
109 memset(&wbc, 0, sizeof(struct writeback_control));
110 wbc.sync_mode = WB_SYNC_ALL;
111 wbc.range_end = LLONG_MAX;
112 wbc.nr_to_write = nr_to_write;
113 generic_sync_sb_inodes(c->vfs_sb, &wbc);
114 nr_written = nr_to_write - wbc.nr_to_write;
117 dbg_budg("%d pages were written back", nr_written);
123 * run_gc - run garbage collector.
124 * @c: UBIFS file-system description object
126 * This function runs garbage collector to make some more free space. Returns
127 * zero if a free LEB has been produced, %-EAGAIN if commit is required, and a
128 * negative error code in case of failure.
130 static int run_gc(struct ubifs_info *c)
134 /* Make some free space by garbage-collecting dirty space */
135 down_read(&c->commit_sem);
136 lnum = ubifs_garbage_collect(c, 1);
137 up_read(&c->commit_sem);
141 /* GC freed one LEB, return it to lprops */
142 dbg_budg("GC freed LEB %d", lnum);
143 err = ubifs_return_leb(c, lnum);
150 * make_free_space - make more free space on the file-system.
151 * @c: UBIFS file-system description object
152 * @ri: information about previous invocations of this function
154 * This function is called when an operation cannot be budgeted because there
155 * is supposedly no free space. But in most cases there is some free space:
156 * o budgeting is pessimistic, so it always budgets more then it is actually
157 * needed, so shrinking the liability is one way to make free space - the
158 * cached data will take less space then it was budgeted for;
159 * o GC may turn some dark space into free space (budgeting treats dark space
161 * o commit may free some LEB, i.e., turn freeable LEBs into free LEBs.
163 * So this function tries to do the above. Returns %-EAGAIN if some free space
164 * was presumably made and the caller has to re-try budgeting the operation.
165 * Returns %-ENOSPC if it couldn't do more free space, and other negative error
168 static int make_free_space(struct ubifs_info *c, struct retries_info *ri)
173 * If we have some dirty pages and inodes (liability), try to write
174 * them back unless this was tried too many times without effect
177 if (ri->shrink_retries < MAX_SHRINK_RETRIES && !ri->try_gc) {
180 spin_lock(&c->space_lock);
181 liability = c->budg_idx_growth + c->budg_data_growth +
183 spin_unlock(&c->space_lock);
185 if (ri->prev_liability >= liability) {
186 /* Liability does not shrink, next time try GC then */
187 ri->shrink_retries += 1;
188 if (ri->gc_retries < MAX_GC_RETRIES)
190 dbg_budg("liability did not shrink: retries %d of %d",
191 ri->shrink_retries, MAX_SHRINK_RETRIES);
194 dbg_budg("force write-back (count %d)", ri->shrink_cnt);
195 shrink_liability(c, NR_TO_WRITE + ri->shrink_cnt);
197 ri->prev_liability = liability;
203 * Try to run garbage collector unless it was already tried too many
206 if (ri->gc_retries < MAX_GC_RETRIES) {
208 dbg_budg("run GC, retries %d of %d",
209 ri->gc_retries, MAX_GC_RETRIES);
216 if (err == -EAGAIN) {
217 dbg_budg("GC asked to commit");
218 err = ubifs_run_commit(c);
228 * GC could not make any progress. If this is the first time,
229 * then it makes sense to try to commit, because it might make
232 dbg_budg("GC returned -ENOSPC, retries %d",
234 if (ri->nospc_retries >= MAX_NOSPC_RETRIES)
236 ri->nospc_retries += 1;
239 /* Neither GC nor write-back helped, try to commit */
240 if (ri->cmt_retries < MAX_CMT_RETRIES) {
241 ri->cmt_retries += 1;
242 dbg_budg("run commit, retries %d of %d",
243 ri->cmt_retries, MAX_CMT_RETRIES);
244 err = ubifs_run_commit(c);
253 * ubifs_calc_min_idx_lebs - calculate amount of eraseblocks for the index.
254 * @c: UBIFS file-system description object
256 * This function calculates and returns the number of eraseblocks which should
257 * be kept for index usage.
259 int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
264 idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx;
266 /* And make sure we have twice the index size of space reserved */
270 * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes'
271 * pair, nor similarly the two variables for the new index size, so we
272 * have to do this costly 64-bit division on fast-path.
274 if (do_div(idx_size, c->leb_size - c->max_idx_node_sz))
279 * The index head is not available for the in-the-gaps method, so add an
280 * extra LEB to compensate.
284 * At present the index needs at least 2 LEBs: one for the index head
285 * and one for in-the-gaps method (which currently does not cater for
286 * the index head and so excludes it from consideration).
294 * ubifs_calc_available - calculate available FS space.
295 * @c: UBIFS file-system description object
296 * @min_idx_lebs: minimum number of LEBs reserved for the index
298 * This function calculates and returns amount of FS space available for use.
300 long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs)
306 * Force the amount available to the total size reported if the used
309 if (c->lst.total_used <= UBIFS_INO_NODE_SZ &&
310 c->budg_data_growth + c->budg_dd_growth == 0) {
311 /* Do the same calculation as for c->block_cnt */
312 available = c->main_lebs - 2;
313 available *= c->leb_size - c->dark_wm;
317 available = c->main_bytes - c->lst.total_used;
320 * Now 'available' contains theoretically available flash space
321 * assuming there is no index, so we have to subtract the space which
322 * is reserved for the index.
324 subtract_lebs = min_idx_lebs;
326 /* Take into account that GC reserves one LEB for its own needs */
330 * The GC journal head LEB is not really accessible. And since
331 * different write types go to different heads, we may count only on
334 subtract_lebs += c->jhead_cnt - 1;
336 /* We also reserve one LEB for deletions, which bypass budgeting */
339 available -= (long long)subtract_lebs * c->leb_size;
341 /* Subtract the dead space which is not available for use */
342 available -= c->lst.total_dead;
345 * Subtract dark space, which might or might not be usable - it depends
346 * on the data which we have on the media and which will be written. If
347 * this is a lot of uncompressed or not-compressible data, the dark
348 * space cannot be used.
350 available -= c->lst.total_dark;
353 * However, there is more dark space. The index may be bigger than
354 * @min_idx_lebs. Those extra LEBs are assumed to be available, but
355 * their dark space is not included in total_dark, so it is subtracted
358 if (c->lst.idx_lebs > min_idx_lebs) {
359 subtract_lebs = c->lst.idx_lebs - min_idx_lebs;
360 available -= subtract_lebs * c->dark_wm;
363 /* The calculations are rough and may end up with a negative number */
364 return available > 0 ? available : 0;
368 * can_use_rp - check whether the user is allowed to use reserved pool.
369 * @c: UBIFS file-system description object
371 * UBIFS has so-called "reserved pool" which is flash space reserved
372 * for the superuser and for uses whose UID/GID is recorded in UBIFS superblock.
373 * This function checks whether current user is allowed to use reserved pool.
374 * Returns %1 current user is allowed to use reserved pool and %0 otherwise.
376 static int can_use_rp(struct ubifs_info *c)
378 if (current->fsuid == c->rp_uid || capable(CAP_SYS_RESOURCE) ||
379 (c->rp_gid != 0 && in_group_p(c->rp_gid)))
385 * do_budget_space - reserve flash space for index and data growth.
386 * @c: UBIFS file-system description object
388 * This function makes sure UBIFS has enough free eraseblocks for index growth
391 * When budgeting index space, UBIFS reserves twice as more LEBs as the index
392 * would take if it was consolidated and written to the flash. This guarantees
393 * that the "in-the-gaps" commit method always succeeds and UBIFS will always
394 * be able to commit dirty index. So this function basically adds amount of
395 * budgeted index space to the size of the current index, multiplies this by 2,
396 * and makes sure this does not exceed the amount of free eraseblocks.
398 * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables:
399 * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might
400 * be large, because UBIFS does not do any index consolidation as long as
401 * there is free space. IOW, the index may take a lot of LEBs, but the LEBs
402 * will contain a lot of dirt.
403 * o @c->min_idx_lebs is the the index presumably takes. IOW, the index may be
404 * consolidated to take up to @c->min_idx_lebs LEBs.
406 * This function returns zero in case of success, and %-ENOSPC in case of
409 static int do_budget_space(struct ubifs_info *c)
411 long long outstanding, available;
412 int lebs, rsvd_idx_lebs, min_idx_lebs;
414 /* First budget index space */
415 min_idx_lebs = ubifs_calc_min_idx_lebs(c);
417 /* Now 'min_idx_lebs' contains number of LEBs to reserve */
418 if (min_idx_lebs > c->lst.idx_lebs)
419 rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs;
424 * The number of LEBs that are available to be used by the index is:
426 * @c->lst.empty_lebs + @c->freeable_cnt + @c->idx_gc_cnt -
427 * @c->lst.taken_empty_lebs
429 * @empty_lebs are available because they are empty. @freeable_cnt are
430 * available because they contain only free and dirty space and the
431 * index allocation always occurs after wbufs are synch'ed.
432 * @idx_gc_cnt are available because they are index LEBs that have been
433 * garbage collected (including trivial GC) and are awaiting the commit
434 * before they can be unmapped - note that the in-the-gaps method will
435 * grab these if it needs them. @taken_empty_lebs are empty_lebs that
436 * have already been allocated for some purpose (also includes those
437 * LEBs on the @idx_gc list).
439 * Note, @taken_empty_lebs may temporarily be higher by one because of
440 * the way we serialize LEB allocations and budgeting. See a comment in
441 * 'ubifs_find_free_space()'.
443 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
444 c->lst.taken_empty_lebs;
445 if (unlikely(rsvd_idx_lebs > lebs)) {
446 dbg_budg("out of indexing space: min_idx_lebs %d (old %d), "
447 "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs,
452 available = ubifs_calc_available(c, min_idx_lebs);
453 outstanding = c->budg_data_growth + c->budg_dd_growth;
455 if (unlikely(available < outstanding)) {
456 dbg_budg("out of data space: available %lld, outstanding %lld",
457 available, outstanding);
461 if (available - outstanding <= c->rp_size && !can_use_rp(c))
464 c->min_idx_lebs = min_idx_lebs;
469 * calc_idx_growth - calculate approximate index growth from budgeting request.
470 * @c: UBIFS file-system description object
471 * @req: budgeting request
473 * For now we assume each new node adds one znode. But this is rather poor
474 * approximation, though.
476 static int calc_idx_growth(const struct ubifs_info *c,
477 const struct ubifs_budget_req *req)
481 znodes = req->new_ino + (req->new_page << UBIFS_BLOCKS_PER_PAGE_SHIFT) +
483 return znodes * c->max_idx_node_sz;
487 * calc_data_growth - calculate approximate amount of new data from budgeting
489 * @c: UBIFS file-system description object
490 * @req: budgeting request
492 static int calc_data_growth(const struct ubifs_info *c,
493 const struct ubifs_budget_req *req)
497 data_growth = req->new_ino ? c->inode_budget : 0;
499 data_growth += c->page_budget;
501 data_growth += c->dent_budget;
502 data_growth += req->new_ino_d;
507 * calc_dd_growth - calculate approximate amount of data which makes other data
508 * dirty from budgeting request.
509 * @c: UBIFS file-system description object
510 * @req: budgeting request
512 static int calc_dd_growth(const struct ubifs_info *c,
513 const struct ubifs_budget_req *req)
517 dd_growth = req->dirtied_page ? c->page_budget : 0;
519 if (req->dirtied_ino)
520 dd_growth += c->inode_budget << (req->dirtied_ino - 1);
522 dd_growth += c->dent_budget;
523 dd_growth += req->dirtied_ino_d;
528 * ubifs_budget_space - ensure there is enough space to complete an operation.
529 * @c: UBIFS file-system description object
530 * @req: budget request
532 * This function allocates budget for an operation. It uses pessimistic
533 * approximation of how much flash space the operation needs. The goal of this
534 * function is to make sure UBIFS always has flash space to flush all dirty
535 * pages, dirty inodes, and dirty znodes (liability). This function may force
536 * commit, garbage-collection or write-back. Returns zero in case of success,
537 * %-ENOSPC if there is no free space and other negative error codes in case of
540 int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
542 int uninitialized_var(cmt_retries), uninitialized_var(wb_retries);
543 int err, idx_growth, data_growth, dd_growth;
544 struct retries_info ri;
546 ubifs_assert(req->dirtied_ino <= 4);
547 ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
549 data_growth = calc_data_growth(c, req);
550 dd_growth = calc_dd_growth(c, req);
551 if (!data_growth && !dd_growth)
553 idx_growth = calc_idx_growth(c, req);
554 memset(&ri, 0, sizeof(struct retries_info));
557 spin_lock(&c->space_lock);
558 ubifs_assert(c->budg_idx_growth >= 0);
559 ubifs_assert(c->budg_data_growth >= 0);
560 ubifs_assert(c->budg_dd_growth >= 0);
562 if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) {
563 dbg_budg("no space");
564 spin_unlock(&c->space_lock);
568 c->budg_idx_growth += idx_growth;
569 c->budg_data_growth += data_growth;
570 c->budg_dd_growth += dd_growth;
572 err = do_budget_space(c);
574 req->idx_growth = idx_growth;
575 req->data_growth = data_growth;
576 req->dd_growth = dd_growth;
577 spin_unlock(&c->space_lock);
581 /* Restore the old values */
582 c->budg_idx_growth -= idx_growth;
583 c->budg_data_growth -= data_growth;
584 c->budg_dd_growth -= dd_growth;
585 spin_unlock(&c->space_lock);
588 dbg_budg("no space for fast budgeting");
592 err = make_free_space(c, &ri);
593 if (err == -EAGAIN) {
594 dbg_budg("try again");
597 } else if (err == -ENOSPC) {
598 dbg_budg("FS is full, -ENOSPC");
600 if (can_use_rp(c) || c->rp_size == 0)
604 ubifs_err("cannot budget space, error %d", err);
609 * ubifs_release_budget - release budgeted free space.
610 * @c: UBIFS file-system description object
611 * @req: budget request
613 * This function releases the space budgeted by 'ubifs_budget_space()'. Note,
614 * since the index changes (which were budgeted for in @req->idx_growth) will
615 * only be written to the media on commit, this function moves the index budget
616 * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be
617 * zeroed by the commit operation.
619 void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
621 ubifs_assert(req->dirtied_ino <= 4);
622 ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
623 if (!req->recalculate) {
624 ubifs_assert(req->idx_growth >= 0);
625 ubifs_assert(req->data_growth >= 0);
626 ubifs_assert(req->dd_growth >= 0);
629 if (req->recalculate) {
630 req->data_growth = calc_data_growth(c, req);
631 req->dd_growth = calc_dd_growth(c, req);
632 req->idx_growth = calc_idx_growth(c, req);
635 if (!req->data_growth && !req->dd_growth)
638 c->nospace = c->nospace_rp = 0;
641 spin_lock(&c->space_lock);
642 c->budg_idx_growth -= req->idx_growth;
643 c->budg_uncommitted_idx += req->idx_growth;
644 c->budg_data_growth -= req->data_growth;
645 c->budg_dd_growth -= req->dd_growth;
646 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
648 ubifs_assert(c->budg_idx_growth >= 0);
649 ubifs_assert(c->budg_data_growth >= 0);
650 ubifs_assert(c->min_idx_lebs < c->main_lebs);
651 spin_unlock(&c->space_lock);
655 * ubifs_convert_page_budget - convert budget of a new page.
656 * @c: UBIFS file-system description object
658 * This function converts budget which was allocated for a new page of data to
659 * the budget of changing an existing page of data. The latter is smaller then
660 * the former, so this function only does simple re-calculation and does not
661 * involve any write-back.
663 void ubifs_convert_page_budget(struct ubifs_info *c)
665 spin_lock(&c->space_lock);
666 /* Release the index growth reservation */
667 c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;
668 /* Release the data growth reservation */
669 c->budg_data_growth -= c->page_budget;
670 /* Increase the dirty data growth reservation instead */
671 c->budg_dd_growth += c->page_budget;
672 /* And re-calculate the indexing space reservation */
673 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
674 spin_unlock(&c->space_lock);
678 * ubifs_release_dirty_inode_budget - release dirty inode budget.
679 * @c: UBIFS file-system description object
680 * @ui: UBIFS inode to release the budget for
682 * This function releases budget corresponding to a dirty inode. It is usually
683 * called when after the inode has been written to the media and marked as
686 void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
687 struct ubifs_inode *ui)
689 struct ubifs_budget_req req = {.dd_growth = c->inode_budget,
690 .dirtied_ino_d = ui->data_len};
692 ubifs_release_budget(c, &req);
696 * ubifs_budg_get_free_space - return amount of free space.
697 * @c: UBIFS file-system description object
699 * This function returns amount of free space on the file-system.
701 long long ubifs_budg_get_free_space(struct ubifs_info *c)
703 int min_idx_lebs, rsvd_idx_lebs;
704 long long available, outstanding, free;
706 /* Do exactly the same calculations as in 'do_budget_space()' */
707 spin_lock(&c->space_lock);
708 min_idx_lebs = ubifs_calc_min_idx_lebs(c);
710 if (min_idx_lebs > c->lst.idx_lebs)
711 rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs;
715 if (rsvd_idx_lebs > c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt
716 - c->lst.taken_empty_lebs) {
717 spin_unlock(&c->space_lock);
721 available = ubifs_calc_available(c, min_idx_lebs);
722 outstanding = c->budg_data_growth + c->budg_dd_growth;
723 c->min_idx_lebs = min_idx_lebs;
724 spin_unlock(&c->space_lock);
726 if (available > outstanding)
727 free = ubifs_reported_space(c, available - outstanding);