2 * This file is part of UBIFS.
4 * Copyright (C) 2006-2008 Nokia Corporation.
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
24 * This file is a part of UBIFS journal implementation and contains various
25 * functions which manipulate the log. The log is a fixed area on the flash
26 * which does not contain any data but refers to buds. The log is a part of the
/*
 * Debug-build forward declaration of dbg_check_bud_bytes(); the #define
 * below makes every call expand to the constant 0.
 * NOTE(review): the directives separating the two alternatives are not
 * visible in this view - presumably the #define is the non-debug branch.
 */
32 #ifdef CONFIG_UBIFS_FS_DEBUG
33 static int dbg_check_bud_bytes(struct ubifs_info *c);
35 #define dbg_check_bud_bytes(c) 0
39 * ubifs_search_bud - search bud LEB.
40 * @c: UBIFS file-system description object
41 * @lnum: logical eraseblock number to search
43 * This function searches bud LEB @lnum. Returns bud description object in case
44 * of success and %NULL if there is no bud with this LEB number.
/*
 * Looks up a bud by LEB number. The visible code takes c->buds_lock,
 * compares @lnum against bud->lnum of entries recovered with rb_entry(),
 * and releases the lock on two separate paths.
 * NOTE(review): several lines of this body (tree traversal, return
 * statements) are not visible in this view and are not described here.
 */
46 struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum)
49 struct ubifs_bud *bud;
/* The lookup runs with the buds spinlock held. */
51 spin_lock(&c->buds_lock);
/* Recover the bud that embeds rb-tree node 'p'. */
54 bud = rb_entry(p, struct ubifs_bud, rb);
57 else if (lnum > bud->lnum)
/* The lock is dropped on both exits - presumably "found" and "not found". */
60 spin_unlock(&c->buds_lock);
64 spin_unlock(&c->buds_lock);
69 * ubifs_get_wbuf - get the wbuf associated with a LEB, if there is one.
70 * @c: UBIFS file-system description object
71 * @lnum: logical eraseblock number to search
73 * This function returns the wbuf for @lnum or %NULL if there is none.
/*
 * Returns the write-buffer associated with LEB @lnum. The buds are searched
 * with c->buds_lock held; on the visible return path the result is the wbuf
 * of journal head 'jhead'.
 * NOTE(review): parts of this body (including where 'jhead' is assigned)
 * are not visible in this view.
 */
75 struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum)
78 struct ubifs_bud *bud;
84 spin_lock(&c->buds_lock);
/* Recover the bud that embeds rb-tree node 'p'. */
87 bud = rb_entry(p, struct ubifs_bud, rb);
90 else if (lnum > bud->lnum)
/* Drop the lock, then hand back the wbuf of journal head 'jhead'. */
94 spin_unlock(&c->buds_lock);
95 return &c->jheads[jhead].wbuf;
/* Second unlock - presumably the path where no bud matched. */
98 spin_unlock(&c->buds_lock);
103 * next_log_lnum - switch to the next log LEB.
104 * @c: UBIFS file-system description object
105 * @lnum: current log LEB
/*
 * Log LEB numbers wrap around: once @lnum is past c->log_last it is reset
 * to the first log LEB, UBIFS_LOG_LNUM.
 * NOTE(review): the statements that advance and return @lnum are not
 * visible in this view.
 */
107 static inline int next_log_lnum(const struct ubifs_info *c, int lnum)
110 if (lnum > c->log_last)
111 lnum = UBIFS_LOG_LNUM;
117 * empty_log_bytes - calculate amount of empty space in the log.
118 * @c: UBIFS file-system description object
/*
 * Computes free log space from the byte positions of the log head and tail.
 * NOTE(review): the declaration of 'h'/'t' and the condition that selects
 * the visible return statement are not visible in this view.
 */
120 static inline long long empty_log_bytes(const struct ubifs_info *c)
/*
 * h: head position in bytes (LEB number * LEB size + offset in the LEB).
 * t: tail position in bytes (start of the tail LEB).
 * The cast to long long makes the multiplication 64-bit.
 */
124 h = (long long)c->lhead_lnum * c->leb_size + c->lhead_offs;
125 t = (long long)c->ltail_lnum * c->leb_size;
/* Total log size minus the head position plus the tail position. */
128 return c->log_bytes - h + t;
134 * ubifs_add_bud - add bud LEB to the tree of buds and its journal head list.
135 * @c: UBIFS file-system description object
136 * @bud: the bud to add
/*
 * Inserts @bud into the c->buds rb-tree (ordered by bud->lnum), adds it to
 * the tail of a journal head's buds list and accounts its space in
 * c->bud_bytes. Everything visible happens between spin_lock() and
 * spin_unlock() of c->buds_lock.
 * NOTE(review): some lines of this body (the descent loop and the branch
 * around the list insertion / assertion) are not visible in this view.
 */
138 void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud)
140 struct rb_node **p, *parent = NULL;
142 struct ubifs_jhead *jhead;
144 spin_lock(&c->buds_lock);
/* Start the descent at the root link of the buds tree. */
145 p = &c->buds.rb_node;
148 b = rb_entry(parent, struct ubifs_bud, rb);
/* A bud with the same LEB number must not already be in the tree. */
149 ubifs_assert(bud->lnum != b->lnum);
150 if (bud->lnum < b->lnum)
/* Link the new node at the slot that was found, then fix up the tree. */
156 rb_link_node(&bud->rb, parent, p);
157 rb_insert_color(&bud->rb, &c->buds);
/* Queue the bud at the tail of the list of journal head bud->jhead. */
159 jhead = &c->jheads[bud->jhead];
160 list_add_tail(&bud->list, &jhead->buds_list);
/* NOTE(review): the condition selecting this assertion is not visible here. */
162 ubifs_assert(c->replaying && (c->vfs_sb->s_flags & MS_RDONLY));
165 * Note, although this is a new bud, we anyway account this space now,
166 * before any data has been written to it, because this is about to
167 * guarantee fixed mount time, and this bud will anyway be read and
/* Account everything from bud->start to the end of the LEB. */
170 c->bud_bytes += c->leb_size - bud->start;
172 dbg_log("LEB %d:%d, jhead %d, bud_bytes %lld", bud->lnum,
173 bud->start, bud->jhead, c->bud_bytes);
174 spin_unlock(&c->buds_lock);
178 * ubifs_create_buds_lists - create journal head buds lists for remount rw.
179 * @c: UBIFS file-system description object
/*
 * Walks the c->buds rb-tree starting from rb_first() and appends each bud
 * to the buds list of the journal head given by bud->jhead. The walk runs
 * with c->buds_lock held.
 * NOTE(review): the loop header and the step to the next node are not
 * visible in this view.
 */
181 void ubifs_create_buds_lists(struct ubifs_info *c)
185 spin_lock(&c->buds_lock);
/* Begin with the first node of the tree as returned by rb_first(). */
186 p = rb_first(&c->buds);
188 struct ubifs_bud *bud = rb_entry(p, struct ubifs_bud, rb);
189 struct ubifs_jhead *jhead = &c->jheads[bud->jhead];
/* Append to the tail of that journal head's buds list. */
191 list_add_tail(&bud->list, &jhead->buds_list);
194 spin_unlock(&c->buds_lock);
198 * ubifs_add_bud_to_log - add a new bud to the log.
199 * @c: UBIFS file-system description object
200 * @jhead: journal head the bud belongs to
201 * @lnum: LEB number of the bud
202 * @offs: starting offset of the bud
204 * This function writes a reference node for the new bud LEB @lnum to the log,
205 * and adds the bud to the buds tree. It also makes sure that log size does not
206 * exceed the 'c->max_bud_bytes' limit. Returns zero in case of success,
207 * %-EAGAIN if commit is required, and a negative error code in case of
/*
 * Adds a new bud to the log. Visible steps: allocate a bud object and a
 * reference node, take c->log_mutex, check the log-space and bud-bytes
 * limits (requesting a commit when they are exceeded), optionally request a
 * background commit, fill in and write the reference node at the log head,
 * advance the log head and register the bud with ubifs_add_bud().
 * NOTE(review): allocation checks, error handling, the initialisation of
 * 'bud' and the return statements are not visible in this view.
 */
210 int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
213 struct ubifs_bud *bud;
214 struct ubifs_ref_node *ref;
/* Both allocations use GFP_NOFS; the ref node buffer is zeroed and c->ref_node_alsz bytes long. */
216 bud = kmalloc(sizeof(struct ubifs_bud), GFP_NOFS);
219 ref = kzalloc(c->ref_node_alsz, GFP_NOFS);
225 mutex_lock(&c->log_mutex);
232 /* Make sure we have enough space in the log */
233 if (empty_log_bytes(c) - c->ref_node_alsz < c->min_log_bytes) {
234 dbg_log("not enough log space - %lld, required %d",
235 empty_log_bytes(c), c->min_log_bytes);
236 ubifs_commit_required(c);
/*
 * NOTE(review): the comment below says "@c->bud_bytes are locked"; judging by
 * the locking visible elsewhere in this file it presumably means
 * @c->buds_lock - confirm.
 */
242 * Make sure the the amount of space in buds will not exceed
243 * 'c->max_bud_bytes' limit, because we want to guarantee mount time
246 * It is not necessary to hold @c->buds_lock when reading @c->bud_bytes
247 * because we are holding @c->log_mutex. All @c->bud_bytes take place
248 * when both @c->log_mutex and @c->bud_bytes are locked.
/* The new bud would contribute (c->leb_size - offs) bytes. */
250 if (c->bud_bytes + c->leb_size - offs > c->max_bud_bytes) {
251 dbg_log("bud bytes %lld (%lld max), require commit",
252 c->bud_bytes, c->max_bud_bytes);
253 ubifs_commit_required(c);
259 * If the journal is full enough - start background commit. Note, it is
260 * OK to read 'c->cmt_state' without spinlock because integer reads
261 * are atomic in the kernel.
263 if (c->bud_bytes >= c->bg_bud_bytes &&
264 c->cmt_state == COMMIT_RESTING) {
265 dbg_log("bud bytes %lld (%lld max), initiate BG commit",
266 c->bud_bytes, c->max_bud_bytes);
267 ubifs_request_bg_commit(c);
/* Fill in the reference node; multi-byte fields are stored little-endian. */
274 ref->ch.node_type = UBIFS_REF_NODE;
275 ref->lnum = cpu_to_le32(bud->lnum);
276 ref->offs = cpu_to_le32(bud->start);
277 ref->jhead = cpu_to_le32(jhead);
/* If an aligned ref node no longer fits in the current log LEB, move to the next one. */
279 if (c->lhead_offs > c->leb_size - c->ref_node_alsz) {
280 c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
284 if (c->lhead_offs == 0) {
285 /* Must ensure next log LEB has been unmapped */
286 err = ubifs_leb_unmap(c, c->lhead_lnum);
291 if (bud->start == 0) {
293 * Before writing the LEB reference which refers an empty LEB
294 * to the log, we have to make sure it is mapped, because
295 * otherwise we'd risk to refer an LEB with garbage in case of
296 * an unclean reboot, because the target LEB might have been
297 * unmapped, but not yet physically erased.
299 err = ubi_leb_map(c->ubi, bud->lnum, UBI_SHORTTERM);
304 dbg_log("write ref LEB %d:%d",
305 c->lhead_lnum, c->lhead_offs);
/* Write the ref node at the current log head position. */
306 err = ubifs_write_node(c, ref, UBIFS_REF_NODE_SZ, c->lhead_lnum,
307 c->lhead_offs, UBI_SHORTTERM);
/* Advance the log head by the aligned ref node size. */
311 c->lhead_offs += c->ref_node_alsz;
313 ubifs_add_bud(c, bud);
/* The mutex is released on two paths - presumably success and error. */
315 mutex_unlock(&c->log_mutex);
320 mutex_unlock(&c->log_mutex);
327 * remove_buds - remove used buds.
328 * @c: UBIFS file-system description object
330 * This function removes used buds from the buds tree. It does not remove the
331 * buds which are pointed to by journal heads.
/*
 * Walks the c->buds rb-tree with c->buds_lock held and accumulates the
 * number of committed bud bytes in c->cmt_bud_bytes. A bud whose LEB is the
 * one currently used by its journal head's write-buffer is kept and its
 * start is moved to the wbuf offset; any other bud is erased from the tree
 * and moved to the c->old_buds list.
 * NOTE(review): the loop header, the step to the next node and the 'else'
 * separating the two cases are not visible in this view.
 */
333 static void remove_buds(struct ubifs_info *c)
/* The old-buds list must be empty on entry. */
337 ubifs_assert(list_empty(&c->old_buds));
338 c->cmt_bud_bytes = 0;
339 spin_lock(&c->buds_lock);
340 p = rb_first(&c->buds);
342 struct rb_node *p1 = p;
343 struct ubifs_bud *bud;
344 struct ubifs_wbuf *wbuf;
347 bud = rb_entry(p1, struct ubifs_bud, rb);
348 wbuf = &c->jheads[bud->jhead].wbuf;
/* Is this bud the LEB its journal head's write-buffer currently points to? */
350 if (wbuf->lnum == bud->lnum) {
352 * Do not remove buds which are pointed to by journal
353 * heads (non-closed buds).
/* Only the part between bud->start and the wbuf offset is accounted as committed. */
355 c->cmt_bud_bytes += wbuf->offs - bud->start;
356 dbg_log("preserve %d:%d, jhead %d, bud bytes %d, "
357 "cmt_bud_bytes %lld", bud->lnum, bud->start,
358 bud->jhead, wbuf->offs - bud->start,
/* The kept bud now starts at the current wbuf offset. */
360 bud->start = wbuf->offs;
/* Removed bud: everything from bud->start to the end of the LEB is accounted. */
362 c->cmt_bud_bytes += c->leb_size - bud->start;
363 dbg_log("remove %d:%d, jhead %d, bud bytes %d, "
364 "cmt_bud_bytes %lld", bud->lnum, bud->start,
365 bud->jhead, c->leb_size - bud->start,
/* Take the bud out of the tree and off its journal head list. */
367 rb_erase(p1, &c->buds);
368 list_del(&bud->list);
370 * If the commit does not finish, the recovery will need
371 * to replay the journal, in which case the old buds
372 * must be unchanged. Do not release them until post
373 * commit i.e. do not allow them to be garbage
376 list_add(&bud->list, &c->old_buds);
379 spin_unlock(&c->buds_lock);
383 * ubifs_log_start_commit - start commit.
384 * @c: UBIFS file-system description object
385 * @ltail_lnum: return new log tail LEB number
387 * The commit operation starts with writing "commit start" node to the log and
388 * reference nodes for all journal heads which will define new journal after
389 * the commit has been finished. The commit start and reference nodes are
390 * written in one go to the nearest empty log LEB (hence, when commit is
391 * finished UBIFS may safely unmap all the previous log LEBs). This function
392 * returns zero in case of success and a negative error code in case of
/*
 * Starts a commit. Visible steps: run dbg_check_bud_bytes(), build a buffer
 * holding a commit-start (CS) node followed by one reference node per
 * journal head, pad it to the min I/O size, switch to the next log LEB,
 * write the buffer at offset 0 of that LEB, report that LEB through
 * @ltail_lnum, advance the log head and reset c->min_log_bytes to 0.
 * NOTE(review): error checks, some assignments (e.g. of 'ref' and
 * c->lhead_offs) and the return statements are not visible in this view.
 */
395 int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
398 struct ubifs_cs_node *cs;
399 struct ubifs_ref_node *ref;
400 int err, i, max_len, len;
402 err = dbg_check_bud_bytes(c);
/* Room for one CS node plus one ref node per journal head, rounded up to min_io_size. */
406 max_len = UBIFS_CS_NODE_SZ + c->jhead_cnt * UBIFS_REF_NODE_SZ;
407 max_len = ALIGN(max_len, c->min_io_size);
408 buf = cs = kmalloc(max_len, GFP_NOFS);
/* The CS node carries the number of the commit being started (current + 1). */
412 cs->ch.node_type = UBIFS_CS_NODE;
413 cs->cmt_no = cpu_to_le64(c->cmt_no + 1);
414 ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0);
417 * Note, we do not lock 'c->log_mutex' because this is the commit start
418 * phase and we are exclusively using the log. And we do not lock
419 * write-buffer because nobody can write to the file-system at this
/* 'len' tracks how many bytes of the buffer are used so far. */
423 len = UBIFS_CS_NODE_SZ;
424 for (i = 0; i < c->jhead_cnt; i++) {
425 int lnum = c->jheads[i].wbuf.lnum;
426 int offs = c->jheads[i].wbuf.offs;
/*
 * Journal heads with no LEB (-1) or with the write offset at the end of the
 * LEB are tested here; the action taken is not visible in this view.
 */
428 if (lnum == -1 || offs == c->leb_size)
431 dbg_log("add ref to LEB %d:%d for jhead %d", lnum, offs, i);
433 ref->ch.node_type = UBIFS_REF_NODE;
434 ref->lnum = cpu_to_le32(lnum);
435 ref->offs = cpu_to_le32(offs);
436 ref->jhead = cpu_to_le32(i);
438 ubifs_prepare_node(c, ref, UBIFS_REF_NODE_SZ, 0);
439 len += UBIFS_REF_NODE_SZ;
/* Pad the tail of the buffer up to the next min_io_size boundary. */
442 ubifs_pad(c, buf + len, ALIGN(len, c->min_io_size) - len);
444 /* Switch to the next log LEB */
446 c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
450 if (c->lhead_offs == 0) {
451 /* Must ensure next LEB has been unmapped */
452 err = ubifs_leb_unmap(c, c->lhead_lnum);
457 len = ALIGN(len, c->min_io_size);
458 dbg_log("writing commit start at LEB %d:0, len %d", c->lhead_lnum, len);
459 err = ubifs_leb_write(c, c->lhead_lnum, cs, 0, len, UBI_SHORTTERM);
/* The LEB holding the CS node is reported to the caller as the new log tail. */
463 *ltail_lnum = c->lhead_lnum;
465 c->lhead_offs += len;
/* If the write filled the LEB completely, move the head to the next log LEB. */
466 if (c->lhead_offs == c->leb_size) {
467 c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
474 * We have started the commit and now users may use the rest of the log
477 c->min_log_bytes = 0;
485 * ubifs_log_end_commit - end commit.
486 * @c: UBIFS file-system description object
487 * @ltail_lnum: new log tail LEB number
489 * This function is called when the commit operation has finished. It
490 * moves log tail to new position and unmaps LEBs which contain obsolete data.
491 * Returns zero in case of success and a negative error code in case of
/*
 * Finishes a commit. With c->log_mutex held it moves the log tail to
 * @ltail_lnum, sets c->min_log_bytes back to one LEB, subtracts
 * c->cmt_bud_bytes from c->bud_bytes and runs dbg_check_bud_bytes().
 * NOTE(review): the declaration of 'err' and the return statement are not
 * visible in this view.
 */
494 int ubifs_log_end_commit(struct ubifs_info *c, int ltail_lnum)
499 * At this phase we have to lock 'c->log_mutex' because UBIFS allows FS
500 * writes during commit. Its only short "commit" start phase when
501 * writers are blocked.
503 mutex_lock(&c->log_mutex);
505 dbg_log("old tail was LEB %d:0, new tail is LEB %d:0",
506 c->ltail_lnum, ltail_lnum);
508 c->ltail_lnum = ltail_lnum;
510 * The commit is finished and from now on it must be guaranteed that
511 * there is always enough space for the next commit.
513 c->min_log_bytes = c->leb_size;
/* c->bud_bytes is modified with c->buds_lock held, in addition to c->log_mutex. */
515 spin_lock(&c->buds_lock);
516 c->bud_bytes -= c->cmt_bud_bytes;
517 spin_unlock(&c->buds_lock);
519 err = dbg_check_bud_bytes(c);
521 mutex_unlock(&c->log_mutex);
526 * ubifs_log_post_commit - things to do after commit is completed.
527 * @c: UBIFS file-system description object
528 * @old_ltail_lnum: old log tail LEB number
530 * Release buds only after commit is completed, because they must be unchanged
531 * if recovery is needed.
533 * Unmap log LEBs only after commit is completed, because they may be needed for
536 * This function returns %0 on success and a negative error code on failure.
/*
 * Post-commit cleanup. First drains c->old_buds: each bud's LEB is passed to
 * ubifs_return_leb() and the bud is unlinked from the list. Then, with
 * c->log_mutex held, unmaps the log LEBs from @old_ltail_lnum up to, but not
 * including, c->ltail_lnum.
 * NOTE(review): the error checks after the calls and the return statements
 * are not visible in this view.
 */
538 int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum)
542 while (!list_empty(&c->old_buds)) {
543 struct ubifs_bud *bud;
/* Always take the first entry; it is unlinked below, so the loop makes progress. */
545 bud = list_entry(c->old_buds.next, struct ubifs_bud, list);
546 err = ubifs_return_leb(c, bud->lnum);
549 list_del(&bud->list);
552 mutex_lock(&c->log_mutex);
/* next_log_lnum() supplies the successor of each log LEB in this walk. */
553 for (lnum = old_ltail_lnum; lnum != c->ltail_lnum;
554 lnum = next_log_lnum(c, lnum)) {
555 dbg_log("unmap log LEB %d", lnum);
556 err = ubifs_leb_unmap(c, lnum);
561 mutex_unlock(&c->log_mutex);
566 * struct done_ref - references that have been done.
576 * done_already - determine if a reference has been done already.
577 * @done_tree: rb-tree to store references that have been done
578 * @lnum: LEB number of reference
580 * This function returns %1 if the reference has been done, %0 if not, otherwise
581 * a negative error code is returned.
/*
 * Searches @done_tree for @lnum and, on the visible insertion path,
 * allocates a new zeroed 'struct done_ref' and links it into the tree at
 * the position where the search ended.
 * NOTE(review): the descent loop, the allocation check, the assignment of
 * the new record's LEB number and the return statements are not visible in
 * this view.
 */
583 static int done_already(struct rb_root *done_tree, int lnum)
585 struct rb_node **p = &done_tree->rb_node, *parent = NULL;
590 dr = rb_entry(parent, struct done_ref, rb);
593 else if (lnum > dr->lnum)
/* New record, zero-initialised, allocated with GFP_NOFS. */
599 dr = kzalloc(sizeof(struct done_ref), GFP_NOFS);
/* Link it at the slot found by the search, then fix up the tree. */
605 rb_link_node(&dr->rb, parent, p);
606 rb_insert_color(&dr->rb, done_tree);
612 * destroy_done_tree - destroy the done tree.
613 * @done_tree: done tree to destroy
/*
 * Tears down @done_tree iteratively: descend into the left child, else the
 * right child; at a node with neither, step back to its parent and clear
 * the parent's link to that node.
 * NOTE(review): the loop header and the release of each 'struct done_ref'
 * are not visible in this view.
 */
615 static void destroy_done_tree(struct rb_root *done_tree)
617 struct rb_node *this = done_tree->rb_node;
622 this = this->rb_left;
624 } else if (this->rb_right) {
625 this = this->rb_right;
/* 'dr' is the record embedding the current node; 'this' moves up to its parent. */
628 dr = rb_entry(this, struct done_ref, rb);
629 this = rb_parent(this);
/* Clear whichever child link of the parent pointed at 'dr'. */
631 if (this->rb_left == &dr->rb)
632 this->rb_left = NULL;
634 this->rb_right = NULL;
641 * add_node - add a node to the consolidated log.
642 * @c: UBIFS file-system description object
643 * @buf: buffer to which to add
644 * @lnum: LEB number to which to write is passed and returned here
645 * @offs: offset to where to write is passed and returned here
648 * This function returns %0 on success and a negative error code on failure.
/*
 * Appends a node to the staging buffer @buf at *@offs. On the visible flush
 * path the buffer is padded to the min I/O size, written to LEB *@lnum with
 * ubifs_leb_change() and *@lnum is advanced to the next log LEB.
 * NOTE(review): the rest of the parameter list, the condition guarding the
 * flush, the reset of *@offs and the return statements are not visible in
 * this view.
 */
650 static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs,
653 struct ubifs_ch *ch = node;
/* len: node length from its common header; remains: bytes left in the LEB after *offs. */
654 int len = le32_to_cpu(ch->len), remains = c->leb_size - *offs;
/* Flush path: pad up to the next min_io_size boundary and write 'sz' bytes. */
657 int sz = ALIGN(*offs, c->min_io_size), err;
659 ubifs_pad(c, buf + *offs, sz - *offs);
660 err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM);
663 *lnum = next_log_lnum(c, *lnum);
/* Copy the node into the buffer and advance the offset by its 8-byte-aligned length. */
666 memcpy(buf + *offs, node, len);
667 *offs += ALIGN(len, 8);
672 * ubifs_consolidate_log - consolidate the log.
673 * @c: UBIFS file-system description object
675 * Repeated failed commits could cause the log to be full, but at least 1 LEB is
676 * needed for commit. This function rewrites the reference nodes in the log
677 * omitting duplicates, and failed CS nodes, and leaving no gaps.
679 * This function returns %0 on success and a negative error code on failure.
/*
 * Consolidates the log. Visible steps: allocate a LEB-sized staging buffer,
 * scan log LEBs starting at c->ltail_lnum, feed nodes to add_node() (each
 * reference node's LEB number is first passed to done_already()), write out
 * what remains in the buffer, destroy the done tree, fail with "log is too
 * full" if the write position equals the log head LEB, otherwise unmap the
 * following LEBs up to c->lhead_lnum and set the new log head.
 * NOTE(review): many lines of this body (loop headers, error checks, the
 * initialisation of 'write_lnum', labels and return statements) are not
 * visible in this view.
 */
681 int ubifs_consolidate_log(struct ubifs_info *c)
683 struct ubifs_scan_leb *sleb;
684 struct ubifs_scan_node *snod;
685 struct rb_root done_tree = RB_ROOT;
686 int lnum, err, first = 1, write_lnum, offs = 0;
689 dbg_rcvry("log tail LEB %d, log head LEB %d", c->ltail_lnum,
/* Staging buffer of one LEB. */
691 buf = vmalloc(c->leb_size);
694 lnum = c->ltail_lnum;
/* Scan the current log LEB; the resulting node list is walked below. */
697 sleb = ubifs_scan(c, lnum, 0, c->sbuf);
702 list_for_each_entry(snod, &sleb->nodes, list) {
703 switch (snod->type) {
704 case UBIFS_REF_NODE: {
705 struct ubifs_ref_node *ref = snod->node;
706 int ref_lnum = le32_to_cpu(ref->lnum);
/* Look up / record this reference's LEB number in done_tree. */
708 err = done_already(&done_tree, ref_lnum);
712 err = add_node(c, buf, &write_lnum,
722 err = add_node(c, buf, &write_lnum, &offs,
730 ubifs_scan_destroy(sleb);
/* Compare with the log head LEB; the next visible statement moves on to the following log LEB. */
731 if (lnum == c->lhead_lnum)
733 lnum = next_log_lnum(c, lnum);
/* Pad the staged data to the min I/O size and write it to LEB write_lnum. */
736 int sz = ALIGN(offs, c->min_io_size);
738 ubifs_pad(c, buf + offs, sz - offs);
739 err = ubifs_leb_change(c, write_lnum, buf, sz, UBI_SHORTTERM);
742 offs = ALIGN(offs, c->min_io_size);
744 destroy_done_tree(&done_tree);
746 if (write_lnum == c->lhead_lnum) {
747 ubifs_err("log is too full");
750 /* Unmap remaining LEBs */
753 lnum = next_log_lnum(c, lnum);
754 err = ubifs_leb_unmap(c, lnum);
757 } while (lnum != c->lhead_lnum);
/* The consolidated log now ends at write_lnum:offs. */
758 c->lhead_lnum = write_lnum;
759 c->lhead_offs = offs;
760 dbg_rcvry("new log head at %d:%d", c->lhead_lnum, c->lhead_offs);
/* Cleanup calls - presumably the error exit path (labels not visible here). */
764 ubifs_scan_destroy(sleb);
766 destroy_done_tree(&done_tree);
771 #ifdef CONFIG_UBIFS_FS_DEBUG
774 * dbg_check_bud_bytes - make sure bud bytes calculations are all right.
775 * @c: UBIFS file-system description object
777 * This function makes sure the amount of flash space used by closed buds
778 * ('c->bud_bytes') is correct. Returns zero in case of success and %-EINVAL in
/*
 * Debug check: with c->buds_lock held, recomputes the bud byte count by
 * summing (c->leb_size - bud->start) over the buds lists of all journal
 * heads and reports a mismatch with c->bud_bytes through ubifs_err().
 * NOTE(review): the declarations of 'i'/'err', the early exit taken when
 * the UBIFS_CHK_GEN flag is clear and the return statements are not visible
 * in this view.
 */
781 static int dbg_check_bud_bytes(struct ubifs_info *c)
784 struct ubifs_bud *bud;
785 long long bud_bytes = 0;
/* The check is tested against the UBIFS_CHK_GEN bit of ubifs_chk_flags. */
787 if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
790 spin_lock(&c->buds_lock);
/* Same per-bud accounting as ubifs_add_bud(): from bud->start to the end of the LEB. */
791 for (i = 0; i < c->jhead_cnt; i++)
792 list_for_each_entry(bud, &c->jheads[i].buds_list, list)
793 bud_bytes += c->leb_size - bud->start;
795 if (c->bud_bytes != bud_bytes) {
796 ubifs_err("bad bud_bytes %lld, calculated %lld",
797 c->bud_bytes, bud_bytes);
800 spin_unlock(&c->buds_lock);
805 #endif /* CONFIG_UBIFS_FS_DEBUG */