2 * Copyright (c) International Business Machines Corp., 2006
3 * Copyright (c) Nokia Corporation, 2006, 2007
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 * Author: Artem Bityutskiy (Битюцкий Артём)
23 * This file includes volume table manipulation code. The volume table is an
24 * on-flash table containing volume meta-data like name, number of reserved
25 * physical eraseblocks, type, etc. The volume table is stored in the so-called
28 * The layout volume is an internal volume which is organized as follows. It
29 * consists of two logical eraseblocks - LEB 0 and LEB 1. Each logical
30 * eraseblock stores one volume table copy, i.e. LEB 0 and LEB 1 duplicate each
31 * other. This redundancy guarantees robustness to unclean reboots. The volume
32 * table is basically an array of volume table records. Each record contains
33 * full information about the volume and protected by a CRC checksum.
35 * When the volume table is changed, it is first changed in RAM. Then LEB 0 is
36 * erased, and the updated volume table is written back to LEB 0. Then same for
37 * LEB 1. This scheme guarantees recoverability from unclean reboots.
39 * In this UBI implementation the on-flash volume table does not contain any
40 * information about how much data static volumes contain. This information may
41 * be found from the scanning data.
43 * But it would still be beneficial to store this information in the volume
44 * table. For example, suppose we have a static volume X, and all its physical
45 * eraseblocks became bad for some reasons. Suppose we are attaching the
46 * corresponding MTD device, the scanning has found no logical eraseblocks
47 * corresponding to the volume X. According to the volume table volume X does
48 * exist. So we don't know whether it is just empty or all its physical
49 * eraseblocks went bad. So we cannot alarm the user about this corruption.
51 * The volume table also stores so-called "update marker", which is used for
52 * volume updates. Before updating the volume, the update marker is set, and
53 * after the update operation is finished, the update marker is cleared. So if
54 * the update operation was interrupted (e.g. by an unclean reboot) - the
55 * update marker is still there and we know that the volume's contents is
59 #include <linux/crc32.h>
60 #include <linux/err.h>
61 #include <asm/div64.h>
/*
 * With CONFIG_MTD_UBI_DEBUG_PARANOID defined, paranoid_vtbl_check() is a real
 * function declared here (its definition is at the end of the file).
 * NOTE(review): the #else/#endif lines are not present in this listing; the
 * #define below presumably sits in the #else branch and makes the check
 * expand to nothing - confirm against the full file.
 */
64 #ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
65 static void paranoid_vtbl_check(const struct ubi_device *ubi);
67 #define paranoid_vtbl_check(ubi)
70 /* Empty volume table record */
/*
 * File-scope static object, so it starts out zero-filled. Its crc field is
 * assigned in ubi_read_volume_table(); it is used as the template for empty
 * slots by ubi_change_vtbl_record(), vtbl_check() and create_empty_lvol().
 */
71 static struct ubi_vtbl_record empty_vtbl_record;
74 * ubi_change_vtbl_record - change volume table record.
75 * @ubi: UBI device description object
76 * @idx: table index to change
77 * @vtbl_rec: new volume table record
79 * This function changes volume table record @idx. If @vtbl_rec is %NULL, empty
80 * volume table record is written. The caller does not have to calculate CRC of
81 * the record as it is done by this function. Returns zero in case of success
82 * and a negative error code in case of failure.
/*
 * Store @vtbl_rec in slot @idx of the in-RAM volume table and rewrite the
 * whole table to each logical eraseblock of the layout volume.
 * NOTE(review): this listing omits several original lines (braces, local
 * declarations, error checks); comments describe only the visible lines.
 */
84 int ubi_change_vtbl_record(struct ubi_device *ubi, int idx,
85 struct ubi_vtbl_record *vtbl_rec)
89 struct ubi_volume *layout_vol;
/* @idx must address one of the ubi->vtbl_slots records */
91 ubi_assert(idx >= 0 && idx < ubi->vtbl_slots);
/* The layout volume object is looked up by its internal volume ID */
92 layout_vol = ubi->volumes[vol_id2idx(ubi, UBI_LAYOUT_VOLUME_ID)];
/*
 * Use the shared empty record instead of the caller's record. The guarding
 * condition is not shown here; presumably this is the @vtbl_rec == NULL case
 * described in the function header - confirm.
 */
95 vtbl_rec = &empty_vtbl_record;
/*
 * CRC is computed over the first UBI_VTBL_RECORD_SIZE_CRC bytes of the record
 * and stored in the record's crc field in big-endian form.
 */
97 crc = crc32(UBI_CRC32_INIT, vtbl_rec, UBI_VTBL_RECORD_SIZE_CRC);
98 vtbl_rec->crc = cpu_to_be32(crc);
/* Update the in-RAM copy of the table first */
101 memcpy(&ubi->vtbl[idx], vtbl_rec, sizeof(struct ubi_vtbl_record));
/*
 * Then, for every LEB of the layout volume in ascending order: unmap it and
 * write the complete in-RAM table (ubi->vtbl_size bytes) at offset 0.
 */
102 for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) {
103 err = ubi_eba_unmap_leb(ubi, layout_vol, i);
107 err = ubi_eba_write_leb(ubi, layout_vol, i, ubi->vtbl, 0,
108 ubi->vtbl_size, UBI_LONGTERM);
/* Expands to nothing unless CONFIG_MTD_UBI_DEBUG_PARANOID is enabled */
113 paranoid_vtbl_check(ubi);
118 * vtbl_check - check if volume table is not corrupted and contains sensible
120 * @ubi: UBI device description object
121 * @vtbl: volume table
123 * This function returns zero if @vtbl is all right, %1 if CRC is incorrect,
124 * and %-EINVAL if it contains inconsistent data.
/*
 * Validate every record of the volume table @vtbl against the geometry stored
 * in @ubi, then verify that no two records carry the same name.
 * NOTE(review): this listing omits several original lines (braces, some
 * conditions, the error-code assignments and jumps); comments describe only
 * the visible lines.
 */
126 static int vtbl_check(const struct ubi_device *ubi,
127 const struct ubi_vtbl_record *vtbl)
129 int i, n, reserved_pebs, alignment, data_pad, vol_type, name_len;
/* First pass: per-record checks */
134 for (i = 0; i < ubi->vtbl_slots; i++) {
/* Multi-byte on-flash fields are big-endian; convert them to CPU order */
137 reserved_pebs = be32_to_cpu(vtbl[i].reserved_pebs);
138 alignment = be32_to_cpu(vtbl[i].alignment);
139 data_pad = be32_to_cpu(vtbl[i].data_pad);
140 upd_marker = vtbl[i].upd_marker;
141 vol_type = vtbl[i].vol_type;
142 name_len = be16_to_cpu(vtbl[i].name_len);
143 name = &vtbl[i].name[0];
/* Recompute the record CRC and compare it with the stored one */
145 crc = crc32(UBI_CRC32_INIT, &vtbl[i], UBI_VTBL_RECORD_SIZE_CRC);
146 if (be32_to_cpu(vtbl[i].crc) != crc) {
/* NOTE(review): 'i' is declared int but printed with %u - confirm */
147 ubi_err("bad CRC at record %u: %#08x, not %#08x",
148 i, crc, be32_to_cpu(vtbl[i].crc));
149 ubi_dbg_dump_vtbl_record(&vtbl[i], i);
/*
 * reserved_pebs == 0 marks an unused slot; such a record is compared
 * byte-for-byte with the empty record template.
 */
153 if (reserved_pebs == 0) {
154 if (memcmp(&vtbl[i], &empty_vtbl_record,
155 UBI_VTBL_RECORD_SIZE)) {
/* The values were decoded into signed ints, so reject negative ones */
162 if (reserved_pebs < 0 || alignment < 0 || data_pad < 0 ||
/* Alignment must be non-zero and must not exceed the LEB size */
168 if (alignment > ubi->leb_size || alignment == 0) {
/* Any alignment other than 1 must be a multiple of the minimal I/O unit */
173 n = alignment % ubi->min_io_size;
174 if (alignment != 1 && n) {
/*
 * n is the remainder of the LEB size divided by the alignment; per the
 * message below it is the value data_pad is expected to have (the
 * comparison itself is not shown in this listing).
 */
179 n = ubi->leb_size % alignment;
181 dbg_err("bad data_pad, has to be %d", n);
/* Only the dynamic and static volume types are accepted */
186 if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) {
/* The update marker is a boolean flag */
191 if (upd_marker != 0 && upd_marker != 1) {
/* A volume cannot reserve more PEBs than the device has good PEBs */
196 if (reserved_pebs > ubi->good_peb_count) {
197 dbg_err("too large reserved_pebs, good PEBs %d",
198 ubi->good_peb_count);
/* Name checks: bounded length, non-empty, and length matching name_len */
203 if (name_len > UBI_VOL_NAME_MAX) {
208 if (name[0] == '\0') {
213 if (name_len != strnlen(name, name_len + 1)) {
219 /* Checks that all names are unique */
/* Second pass: compare each pair (i, n) with i < n exactly once */
220 for (i = 0; i < ubi->vtbl_slots - 1; i++) {
221 for (n = i + 1; n < ubi->vtbl_slots; n++) {
222 int len1 = be16_to_cpu(vtbl[i].name_len);
223 int len2 = be16_to_cpu(vtbl[n].name_len);
/* Records with zero name length are skipped by the len1 > 0 test */
225 if (len1 > 0 && len1 == len2 &&
226 !strncmp(vtbl[i].name, vtbl[n].name, len1)) {
227 ubi_err("volumes %d and %d have the same name"
228 " \"%s\"", i, n, vtbl[i].name);
229 ubi_dbg_dump_vtbl_record(&vtbl[i], i);
230 ubi_dbg_dump_vtbl_record(&vtbl[n], n);
/* Common failure report: log the record index and error, dump the record */
239 ubi_err("volume table check failed: record %d, error %d", i, err);
240 ubi_dbg_dump_vtbl_record(&vtbl[i], i);
245 * create_vtbl - create a copy of volume table.
246 * @ubi: UBI device description object
247 * @si: scanning information
248 * @copy: number of the volume table copy
249 * @vtbl: contents of the volume table
251 * This function returns zero in case of success and a negative error code in
/*
 * Write volume table copy number @copy (contents in @vtbl) to a free physical
 * eraseblock and register that eraseblock in the scanning information @si.
 * NOTE(review): this listing omits several original lines (braces, labels,
 * error checks); comments describe only the visible lines.
 */
254 static int create_vtbl(struct ubi_device *ubi, struct ubi_scan_info *si,
255 int copy, void *vtbl)
/*
 * NOTE(review): vid_hdr is declared 'static', so the pointer is shared by all
 * calls; it is assigned afresh below on each call, so the qualifier looks
 * unnecessary - confirm.
 */
258 static struct ubi_vid_hdr *vid_hdr;
259 struct ubi_scan_volume *sv;
260 struct ubi_scan_leb *new_seb, *old_seb = NULL;
/* Copies are reported to the user counting from 1 */
262 ubi_msg("create volume table (copy #%d)", copy + 1);
264 vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL);
269 * Check if there is a logical eraseblock which would have to contain
270 * this volume table copy was found during scanning. It has to be wiped
273 sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOLUME_ID);
/* Remember the previously scanned eraseblock for this copy, if any */
275 old_seb = ubi_scan_find_seb(sv, copy);
/* Obtain a free PEB; failure is reported as an ERR_PTR-encoded pointer */
278 new_seb = ubi_scan_get_free_peb(ubi, si);
279 if (IS_ERR(new_seb)) {
280 err = PTR_ERR(new_seb);
/* Fill in the VID header describing LEB @copy of the layout volume */
284 vid_hdr->vol_type = UBI_VID_DYNAMIC;
285 vid_hdr->vol_id = cpu_to_be32(UBI_LAYOUT_VOLUME_ID);
286 vid_hdr->compat = UBI_LAYOUT_VOLUME_COMPAT;
287 vid_hdr->data_size = vid_hdr->used_ebs =
288 vid_hdr->data_pad = cpu_to_be32(0);
289 vid_hdr->lnum = cpu_to_be32(copy);
/* Take the next sequence number, advancing si->max_sqnum */
290 vid_hdr->sqnum = cpu_to_be64(++si->max_sqnum);
/* LEB version is one above the old eraseblock's, or 0 if there was none */
291 vid_hdr->leb_ver = cpu_to_be32(old_seb ? old_seb->leb_ver + 1: 0);
293 /* The EC header is already there, write the VID header */
294 err = ubi_io_write_vid_hdr(ubi, new_seb->pnum, vid_hdr);
298 /* Write the layout volume contents */
299 err = ubi_io_write_data(ubi, vtbl, new_seb->pnum, 0, ubi->vtbl_size);
304 * And add it to the scanning information. Don't delete the old
305 * @old_seb as it will be deleted and freed in 'ubi_scan_add_used()'.
307 err = ubi_scan_add_used(ubi, si, new_seb->pnum, new_seb->ec,
310 ubi_free_vid_hdr(ubi, vid_hdr);
/*
 * On -EIO the operation is retried, at most 5 additional times; the PEB that
 * failed is appended to the si->corr list before the retry.
 */
314 if (err == -EIO && ++tries <= 5) {
316 * Probably this physical eraseblock went bad, try to pick
319 list_add_tail(&new_seb->u.list, &si->corr);
324 ubi_free_vid_hdr(ubi, vid_hdr);
330 * process_lvol - process the layout volume.
331 * @ubi: UBI device description object
332 * @si: scanning information
333 * @sv: layout volume scanning information
335 * This function is responsible for reading the layout volume, ensuring it is
336 * not corrupted, and recovering from corruptions if needed. Returns volume
337 * table in case of success and a negative error code in case of failure.
/*
 * Read both copies of the volume table from the layout volume described by
 * @sv, validate them, and restore a bad copy from the good one.
 * NOTE(review): this listing omits several original lines (braces, labels,
 * some conditions and error jumps); comments describe only the visible lines.
 */
339 static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi,
340 struct ubi_scan_info *si,
341 struct ubi_scan_volume *sv)
345 struct ubi_scan_leb *seb;
/* One buffer pointer per layout LEB; both start out NULL */
346 struct ubi_vtbl_record *leb[UBI_LAYOUT_VOLUME_EBS] = { NULL, NULL };
/* Both copies are treated as corrupted until a check says otherwise */
347 int leb_corrupted[UBI_LAYOUT_VOLUME_EBS] = {1, 1};
350 * UBI goes through the following steps when it changes the layout
353 * b. write new data to LEB 0;
355 * d. write new data to LEB 1.
357 * Before the change, both LEBs contain the same data.
359 * Due to unclean reboots, the contents of LEB 0 may be lost, but there
360 * should LEB 1. So it is OK if LEB 0 is corrupted while LEB 1 is not.
361 * Similarly, LEB 1 may be lost, but there should be LEB 0. And
362 * finally, unclean reboots may result in a situation when neither LEB
363 * 0 nor LEB 1 are corrupted, but they are different. In this case, LEB
364 * 0 contains more recent information.
366 * So the plan is to first check LEB 0. Then
367 * a. if LEB 0 is OK, it must be containing the most resent data; then
368 * we compare it with LEB 1, and if they are different, we copy LEB
370 * b. if LEB 0 is corrupted, but LEB 1 has to be OK, and we copy LEB 1
374 dbg_msg("check layout volume");
376 /* Read both LEB 0 and LEB 1 into memory */
/*
 * NOTE(review): leb[] has UBI_LAYOUT_VOLUME_EBS entries and is indexed by
 * seb->lnum; no bounds check on seb->lnum is visible in this listing -
 * confirm one exists elsewhere.
 */
377 ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) {
378 leb[seb->lnum] = vmalloc(ubi->vtbl_size);
379 if (!leb[seb->lnum]) {
/* Zero the buffer before reading the table copy into it */
383 memset(leb[seb->lnum], 0, ubi->vtbl_size);
385 err = ubi_io_read_data(ubi, leb[seb->lnum], seb->pnum, 0,
387 if (err == UBI_IO_BITFLIPS || err == -EBADMSG)
389 * Scrub the PEB later. Note, -EBADMSG indicates an
390 * uncorrectable ECC error, but we have our own CRC and
391 * the data will be checked later. If the data is OK,
392 * the PEB will be scrubbed (because we set
393 * seb->scrub). If the data is not OK, the contents of
394 * the PEB will be recovered from the second copy, and
395 * seb->scrub will be cleared in
396 * 'ubi_scan_add_used()'.
/* Validate copy #1; a negative result is tested separately from non-zero */
405 leb_corrupted[0] = vtbl_check(ubi, leb[0]);
406 if (leb_corrupted[0] < 0)
/* Copy #1 is good: copy #2 counts as corrupted if it differs in any byte */
410 if (!leb_corrupted[0]) {
413 leb_corrupted[1] = memcmp(leb[0], leb[1], ubi->vtbl_size);
414 if (leb_corrupted[1]) {
415 ubi_warn("volume table copy #2 is corrupted");
/* Rewrite copy index 1 from the contents of LEB 0 */
416 err = create_vtbl(ubi, si, 1, leb[0]);
419 ubi_msg("volume table was restored");
422 /* Both LEB 1 and LEB 2 are OK and consistent */
426 /* LEB 0 is corrupted or does not exist */
/* Fall back to validating copy #2 on its own */
428 leb_corrupted[1] = vtbl_check(ubi, leb[1]);
429 if (leb_corrupted[1] < 0)
432 if (leb_corrupted[1]) {
433 /* Both LEB 0 and LEB 1 are corrupted */
434 ubi_err("both volume tables are corrupted");
438 ubi_warn("volume table copy #1 is corrupted");
/* Rewrite copy index 0 from the contents of LEB 1 */
439 err = create_vtbl(ubi, si, 0, leb[1]);
442 ubi_msg("volume table was restored");
455 * create_empty_lvol - create empty layout volume.
456 * @ubi: UBI device description object
457 * @si: scanning information
459 * This function returns volume table contents in case of success and a
460 * negative error code in case of failure.
/*
 * Build a volume table in which every slot holds the empty record and write
 * it out as each copy of the layout volume.
 * NOTE(review): this listing omits several original lines (braces, local
 * declarations, error checks, the final return); comments describe only the
 * visible lines.
 */
462 static struct ubi_vtbl_record *create_empty_lvol(struct ubi_device *ubi,
463 struct ubi_scan_info *si)
466 struct ubi_vtbl_record *vtbl;
/* Allocate the in-RAM table; the allocation-failure test is not shown here */
468 vtbl = vmalloc(ubi->vtbl_size);
470 return ERR_PTR(-ENOMEM);
/* Zero the whole buffer, including any bytes past the last record */
471 memset(vtbl, 0, ubi->vtbl_size);
/* Fill every slot from the empty record template */
473 for (i = 0; i < ubi->vtbl_slots; i++)
474 memcpy(&vtbl[i], &empty_vtbl_record, UBI_VTBL_RECORD_SIZE);
/* Write the same table as each of the UBI_LAYOUT_VOLUME_EBS copies */
476 for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) {
479 err = create_vtbl(ubi, si, i, vtbl);
490 * init_volumes - initialize volume information for existing volumes.
491 * @ubi: UBI device description object
492 * @si: scanning information
493 * @vtbl: volume table
495 * This function allocates volume description objects for existing volumes.
496 * Returns zero in case of success and a negative error code in case of
/*
 * Create in-RAM volume objects for every non-empty record of @vtbl and for
 * the layout volume, then account for the PEBs they reserve.
 * NOTE(review): this listing omits several original lines (braces, allocation
 * checks, some assignments' left-hand sides); comments describe only the
 * visible lines.
 */
499 static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si,
500 const struct ubi_vtbl_record *vtbl)
/* reserved_pebs accumulates the PEBs reserved by all volumes set up here */
502 int i, reserved_pebs = 0;
503 struct ubi_scan_volume *sv;
504 struct ubi_volume *vol;
506 for (i = 0; i < ubi->vtbl_slots; i++) {
509 if (be32_to_cpu(vtbl[i].reserved_pebs) == 0)
510 continue; /* Empty record */
512 vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL);
/* Copy the record fields into the volume object, converting endianness */
516 vol->reserved_pebs = be32_to_cpu(vtbl[i].reserved_pebs);
517 vol->alignment = be32_to_cpu(vtbl[i].alignment);
518 vol->data_pad = be32_to_cpu(vtbl[i].data_pad);
/* Any on-flash type other than UBI_VID_DYNAMIC is mapped to static */
519 vol->vol_type = vtbl[i].vol_type == UBI_VID_DYNAMIC ?
520 UBI_DYNAMIC_VOLUME : UBI_STATIC_VOLUME;
521 vol->name_len = be16_to_cpu(vtbl[i].name_len);
/* The usable LEB size is the LEB size minus the padding */
522 vol->usable_leb_size = ubi->leb_size - vol->data_pad;
/* Copy name_len bytes of the name and terminate the string explicitly */
523 memcpy(vol->name, vtbl[i].name, vol->name_len);
524 vol->name[vol->name_len] = '\0';
527 if (vtbl[i].flags & UBI_VTBL_AUTORESIZE_FLG) {
528 /* Auto re-size flag may be set only for one volume */
/* -1 in ubi->autoresize_vol_id means no auto-resize volume seen so far */
529 if (ubi->autoresize_vol_id != -1) {
530 ubi_err("more then one auto-resize volume (%d "
531 "and %d)", ubi->autoresize_vol_id, i);
536 ubi->autoresize_vol_id = i;
/* The slot index doubles as the index into ubi->volumes[] */
539 ubi_assert(!ubi->volumes[i]);
540 ubi->volumes[i] = vol;
543 reserved_pebs += vol->reserved_pebs;
546 * In case of dynamic volume UBI knows nothing about how many
547 * data is stored there. So assume the whole volume is used.
549 if (vol->vol_type == UBI_DYNAMIC_VOLUME) {
550 vol->used_ebs = vol->reserved_pebs;
551 vol->last_eb_bytes = vol->usable_leb_size;
/* Right-hand side of an assignment whose first line is not shown here */
553 (long long)vol->used_ebs * vol->usable_leb_size;
557 /* Static volumes only */
558 sv = ubi_scan_find_sv(si, i);
561 * No eraseblocks belonging to this volume found. We
562 * don't actually know whether this static volume is
563 * completely corrupted or just contains no data. And
564 * we cannot know this as long as data size is not
565 * stored on flash. So we just assume the volume is
566 * empty. FIXME: this should be handled.
571 if (sv->leb_count != sv->used_ebs) {
573 * We found a static volume which misses several
574 * eraseblocks. Treat it as corrupted.
576 ubi_warn("static volume %d misses %d LEBs - corrupted",
577 sv->vol_id, sv->used_ebs - sv->leb_count);
/* Take the used-LEB count and last-LEB size from the scanning data */
582 vol->used_ebs = sv->used_ebs;
/* Right-hand side of an assignment whose first line is not shown here */
584 (long long)(vol->used_ebs - 1) * vol->usable_leb_size;
585 vol->used_bytes += sv->last_data_size;
586 vol->last_eb_bytes = sv->last_data_size;
589 /* And add the layout volume */
590 vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL);
594 vol->reserved_pebs = UBI_LAYOUT_VOLUME_EBS;
596 vol->vol_type = UBI_DYNAMIC_VOLUME;
/* sizeof includes the terminating NUL, hence the -1 here and +1 below */
597 vol->name_len = sizeof(UBI_LAYOUT_VOLUME_NAME) - 1;
598 memcpy(vol->name, UBI_LAYOUT_VOLUME_NAME, vol->name_len + 1);
599 vol->usable_leb_size = ubi->leb_size;
600 vol->used_ebs = vol->reserved_pebs;
/*
 * NOTE(review): a PEB count is assigned to last_eb_bytes here, whereas the
 * dynamic-volume branch above uses usable_leb_size - looks suspicious,
 * confirm the intended value.
 */
601 vol->last_eb_bytes = vol->reserved_pebs;
/* Right-hand side of an assignment whose first line is not shown here */
603 (long long)vol->used_ebs * (ubi->leb_size - vol->data_pad);
604 vol->vol_id = UBI_LAYOUT_VOLUME_ID;
/*
 * 'i' equals ubi->vtbl_slots after the loop above; the assertion relies on
 * that being the layout volume's index - presumably what vol_id2idx()
 * returns for it, confirm.
 */
607 ubi_assert(!ubi->volumes[i]);
608 ubi->volumes[vol_id2idx(ubi, vol->vol_id)] = vol;
609 reserved_pebs += vol->reserved_pebs;
/*
 * NOTE(review): the embedded line numbers are consecutive here, so a PEB
 * shortage is only logged; the counters below are still adjusted and
 * avail_pebs can become negative - confirm whether an error return is
 * intended.
 */
613 if (reserved_pebs > ubi->avail_pebs)
614 ubi_err("not enough PEBs, required %d, available %d",
615 reserved_pebs, ubi->avail_pebs);
616 ubi->rsvd_pebs += reserved_pebs;
617 ubi->avail_pebs -= reserved_pebs;
623 * check_sv - check volume scanning information.
624 * @vol: UBI volume description object
625 * @sv: volume scanning information
627 * This function returns zero if the volume scanning information is consistent
628 * with the data read from the volume table, and %-EINVAL if not.
/*
 * Cross-check the scanning data @sv of one volume against its volume object
 * @vol, which was built from the volume table.
 * NOTE(review): this listing omits the bodies of the 'if' statements (the
 * error-code assignments and jumps) and the returns; comments describe only
 * the visible conditions.
 */
630 static int check_sv(const struct ubi_volume *vol,
631 const struct ubi_scan_volume *sv)
/* The highest LEB number seen must be below the reserved PEB count */
635 if (sv->highest_lnum >= vol->reserved_pebs) {
/* No more LEBs may have been found than PEBs are reserved */
639 if (sv->leb_count > vol->reserved_pebs) {
/* The volume type must agree between scanning data and volume table */
643 if (sv->vol_type != vol->vol_type) {
/* The used-LEB count must not exceed the reserved PEB count */
647 if (sv->used_ebs > vol->reserved_pebs) {
/* The data padding must agree as well */
651 if (sv->data_pad != vol->data_pad) {
/* Failure report: log the error number and dump the volume object */
658 ubi_err("bad scanning information, error %d", err);
660 ubi_dbg_dump_vol_info(vol);
665 * check_scanning_info - check the scanning information.
666 * @ubi: UBI device description object
667 * @si: scanning information
669 * Even though we protect on-flash data by CRC checksums, we still don't trust
670 * the media. This function ensures that scanning information is consistent to
671 * the information read from the volume table. Returns zero if the scanning
672 * information is OK and %-EINVAL if it is not.
/*
 * Verify that what scanning found (@si) agrees with the volume objects set up
 * in @ubi, dropping scanned volumes that the volume table does not know.
 * NOTE(review): this listing omits several original lines (braces, some
 * conditions, 'continue'/'return' statements); comments describe only the
 * visible lines.
 */
674 static int check_scanning_info(const struct ubi_device *ubi,
675 struct ubi_scan_info *si)
678 struct ubi_scan_volume *sv;
679 struct ubi_volume *vol;
/* Scanning cannot find more volumes than user slots plus internal volumes */
681 if (si->vols_found > UBI_INT_VOL_COUNT + ubi->vtbl_slots) {
682 ubi_err("scanning found %d volumes, maximum is %d + %d",
683 si->vols_found, UBI_INT_VOL_COUNT, ubi->vtbl_slots);
/*
 * The highest ID found is rejected when it is at or above
 * vtbl_slots + UBI_INT_VOL_COUNT but still below UBI_INTERNAL_VOL_START.
 */
687 if (si->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT&&
688 si->highest_vol_id < UBI_INTERNAL_VOL_START) {
689 ubi_err("too large volume ID %d found by scanning",
/* Walk every index of ubi->volumes[], user slots and internal ones alike */
695 for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) {
698 sv = ubi_scan_find_sv(si, i);
699 vol = ubi->volumes[i];
/*
 * Scanned volume is removed from @si; the guarding conditions are not shown
 * here - presumably the "no volume object but scanning data exists" case,
 * confirm.
 */
702 ubi_scan_rm_volume(si, sv);
/* A zero reservation is only expected for user slots, hence the assertion */
706 if (vol->reserved_pebs == 0) {
707 ubi_assert(i < ubi->vtbl_slots);
713 * During scanning we found a volume which does not
714 * exist according to the information in the volume
715 * table. This must have happened due to an unclean
716 * reboot while the volume was being removed. Discard
719 ubi_msg("finish volume %d removal", sv->vol_id);
720 ubi_scan_rm_volume(si, sv);
/* Otherwise compare the scanning data with the volume object in detail */
722 err = check_sv(vol, sv);
732 * ubi_read_volume_table - read volume table.
734 * @ubi: UBI device description object
735 * @si: scanning information
737 * This function reads volume table, checks it, recovers from errors if needed,
738 * or creates it if needed. Returns zero in case of success and a negative
739 * error code in case of failure.
/*
 * Entry point: size the volume table, obtain it (by creating an empty layout
 * volume or by processing the scanned one), build the volume objects and
 * cross-check them against the scanning information.
 * NOTE(review): this listing omits several original lines (braces, labels,
 * some conditions, error checks and returns); comments describe only the
 * visible lines.
 */
741 int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si)
744 struct ubi_scan_volume *sv;
/*
 * Store a fixed CRC in the empty record template.
 * NOTE(review): presumably the CRC of an all-zero record body - confirm.
 */
746 empty_vtbl_record.crc = cpu_to_be32(0xf116c36b);
749 * The number of supported volumes is limited by the eraseblock size
750 * and by the UBI_MAX_VOLUMES constant.
/* As many records as fit into one LEB, capped at UBI_MAX_VOLUMES */
752 ubi->vtbl_slots = ubi->leb_size / UBI_VTBL_RECORD_SIZE;
753 if (ubi->vtbl_slots > UBI_MAX_VOLUMES)
754 ubi->vtbl_slots = UBI_MAX_VOLUMES;
/* Table size in bytes, rounded up to the minimal I/O unit */
756 ubi->vtbl_size = ubi->vtbl_slots * UBI_VTBL_RECORD_SIZE;
757 ubi->vtbl_size = ALIGN(ubi->vtbl_size, ubi->min_io_size);
/* Look for the layout volume among the scanned volumes */
759 sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOLUME_ID);
762 * No logical eraseblocks belonging to the layout volume were
763 * found. This could mean that the flash is just empty. In
764 * this case we create empty layout volume.
766 * But if flash is not empty this must be a corruption or the
767 * MTD device just contains garbage.
/* create_empty_lvol() reports failure as an ERR_PTR-encoded pointer */
770 ubi->vtbl = create_empty_lvol(ubi, si);
771 if (IS_ERR(ubi->vtbl))
772 return PTR_ERR(ubi->vtbl);
774 ubi_err("the layout volume was not found");
/* The layout volume may not have more LEBs than UBI_LAYOUT_VOLUME_EBS */
778 if (sv->leb_count > UBI_LAYOUT_VOLUME_EBS) {
779 /* This must not happen with proper UBI images */
780 dbg_err("too many LEBs (%d) in layout volume",
/* process_lvol() also reports failure as an ERR_PTR-encoded pointer */
785 ubi->vtbl = process_lvol(ubi, si, sv);
786 if (IS_ERR(ubi->vtbl))
787 return PTR_ERR(ubi->vtbl);
/* Start with all good PEBs available; init_volumes() subtracts reservations */
790 ubi->avail_pebs = ubi->good_peb_count;
793 * The layout volume is OK, initialize the corresponding in-RAM data
796 err = init_volumes(ubi, si, ubi->vtbl);
801 * Get sure that the scanning information is consistent to the
802 * information stored in the volume table.
804 err = check_scanning_info(ubi, si);
/* Cleanup: free every volume object and clear its slot in ubi->volumes[] */
812 for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++)
813 if (ubi->volumes[i]) {
814 kfree(ubi->volumes[i]);
815 ubi->volumes[i] = NULL;
820 #ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
823 * paranoid_vtbl_check - check volume table.
824 * @ubi: UBI device description object
/*
 * Debug-only self-check (compiled under CONFIG_MTD_UBI_DEBUG_PARANOID): run
 * vtbl_check() on the in-RAM volume table and log an error on any non-zero
 * result.
 * NOTE(review): braces and any statements following the error message are
 * not present in this listing.
 */
826 static void paranoid_vtbl_check(const struct ubi_device *ubi)
828 if (vtbl_check(ubi, ubi->vtbl)) {
829 ubi_err("paranoid check failed");
834 #endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */