2 * Copyright (c) International Business Machines Corp., 2006
3 * Copyright (c) Nokia Corporation, 2006, 2007
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
13 * the GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 * Author: Artem Bityutskiy (Битюцкий Артём)
23 * This file includes volume table manipulation code. The volume table is an
24 * on-flash table containing volume meta-data like name, number of reserved
25 * physical eraseblocks, type, etc. The volume table is stored in the so-called
26 * layout volume.
28 * The layout volume is an internal volume which is organized as follows. It
29 * consists of two logical eraseblocks - LEB 0 and LEB 1. Each logical
30 * eraseblock stores one volume table copy, i.e. LEB 0 and LEB 1 duplicate each
31 * other. This redundancy guarantees robustness to unclean reboots. The volume
32 * table is basically an array of volume table records. Each record contains
33 * full information about the volume and protected by a CRC checksum.
35 * When the volume table is changed, it is first changed in RAM. Then LEB 0 is
36 * erased, and the updated volume table is written back to LEB 0. Then same for
37 * LEB 1. This scheme guarantees recoverability from unclean reboots.
39 * In this UBI implementation the on-flash volume table does not contain any
40 * information about how much data static volumes contain. This information may
41 * be found from the scanning data.
43 * But it would still be beneficial to store this information in the volume
44 * table. For example, suppose we have a static volume X, and all its physical
45 * eraseblocks became bad for some reasons. Suppose we are attaching the
46 * corresponding MTD device, the scanning has found no logical eraseblocks
47 * corresponding to the volume X. According to the volume table volume X does
48 * exist. So we don't know whether it is just empty or all its physical
49 * eraseblocks went bad. So we cannot alarm the user about this corruption.
51 * The volume table also stores so-called "update marker", which is used for
52 * volume updates. Before updating the volume, the update marker is set, and
53 * after the update operation is finished, the update marker is cleared. So if
54 * the update operation was interrupted (e.g. by an unclean reboot) - the
55 * update marker is still there and we know that the volume's contents is
56 * damaged.
59 #include <linux/crc32.h>
60 #include <linux/err.h>
61 #include <asm/div64.h>
64 #ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
65 static void paranoid_vtbl_check(const struct ubi_device *ubi);
67 #define paranoid_vtbl_check(ubi)
70 /* Empty volume table record */
71 static struct ubi_vtbl_record empty_vtbl_record;
74 * ubi_change_vtbl_record - change volume table record.
75 * @ubi: UBI device description object
76 * @idx: table index to change
77 * @vtbl_rec: new volume table record
79 * This function changes volume table record @idx. If @vtbl_rec is %NULL, empty
80 * volume table record is written. The caller does not have to calculate CRC of
81 * the record as it is done by this function. Returns zero in case of success
82 * and a negative error code in case of failure.
84 int ubi_change_vtbl_record(struct ubi_device *ubi, int idx,
85 struct ubi_vtbl_record *vtbl_rec)
89 struct ubi_volume *layout_vol;
91 ubi_assert(idx >= 0 && idx < ubi->vtbl_slots);
92 layout_vol = ubi->volumes[vol_id2idx(ubi, UBI_LAYOUT_VOLUME_ID)];
95 vtbl_rec = &empty_vtbl_record;
97 crc = crc32(UBI_CRC32_INIT, vtbl_rec, UBI_VTBL_RECORD_SIZE_CRC);
98 vtbl_rec->crc = cpu_to_be32(crc);
101 memcpy(&ubi->vtbl[idx], vtbl_rec, sizeof(struct ubi_vtbl_record));
102 for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) {
103 err = ubi_eba_unmap_leb(ubi, layout_vol, i);
107 err = ubi_eba_write_leb(ubi, layout_vol, i, ubi->vtbl, 0,
108 ubi->vtbl_size, UBI_LONGTERM);
113 paranoid_vtbl_check(ubi);
118 * vtbl_check - check if volume table is not corrupted and contains sensible
120 * @ubi: UBI device description object
121 * @vtbl: volume table
123 * This function returns zero if @vtbl is all right, %1 if CRC is incorrect,
124 * and %-EINVAL if it contains inconsistent data.
126 static int vtbl_check(const struct ubi_device *ubi,
127 const struct ubi_vtbl_record *vtbl)
129 int i, n, reserved_pebs, alignment, data_pad, vol_type, name_len;
134 for (i = 0; i < ubi->vtbl_slots; i++) {
137 reserved_pebs = be32_to_cpu(vtbl[i].reserved_pebs);
138 alignment = be32_to_cpu(vtbl[i].alignment);
139 data_pad = be32_to_cpu(vtbl[i].data_pad);
140 upd_marker = vtbl[i].upd_marker;
141 vol_type = vtbl[i].vol_type;
142 name_len = be16_to_cpu(vtbl[i].name_len);
143 name = &vtbl[i].name[0];
145 crc = crc32(UBI_CRC32_INIT, &vtbl[i], UBI_VTBL_RECORD_SIZE_CRC);
146 if (be32_to_cpu(vtbl[i].crc) != crc) {
147 ubi_err("bad CRC at record %u: %#08x, not %#08x",
148 i, crc, be32_to_cpu(vtbl[i].crc));
149 ubi_dbg_dump_vtbl_record(&vtbl[i], i);
153 if (reserved_pebs == 0) {
154 if (memcmp(&vtbl[i], &empty_vtbl_record,
155 UBI_VTBL_RECORD_SIZE)) {
162 if (reserved_pebs < 0 || alignment < 0 || data_pad < 0 ||
168 if (alignment > ubi->leb_size || alignment == 0) {
173 n = alignment % ubi->min_io_size;
174 if (alignment != 1 && n) {
179 n = ubi->leb_size % alignment;
181 dbg_err("bad data_pad, has to be %d", n);
186 if (vol_type != UBI_VID_DYNAMIC && vol_type != UBI_VID_STATIC) {
191 if (upd_marker != 0 && upd_marker != 1) {
196 if (reserved_pebs > ubi->good_peb_count) {
197 dbg_err("too large reserved_pebs, good PEBs %d",
198 ubi->good_peb_count);
203 if (name_len > UBI_VOL_NAME_MAX) {
208 if (name[0] == '\0') {
213 if (name_len != strnlen(name, name_len + 1)) {
219 /* Checks that all names are unique */
220 for (i = 0; i < ubi->vtbl_slots - 1; i++) {
221 for (n = i + 1; n < ubi->vtbl_slots; n++) {
222 int len1 = be16_to_cpu(vtbl[i].name_len);
223 int len2 = be16_to_cpu(vtbl[n].name_len);
225 if (len1 > 0 && len1 == len2 &&
226 !strncmp(vtbl[i].name, vtbl[n].name, len1)) {
227 ubi_err("volumes %d and %d have the same name"
228 " \"%s\"", i, n, vtbl[i].name);
229 ubi_dbg_dump_vtbl_record(&vtbl[i], i);
230 ubi_dbg_dump_vtbl_record(&vtbl[n], n);
239 ubi_err("volume table check failed: record %d, error %d", i, err);
240 ubi_dbg_dump_vtbl_record(&vtbl[i], i);
245 * create_vtbl - create a copy of volume table.
246 * @ubi: UBI device description object
247 * @si: scanning information
248 * @copy: number of the volume table copy
249 * @vtbl: contents of the volume table
251 * This function returns zero in case of success and a negative error code in
254 static int create_vtbl(struct ubi_device *ubi, struct ubi_scan_info *si,
255 int copy, void *vtbl)
258 static struct ubi_vid_hdr *vid_hdr;
259 struct ubi_scan_volume *sv;
260 struct ubi_scan_leb *new_seb, *old_seb = NULL;
262 ubi_msg("create volume table (copy #%d)", copy + 1);
264 vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_KERNEL);
269 * Check if there is a logical eraseblock which would have to contain
270 * this volume table copy was found during scanning. It has to be wiped
273 sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOLUME_ID);
275 old_seb = ubi_scan_find_seb(sv, copy);
278 new_seb = ubi_scan_get_free_peb(ubi, si);
279 if (IS_ERR(new_seb)) {
280 err = PTR_ERR(new_seb);
284 vid_hdr->vol_type = UBI_VID_DYNAMIC;
285 vid_hdr->vol_id = cpu_to_be32(UBI_LAYOUT_VOLUME_ID);
286 vid_hdr->compat = UBI_LAYOUT_VOLUME_COMPAT;
287 vid_hdr->data_size = vid_hdr->used_ebs =
288 vid_hdr->data_pad = cpu_to_be32(0);
289 vid_hdr->lnum = cpu_to_be32(copy);
290 vid_hdr->sqnum = cpu_to_be64(++si->max_sqnum);
291 vid_hdr->leb_ver = cpu_to_be32(old_seb ? old_seb->leb_ver + 1: 0);
293 /* The EC header is already there, write the VID header */
294 err = ubi_io_write_vid_hdr(ubi, new_seb->pnum, vid_hdr);
298 /* Write the layout volume contents */
299 err = ubi_io_write_data(ubi, vtbl, new_seb->pnum, 0, ubi->vtbl_size);
304 * And add it to the scanning information. Don't delete the old
305 * @old_seb as it will be deleted and freed in 'ubi_scan_add_used()'.
307 err = ubi_scan_add_used(ubi, si, new_seb->pnum, new_seb->ec,
310 ubi_free_vid_hdr(ubi, vid_hdr);
314 if (err == -EIO && ++tries <= 5) {
316 * Probably this physical eraseblock went bad, try to pick
319 list_add_tail(&new_seb->u.list, &si->corr);
324 ubi_free_vid_hdr(ubi, vid_hdr);
330 * process_lvol - process the layout volume.
331 * @ubi: UBI device description object
332 * @si: scanning information
333 * @sv: layout volume scanning information
335 * This function is responsible for reading the layout volume, ensuring it is
336 * not corrupted, and recovering from corruptions if needed. Returns volume
337 * table in case of success and a negative error code in case of failure.
339 static struct ubi_vtbl_record *process_lvol(struct ubi_device *ubi,
340 struct ubi_scan_info *si,
341 struct ubi_scan_volume *sv)
345 struct ubi_scan_leb *seb;
346 struct ubi_vtbl_record *leb[UBI_LAYOUT_VOLUME_EBS] = { NULL, NULL };
347 int leb_corrupted[UBI_LAYOUT_VOLUME_EBS] = {1, 1};
350 * UBI goes through the following steps when it changes the layout
353 * b. write new data to LEB 0;
355 * d. write new data to LEB 1.
357 * Before the change, both LEBs contain the same data.
359 * Due to unclean reboots, the contents of LEB 0 may be lost, but there
360 * should LEB 1. So it is OK if LEB 0 is corrupted while LEB 1 is not.
361 * Similarly, LEB 1 may be lost, but there should be LEB 0. And
362 * finally, unclean reboots may result in a situation when neither LEB
363 * 0 nor LEB 1 are corrupted, but they are different. In this case, LEB
364 * 0 contains more recent information.
366 * So the plan is to first check LEB 0. Then
367 * a. if LEB 0 is OK, it must be containing the most resent data; then
368 * we compare it with LEB 1, and if they are different, we copy LEB
370 * b. if LEB 0 is corrupted, but LEB 1 has to be OK, and we copy LEB 1
374 dbg_msg("check layout volume");
376 /* Read both LEB 0 and LEB 1 into memory */
377 ubi_rb_for_each_entry(rb, seb, &sv->root, u.rb) {
378 leb[seb->lnum] = vmalloc(ubi->vtbl_size);
379 if (!leb[seb->lnum]) {
383 memset(leb[seb->lnum], 0, ubi->vtbl_size);
385 err = ubi_io_read_data(ubi, leb[seb->lnum], seb->pnum, 0,
387 if (err == UBI_IO_BITFLIPS || err == -EBADMSG)
388 /* Scrub the PEB later */
396 leb_corrupted[0] = vtbl_check(ubi, leb[0]);
397 if (leb_corrupted[0] < 0)
401 if (!leb_corrupted[0]) {
404 leb_corrupted[1] = memcmp(leb[0], leb[1], ubi->vtbl_size);
405 if (leb_corrupted[1]) {
406 ubi_warn("volume table copy #2 is corrupted");
407 err = create_vtbl(ubi, si, 1, leb[0]);
410 ubi_msg("volume table was restored");
413 /* Both LEB 1 and LEB 2 are OK and consistent */
417 /* LEB 0 is corrupted or does not exist */
419 leb_corrupted[1] = vtbl_check(ubi, leb[1]);
420 if (leb_corrupted[1] < 0)
423 if (leb_corrupted[1]) {
424 /* Both LEB 0 and LEB 1 are corrupted */
425 ubi_err("both volume tables are corrupted");
429 ubi_warn("volume table copy #1 is corrupted");
430 err = create_vtbl(ubi, si, 0, leb[1]);
433 ubi_msg("volume table was restored");
446 * create_empty_lvol - create empty layout volume.
447 * @ubi: UBI device description object
448 * @si: scanning information
450 * This function returns volume table contents in case of success and a
451 * negative error code in case of failure.
453 static struct ubi_vtbl_record *create_empty_lvol(struct ubi_device *ubi,
454 struct ubi_scan_info *si)
457 struct ubi_vtbl_record *vtbl;
459 vtbl = vmalloc(ubi->vtbl_size);
461 return ERR_PTR(-ENOMEM);
462 memset(vtbl, 0, ubi->vtbl_size);
464 for (i = 0; i < ubi->vtbl_slots; i++)
465 memcpy(&vtbl[i], &empty_vtbl_record, UBI_VTBL_RECORD_SIZE);
467 for (i = 0; i < UBI_LAYOUT_VOLUME_EBS; i++) {
470 err = create_vtbl(ubi, si, i, vtbl);
481 * init_volumes - initialize volume information for existing volumes.
482 * @ubi: UBI device description object
483 * @si: scanning information
484 * @vtbl: volume table
486 * This function allocates volume description objects for existing volumes.
487 * Returns zero in case of success and a negative error code in case of
490 static int init_volumes(struct ubi_device *ubi, const struct ubi_scan_info *si,
491 const struct ubi_vtbl_record *vtbl)
493 int i, reserved_pebs = 0;
494 struct ubi_scan_volume *sv;
495 struct ubi_volume *vol;
497 for (i = 0; i < ubi->vtbl_slots; i++) {
500 if (be32_to_cpu(vtbl[i].reserved_pebs) == 0)
501 continue; /* Empty record */
503 vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL);
507 vol->reserved_pebs = be32_to_cpu(vtbl[i].reserved_pebs);
508 vol->alignment = be32_to_cpu(vtbl[i].alignment);
509 vol->data_pad = be32_to_cpu(vtbl[i].data_pad);
510 vol->vol_type = vtbl[i].vol_type == UBI_VID_DYNAMIC ?
511 UBI_DYNAMIC_VOLUME : UBI_STATIC_VOLUME;
512 vol->name_len = be16_to_cpu(vtbl[i].name_len);
513 vol->usable_leb_size = ubi->leb_size - vol->data_pad;
514 memcpy(vol->name, vtbl[i].name, vol->name_len);
515 vol->name[vol->name_len] = '\0';
518 if (vtbl[i].flags & UBI_VTBL_AUTORESIZE_FLG) {
519 /* Auto re-size flag may be set only for one volume */
520 if (ubi->autoresize_vol_id != -1) {
521 ubi_err("more then one auto-resize volume (%d "
522 "and %d)", ubi->autoresize_vol_id, i);
527 ubi->autoresize_vol_id = i;
530 ubi_assert(!ubi->volumes[i]);
531 ubi->volumes[i] = vol;
534 reserved_pebs += vol->reserved_pebs;
537 * In case of dynamic volume UBI knows nothing about how many
538 * data is stored there. So assume the whole volume is used.
540 if (vol->vol_type == UBI_DYNAMIC_VOLUME) {
541 vol->used_ebs = vol->reserved_pebs;
542 vol->last_eb_bytes = vol->usable_leb_size;
544 (long long)vol->used_ebs * vol->usable_leb_size;
548 /* Static volumes only */
549 sv = ubi_scan_find_sv(si, i);
552 * No eraseblocks belonging to this volume found. We
553 * don't actually know whether this static volume is
554 * completely corrupted or just contains no data. And
555 * we cannot know this as long as data size is not
556 * stored on flash. So we just assume the volume is
557 * empty. FIXME: this should be handled.
562 if (sv->leb_count != sv->used_ebs) {
564 * We found a static volume which misses several
565 * eraseblocks. Treat it as corrupted.
567 ubi_warn("static volume %d misses %d LEBs - corrupted",
568 sv->vol_id, sv->used_ebs - sv->leb_count);
573 vol->used_ebs = sv->used_ebs;
575 (long long)(vol->used_ebs - 1) * vol->usable_leb_size;
576 vol->used_bytes += sv->last_data_size;
577 vol->last_eb_bytes = sv->last_data_size;
580 /* And add the layout volume */
581 vol = kzalloc(sizeof(struct ubi_volume), GFP_KERNEL);
585 vol->reserved_pebs = UBI_LAYOUT_VOLUME_EBS;
587 vol->vol_type = UBI_DYNAMIC_VOLUME;
588 vol->name_len = sizeof(UBI_LAYOUT_VOLUME_NAME) - 1;
589 memcpy(vol->name, UBI_LAYOUT_VOLUME_NAME, vol->name_len + 1);
590 vol->usable_leb_size = ubi->leb_size;
591 vol->used_ebs = vol->reserved_pebs;
592 vol->last_eb_bytes = vol->reserved_pebs;
594 (long long)vol->used_ebs * (ubi->leb_size - vol->data_pad);
595 vol->vol_id = UBI_LAYOUT_VOLUME_ID;
598 ubi_assert(!ubi->volumes[i]);
599 ubi->volumes[vol_id2idx(ubi, vol->vol_id)] = vol;
600 reserved_pebs += vol->reserved_pebs;
604 if (reserved_pebs > ubi->avail_pebs)
605 ubi_err("not enough PEBs, required %d, available %d",
606 reserved_pebs, ubi->avail_pebs);
607 ubi->rsvd_pebs += reserved_pebs;
608 ubi->avail_pebs -= reserved_pebs;
614 * check_sv - check volume scanning information.
615 * @vol: UBI volume description object
616 * @sv: volume scanning information
618 * This function returns zero if the volume scanning information is consistent
619 * to the data read from the volume tabla, and %-EINVAL if not.
621 static int check_sv(const struct ubi_volume *vol,
622 const struct ubi_scan_volume *sv)
626 if (sv->highest_lnum >= vol->reserved_pebs) {
630 if (sv->leb_count > vol->reserved_pebs) {
634 if (sv->vol_type != vol->vol_type) {
638 if (sv->used_ebs > vol->reserved_pebs) {
642 if (sv->data_pad != vol->data_pad) {
649 ubi_err("bad scanning information, error %d", err);
651 ubi_dbg_dump_vol_info(vol);
656 * check_scanning_info - check that scanning information.
657 * @ubi: UBI device description object
658 * @si: scanning information
660 * Even though we protect on-flash data by CRC checksums, we still don't trust
661 * the media. This function ensures that scanning information is consistent to
662 * the information read from the volume table. Returns zero if the scanning
663 * information is OK and %-EINVAL if it is not.
665 static int check_scanning_info(const struct ubi_device *ubi,
666 struct ubi_scan_info *si)
669 struct ubi_scan_volume *sv;
670 struct ubi_volume *vol;
672 if (si->vols_found > UBI_INT_VOL_COUNT + ubi->vtbl_slots) {
673 ubi_err("scanning found %d volumes, maximum is %d + %d",
674 si->vols_found, UBI_INT_VOL_COUNT, ubi->vtbl_slots);
678 if (si->highest_vol_id >= ubi->vtbl_slots + UBI_INT_VOL_COUNT&&
679 si->highest_vol_id < UBI_INTERNAL_VOL_START) {
680 ubi_err("too large volume ID %d found by scanning",
686 for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++) {
689 sv = ubi_scan_find_sv(si, i);
690 vol = ubi->volumes[i];
693 ubi_scan_rm_volume(si, sv);
697 if (vol->reserved_pebs == 0) {
698 ubi_assert(i < ubi->vtbl_slots);
704 * During scanning we found a volume which does not
705 * exist according to the information in the volume
706 * table. This must have happened due to an unclean
707 * reboot while the volume was being removed. Discard
710 ubi_msg("finish volume %d removal", sv->vol_id);
711 ubi_scan_rm_volume(si, sv);
713 err = check_sv(vol, sv);
723 * ubi_read_volume_table - read volume table.
725 * @ubi: UBI device description object
726 * @si: scanning information
728 * This function reads volume table, checks it, recover from errors if needed,
729 * or creates it if needed. Returns zero in case of success and a negative
730 * error code in case of failure.
732 int ubi_read_volume_table(struct ubi_device *ubi, struct ubi_scan_info *si)
735 struct ubi_scan_volume *sv;
737 empty_vtbl_record.crc = cpu_to_be32(0xf116c36b);
740 * The number of supported volumes is limited by the eraseblock size
741 * and by the UBI_MAX_VOLUMES constant.
743 ubi->vtbl_slots = ubi->leb_size / UBI_VTBL_RECORD_SIZE;
744 if (ubi->vtbl_slots > UBI_MAX_VOLUMES)
745 ubi->vtbl_slots = UBI_MAX_VOLUMES;
747 ubi->vtbl_size = ubi->vtbl_slots * UBI_VTBL_RECORD_SIZE;
748 ubi->vtbl_size = ALIGN(ubi->vtbl_size, ubi->min_io_size);
750 sv = ubi_scan_find_sv(si, UBI_LAYOUT_VOLUME_ID);
753 * No logical eraseblocks belonging to the layout volume were
754 * found. This could mean that the flash is just empty. In
755 * this case we create empty layout volume.
757 * But if flash is not empty this must be a corruption or the
758 * MTD device just contains garbage.
761 ubi->vtbl = create_empty_lvol(ubi, si);
762 if (IS_ERR(ubi->vtbl))
763 return PTR_ERR(ubi->vtbl);
765 ubi_err("the layout volume was not found");
769 if (sv->leb_count > UBI_LAYOUT_VOLUME_EBS) {
770 /* This must not happen with proper UBI images */
771 dbg_err("too many LEBs (%d) in layout volume",
776 ubi->vtbl = process_lvol(ubi, si, sv);
777 if (IS_ERR(ubi->vtbl))
778 return PTR_ERR(ubi->vtbl);
781 ubi->avail_pebs = ubi->good_peb_count;
784 * The layout volume is OK, initialize the corresponding in-RAM data
787 err = init_volumes(ubi, si, ubi->vtbl);
792 * Get sure that the scanning information is consistent to the
793 * information stored in the volume table.
795 err = check_scanning_info(ubi, si);
803 for (i = 0; i < ubi->vtbl_slots + UBI_INT_VOL_COUNT; i++)
804 if (ubi->volumes[i]) {
805 kfree(ubi->volumes[i]);
806 ubi->volumes[i] = NULL;
811 #ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
814 * paranoid_vtbl_check - check volume table.
815 * @ubi: UBI device description object
817 static void paranoid_vtbl_check(const struct ubi_device *ubi)
819 if (vtbl_check(ubi, ubi->vtbl)) {
820 ubi_err("paranoid check failed");
825 #endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */