fs/ext4/extents.c
/*
 * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com
 * Written by Alex Tomas <alex@clusterfs.com>
 *
 * Architecture independence:
 *   Copyright (c) 2005, Bull S.A.
 *   Written by Pierre Peiffer <pierre.peiffer@bull.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307, USA
 */

/*
 * Extents support for EXT4
 *
 * TODO:
 *   - ext4*_error() should be used in some situations
 *   - analyze all BUG()/BUG_ON(), use -EIO where appropriate
 *   - smart tree reduction
 */

#include <linux/module.h>
#include <linux/fs.h>
#include <linux/time.h>
#include <linux/jbd2.h>
#include <linux/highuid.h>
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/falloc.h>
#include <asm/uaccess.h>
#include <linux/fiemap.h>
#include "ext4_jbd2.h"
#include "ext4_extents.h"


/*
 * ext_pblock:
 * combine low and high parts of physical block number into ext4_fsblk_t
 */
static ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
{
        ext4_fsblk_t block;

        block = le32_to_cpu(ex->ee_start_lo);
        block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
        return block;
}

/*
 * idx_pblock:
 * combine low and high parts of a leaf physical block number into ext4_fsblk_t
 */
ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
{
        ext4_fsblk_t block;

        block = le32_to_cpu(ix->ei_leaf_lo);
        block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
        return block;
}

/*
 * ext4_ext_store_pblock:
 * stores a large physical block number into an extent struct,
 * breaking it into parts
 */
void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
{
        ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
        ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
}

/*
 * ext4_idx_store_pblock:
 * stores a large physical block number into an index struct,
 * breaking it into parts
 */
static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
{
        ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
        ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
}
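
/*
 * A physical block number is 48 bits wide: the low 32 bits live in
 * ee_start_lo/ei_leaf_lo and the high 16 bits in ee_start_hi/ei_leaf_hi.
 * The double shift ("<< 31) << 1" and "(pb >> 31) >> 1") is just a shift
 * by 32 done in two steps. Worked example of the round-trip:
 *
 *   pb = 0x123456789ABC
 *   lo = pb & 0xffffffff       = 0x56789ABC
 *   hi = (pb >> 31) >> 1       = 0x1234
 *   lo | ((u64)hi << 31) << 1  = 0x123456789ABC
 */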

/*
 * Make sure that @handle has at least @needed credits: if the handle
 * still holds more than @needed buffer credits there is nothing to do;
 * otherwise try to extend the running transaction, and if that is not
 * possible, restart it.
 */
static int ext4_ext_journal_restart(handle_t *handle, int needed)
{
        int err;

        if (!ext4_handle_valid(handle))
                return 0;
        if (handle->h_buffer_credits > needed)
                return 0;
        err = ext4_journal_extend(handle, needed);
        if (err <= 0)
                return err;
        return ext4_journal_restart(handle, needed);
}

/*
 * could return:
 *  - EROFS
 *  - ENOMEM
 */
static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
                                struct ext4_ext_path *path)
{
        if (path->p_bh) {
                /* path points to block */
                return ext4_journal_get_write_access(handle, path->p_bh);
        }
        /* path points to leaf/index in inode body */
        /* we use in-core data, no need to protect them */
        return 0;
}

/*
 * could return:
 *  - EROFS
 *  - ENOMEM
 *  - EIO
 */
static int ext4_ext_dirty(handle_t *handle, struct inode *inode,
                                struct ext4_ext_path *path)
{
        int err;
        if (path->p_bh) {
                /* path points to block */
                err = ext4_handle_dirty_metadata(handle, inode, path->p_bh);
        } else {
                /* path points to leaf/index in inode body */
                err = ext4_mark_inode_dirty(handle, inode);
        }
        return err;
}

static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
                              struct ext4_ext_path *path,
                              ext4_lblk_t block)
{
        struct ext4_inode_info *ei = EXT4_I(inode);
        ext4_fsblk_t bg_start;
        ext4_fsblk_t last_block;
        ext4_grpblk_t colour;
        int depth;

        if (path) {
                struct ext4_extent *ex;
                depth = path->p_depth;

                /* try to predict block placement */
                ex = path[depth].p_ext;
                if (ex)
                        return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block));

                /* it looks like index is empty;
                 * try to find starting block from index itself */
                if (path[depth].p_bh)
                        return path[depth].p_bh->b_blocknr;
        }

        /* OK. use inode's group */
        bg_start = (ei->i_block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) +
                le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block);
        last_block = ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es) - 1;

        if (bg_start + EXT4_BLOCKS_PER_GROUP(inode->i_sb) <= last_block)
                colour = (current->pid % 16) *
                        (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16);
        else
                colour = (current->pid % 16) * ((last_block - bg_start) / 16);
        return bg_start + colour + block;
}
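
/*
 * The "colour" heuristic above slices the inode's block group into 16
 * regions and picks one based on the allocating task's pid, so that
 * concurrent writers tend to get separated goal blocks. For example,
 * with 32768 blocks per group and pid % 16 == 5, the goal is offset by
 * 5 * (32768 / 16) = 10240 blocks from the start of the group.
 */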

/*
 * Allocation for a meta data block
 */
static ext4_fsblk_t
ext4_ext_new_meta_block(handle_t *handle, struct inode *inode,
                        struct ext4_ext_path *path,
                        struct ext4_extent *ex, int *err)
{
        ext4_fsblk_t goal, newblock;

        goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block));
        newblock = ext4_new_meta_blocks(handle, inode, goal, NULL, err);
        return newblock;
}

static int ext4_ext_space_block(struct inode *inode)
{
        int size;

        size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
                        / sizeof(struct ext4_extent);
#ifdef AGGRESSIVE_TEST
        if (size > 6)
                size = 6;
#endif
        return size;
}

static int ext4_ext_space_block_idx(struct inode *inode)
{
        int size;

        size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
                        / sizeof(struct ext4_extent_idx);
#ifdef AGGRESSIVE_TEST
        if (size > 5)
                size = 5;
#endif
        return size;
}

static int ext4_ext_space_root(struct inode *inode)
{
        int size;

        size = sizeof(EXT4_I(inode)->i_data);
        size -= sizeof(struct ext4_extent_header);
        size /= sizeof(struct ext4_extent);
#ifdef AGGRESSIVE_TEST
        if (size > 3)
                size = 3;
#endif
        return size;
}

static int ext4_ext_space_root_idx(struct inode *inode)
{
        int size;

        size = sizeof(EXT4_I(inode)->i_data);
        size -= sizeof(struct ext4_extent_header);
        size /= sizeof(struct ext4_extent_idx);
#ifdef AGGRESSIVE_TEST
        if (size > 4)
                size = 4;
#endif
        return size;
}
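
/*
 * Concrete capacities, assuming a 4KiB block size (the header, extent
 * and index entries are 12 bytes each; the in-inode i_data area is 60
 * bytes):
 *
 *   ext4_ext_space_block()     = (4096 - 12) / 12 = 340 extents per leaf
 *   ext4_ext_space_block_idx() = (4096 - 12) / 12 = 340 indexes per block
 *   ext4_ext_space_root()      = (60 - 12) / 12   = 4 extents in the root
 *   ext4_ext_space_root_idx()  = (60 - 12) / 12   = 4 indexes in the root
 */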

/*
 * Calculate the number of metadata blocks needed
 * to allocate @blocks
 * Worst case is one block per extent
 */
int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks)
{
        int lcap, icap, rcap, leafs, idxs, num;
        int newextents = blocks;

        rcap = ext4_ext_space_root_idx(inode);
        lcap = ext4_ext_space_block(inode);
        icap = ext4_ext_space_block_idx(inode);

        /* number of new leaf blocks needed */
        num = leafs = (newextents + lcap - 1) / lcap;

        /*
         * Worst case, we need separate index block(s)
         * to link all new leaf blocks
         */
        idxs = (leafs + icap - 1) / icap;
        do {
                num += idxs;
                idxs = (idxs + icap - 1) / icap;
        } while (idxs > rcap);

        return num;
}
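
/*
 * Worked example with the 4KiB-block capacities above (lcap = icap = 340,
 * rcap = 4) and blocks = 1000: leafs = DIV_ROUND_UP(1000, 340) = 3 leaf
 * blocks, plus DIV_ROUND_UP(3, 340) = 1 index block to link them; that
 * single index fits in the root (1 <= 4), so the result is 3 + 1 = 4
 * metadata blocks.
 */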

static int
ext4_ext_max_entries(struct inode *inode, int depth)
{
        int max;

        if (depth == ext_depth(inode)) {
                if (depth == 0)
                        max = ext4_ext_space_root(inode);
                else
                        max = ext4_ext_space_root_idx(inode);
        } else {
                if (depth == 0)
                        max = ext4_ext_space_block(inode);
                else
                        max = ext4_ext_space_block_idx(inode);
        }

        return max;
}

static int __ext4_ext_check_header(const char *function, struct inode *inode,
                                        struct ext4_extent_header *eh,
                                        int depth)
{
        const char *error_msg;
        int max = 0;

        if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) {
                error_msg = "invalid magic";
                goto corrupted;
        }
        if (unlikely(le16_to_cpu(eh->eh_depth) != depth)) {
                error_msg = "unexpected eh_depth";
                goto corrupted;
        }
        if (unlikely(eh->eh_max == 0)) {
                error_msg = "invalid eh_max";
                goto corrupted;
        }
        max = ext4_ext_max_entries(inode, depth);
        if (unlikely(le16_to_cpu(eh->eh_max) > max)) {
                error_msg = "too large eh_max";
                goto corrupted;
        }
        if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) {
                error_msg = "invalid eh_entries";
                goto corrupted;
        }
        return 0;

corrupted:
        ext4_error(inode->i_sb, function,
                        "bad header in inode #%lu: %s - magic %x, "
                        "entries %u, max %u(%u), depth %u(%u)",
                        inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic),
                        le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max),
                        max, le16_to_cpu(eh->eh_depth), depth);

        return -EIO;
}

#define ext4_ext_check_header(inode, eh, depth) \
        __ext4_ext_check_header(__func__, inode, eh, depth)

#ifdef EXT_DEBUG
static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
{
        int k, l = path->p_depth;

        ext_debug("path:");
        for (k = 0; k <= l; k++, path++) {
                if (path->p_idx) {
                        ext_debug("  %d->%llu", le32_to_cpu(path->p_idx->ei_block),
                                  idx_pblock(path->p_idx));
                } else if (path->p_ext) {
                        ext_debug("  %d:%d:%llu ",
                                  le32_to_cpu(path->p_ext->ee_block),
                                  ext4_ext_get_actual_len(path->p_ext),
                                  ext_pblock(path->p_ext));
                } else
                        ext_debug("  []");
        }
        ext_debug("\n");
}

static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
{
        int depth = ext_depth(inode);
        struct ext4_extent_header *eh;
        struct ext4_extent *ex;
        int i;

        if (!path)
                return;

        eh = path[depth].p_hdr;
        ex = EXT_FIRST_EXTENT(eh);

        for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
                ext_debug("%d:%d:%llu ", le32_to_cpu(ex->ee_block),
                          ext4_ext_get_actual_len(ex), ext_pblock(ex));
        }
        ext_debug("\n");
}
#else
#define ext4_ext_show_path(inode, path)
#define ext4_ext_show_leaf(inode, path)
#endif

void ext4_ext_drop_refs(struct ext4_ext_path *path)
{
        int depth = path->p_depth;
        int i;

        for (i = 0; i <= depth; i++, path++)
                if (path->p_bh) {
                        brelse(path->p_bh);
                        path->p_bh = NULL;
                }
}

/*
 * ext4_ext_binsearch_idx:
 * binary search for the closest index of the given block
 * the header must be checked before calling this
 */
static void
ext4_ext_binsearch_idx(struct inode *inode,
                        struct ext4_ext_path *path, ext4_lblk_t block)
{
        struct ext4_extent_header *eh = path->p_hdr;
        struct ext4_extent_idx *r, *l, *m;

        ext_debug("binsearch for %u(idx):  ", block);

        l = EXT_FIRST_INDEX(eh) + 1;
        r = EXT_LAST_INDEX(eh);
        while (l <= r) {
                m = l + (r - l) / 2;
                if (block < le32_to_cpu(m->ei_block))
                        r = m - 1;
                else
                        l = m + 1;
                ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ei_block),
                                m, le32_to_cpu(m->ei_block),
                                r, le32_to_cpu(r->ei_block));
        }

        path->p_idx = l - 1;
        ext_debug("  -> %d->%llu ", le32_to_cpu(path->p_idx->ei_block),
                  idx_pblock(path->p_idx));

#ifdef CHECK_BINSEARCH
        {
                struct ext4_extent_idx *chix, *ix;
                int k;

                chix = ix = EXT_FIRST_INDEX(eh);
                for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) {
                        if (k != 0 &&
                            le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) {
                                printk(KERN_DEBUG "k=%d, ix=0x%p, "
                                       "first=0x%p\n", k,
                                       ix, EXT_FIRST_INDEX(eh));
                                printk(KERN_DEBUG "%u <= %u\n",
                                       le32_to_cpu(ix->ei_block),
                                       le32_to_cpu(ix[-1].ei_block));
                        }
                        BUG_ON(k && le32_to_cpu(ix->ei_block)
                                           <= le32_to_cpu(ix[-1].ei_block));
                        if (block < le32_to_cpu(ix->ei_block))
                                break;
                        chix = ix;
                }
                BUG_ON(chix != path->p_idx);
        }
#endif

}

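/*
 * ext4_ext_binsearch_idx() above and ext4_ext_binsearch() below both
 * leave the path pointing at the last entry whose starting block is
 * <= the searched block. For example, in a leaf with extents starting
 * at logical blocks 0, 100 and 200, a search for block 150 ends with
 * l pointing at the 200 entry, so path->p_ext = l - 1 selects the
 * extent that starts at 100.
 */
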
/*
 * ext4_ext_binsearch:
 * binary search for closest extent of the given block
 * the header must be checked before calling this
 */
static void
ext4_ext_binsearch(struct inode *inode,
                struct ext4_ext_path *path, ext4_lblk_t block)
{
        struct ext4_extent_header *eh = path->p_hdr;
        struct ext4_extent *r, *l, *m;

        if (eh->eh_entries == 0) {
                /*
                 * this leaf is empty:
                 * we get such a leaf in split/add case
                 */
                return;
        }

        ext_debug("binsearch for %u:  ", block);

        l = EXT_FIRST_EXTENT(eh) + 1;
        r = EXT_LAST_EXTENT(eh);

        while (l <= r) {
                m = l + (r - l) / 2;
                if (block < le32_to_cpu(m->ee_block))
                        r = m - 1;
                else
                        l = m + 1;
                ext_debug("%p(%u):%p(%u):%p(%u) ", l, le32_to_cpu(l->ee_block),
                                m, le32_to_cpu(m->ee_block),
                                r, le32_to_cpu(r->ee_block));
        }

        path->p_ext = l - 1;
        ext_debug("  -> %d:%llu:%d ",
                        le32_to_cpu(path->p_ext->ee_block),
                        ext_pblock(path->p_ext),
                        ext4_ext_get_actual_len(path->p_ext));

#ifdef CHECK_BINSEARCH
        {
                struct ext4_extent *chex, *ex;
                int k;

                chex = ex = EXT_FIRST_EXTENT(eh);
                for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) {
                        BUG_ON(k && le32_to_cpu(ex->ee_block)
                                          <= le32_to_cpu(ex[-1].ee_block));
                        if (block < le32_to_cpu(ex->ee_block))
                                break;
                        chex = ex;
                }
                BUG_ON(chex != path->p_ext);
        }
#endif

}

int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
{
        struct ext4_extent_header *eh;

        eh = ext_inode_hdr(inode);
        eh->eh_depth = 0;
        eh->eh_entries = 0;
        eh->eh_magic = EXT4_EXT_MAGIC;
        eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode));
        ext4_mark_inode_dirty(handle, inode);
        ext4_ext_invalidate_cache(inode);
        return 0;
}

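/*
 * ext4_ext_find_extent:
 * walks the tree from the root down to @block and returns the resulting
 * path: path[0] describes the root header in the inode body, and
 * path[depth].p_ext is the closest extent found at the leaf level (NULL
 * for an empty leaf). If @path is NULL a path array is allocated; the
 * caller must release it with ext4_ext_drop_refs() and kfree(). On
 * failure an ERR_PTR() is returned.
 */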
struct ext4_ext_path *
ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
                                        struct ext4_ext_path *path)
{
        struct ext4_extent_header *eh;
        struct buffer_head *bh;
        short int depth, i, ppos = 0, alloc = 0;

        eh = ext_inode_hdr(inode);
        depth = ext_depth(inode);
        if (ext4_ext_check_header(inode, eh, depth))
                return ERR_PTR(-EIO);

        /* account possible depth increase */
        if (!path) {
                path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2),
                                GFP_NOFS);
                if (!path)
                        return ERR_PTR(-ENOMEM);
                alloc = 1;
        }
        path[0].p_hdr = eh;
        path[0].p_bh = NULL;

        i = depth;
        /* walk through the tree */
        while (i) {
                ext_debug("depth %d: num %d, max %d\n",
                          ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));

                ext4_ext_binsearch_idx(inode, path + ppos, block);
                path[ppos].p_block = idx_pblock(path[ppos].p_idx);
                path[ppos].p_depth = i;
                path[ppos].p_ext = NULL;

                bh = sb_bread(inode->i_sb, path[ppos].p_block);
                if (!bh)
                        goto err;

                eh = ext_block_hdr(bh);
                ppos++;
                BUG_ON(ppos > depth);
                path[ppos].p_bh = bh;
                path[ppos].p_hdr = eh;
                i--;

                if (ext4_ext_check_header(inode, eh, i))
                        goto err;
        }

        path[ppos].p_depth = i;
        path[ppos].p_ext = NULL;
        path[ppos].p_idx = NULL;

        /* find extent */
        ext4_ext_binsearch(inode, path + ppos, block);
        /* if not an empty leaf */
        if (path[ppos].p_ext)
                path[ppos].p_block = ext_pblock(path[ppos].p_ext);

        ext4_ext_show_path(inode, path);

        return path;

err:
        ext4_ext_drop_refs(path);
        if (alloc)
                kfree(path);
        return ERR_PTR(-EIO);
}

/*
 * ext4_ext_insert_index:
 * insert new index [@logical;@ptr] into the block at @curp;
 * check where to insert: before @curp or after @curp
 */
static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
                                struct ext4_ext_path *curp,
                                int logical, ext4_fsblk_t ptr)
{
        struct ext4_extent_idx *ix;
        int len, err;

        err = ext4_ext_get_access(handle, inode, curp);
        if (err)
                return err;

        BUG_ON(logical == le32_to_cpu(curp->p_idx->ei_block));
        len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
        if (logical > le32_to_cpu(curp->p_idx->ei_block)) {
                /* insert after */
                if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) {
                        len = (len - 1) * sizeof(struct ext4_extent_idx);
                        len = len < 0 ? 0 : len;
                        ext_debug("insert new index %d after: %llu. "
                                        "move %d from 0x%p to 0x%p\n",
                                        logical, ptr, len,
                                        (curp->p_idx + 1), (curp->p_idx + 2));
                        memmove(curp->p_idx + 2, curp->p_idx + 1, len);
                }
                ix = curp->p_idx + 1;
        } else {
                /* insert before */
                len = len * sizeof(struct ext4_extent_idx);
                len = len < 0 ? 0 : len;
                ext_debug("insert new index %d before: %llu. "
                                "move %d from 0x%p to 0x%p\n",
                                logical, ptr, len,
                                curp->p_idx, (curp->p_idx + 1));
                memmove(curp->p_idx + 1, curp->p_idx, len);
                ix = curp->p_idx;
        }

        ix->ei_block = cpu_to_le32(logical);
        ext4_idx_store_pblock(ix, ptr);
        le16_add_cpu(&curp->p_hdr->eh_entries, 1);

        BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries)
                             > le16_to_cpu(curp->p_hdr->eh_max));
        BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr));

        err = ext4_ext_dirty(handle, inode, curp);
        ext4_std_error(inode->i_sb, err);

        return err;
}

/*
 * ext4_ext_split:
 * inserts new subtree into the path, using free index entry
 * at depth @at:
 * - allocates all needed blocks (new leaf and all intermediate index blocks)
 * - makes decision where to split
 * - moves remaining extents and index entries (right to the split point)
 *   into the newly allocated blocks
 * - initializes subtree
 */
static int ext4_ext_split(handle_t *handle, struct inode *inode,
                                struct ext4_ext_path *path,
                                struct ext4_extent *newext, int at)
{
        struct buffer_head *bh = NULL;
        int depth = ext_depth(inode);
        struct ext4_extent_header *neh;
        struct ext4_extent_idx *fidx;
        struct ext4_extent *ex;
        int i = at, k, m, a;
        ext4_fsblk_t newblock, oldblock;
        __le32 border;
        ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */
        int err = 0;

        /* make decision: where to split? */
        /* FIXME: now decision is simplest: at current extent */

        /* if current leaf will be split, then we should use
         * border from split point */
        BUG_ON(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr));
        if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
                border = path[depth].p_ext[1].ee_block;
                ext_debug("leaf will be split."
                                " next leaf starts at %d\n",
                                  le32_to_cpu(border));
        } else {
                border = newext->ee_block;
                ext_debug("leaf will be added."
                                " next leaf starts at %d\n",
                                le32_to_cpu(border));
        }

        /*
         * If an error occurs, we break processing and mark the
         * filesystem read-only. The index won't be inserted and the
         * tree will stay consistent. The next mount will repair
         * buffers too.
         */

        /*
         * Get array to track all allocated blocks.
         * We need this to handle errors and free blocks
         * upon them.
         */
        ablocks = kzalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS);
        if (!ablocks)
                return -ENOMEM;

        /* allocate all needed blocks */
        ext_debug("allocate %d blocks for indexes/leaf\n", depth - at);
        for (a = 0; a < depth - at; a++) {
                newblock = ext4_ext_new_meta_block(handle, inode, path,
                                                   newext, &err);
                if (newblock == 0)
                        goto cleanup;
                ablocks[a] = newblock;
        }

        /* initialize new leaf */
        newblock = ablocks[--a];
        BUG_ON(newblock == 0);
        bh = sb_getblk(inode->i_sb, newblock);
        if (!bh) {
                err = -EIO;
                goto cleanup;
        }
        lock_buffer(bh);

        err = ext4_journal_get_create_access(handle, bh);
        if (err)
                goto cleanup;

        neh = ext_block_hdr(bh);
        neh->eh_entries = 0;
        neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode));
        neh->eh_magic = EXT4_EXT_MAGIC;
        neh->eh_depth = 0;
        ex = EXT_FIRST_EXTENT(neh);

        /* move remainder of path[depth] to the new leaf */
        BUG_ON(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max);
        /* start copy from next extent */
        /* TODO: we could do it by single memmove */
        m = 0;
        path[depth].p_ext++;
        while (path[depth].p_ext <=
                        EXT_MAX_EXTENT(path[depth].p_hdr)) {
                ext_debug("move %d:%llu:%d in new leaf %llu\n",
                                le32_to_cpu(path[depth].p_ext->ee_block),
                                ext_pblock(path[depth].p_ext),
                                ext4_ext_get_actual_len(path[depth].p_ext),
                                newblock);
                /*memmove(ex++, path[depth].p_ext++,
                                sizeof(struct ext4_extent));
                neh->eh_entries++;*/
                path[depth].p_ext++;
                m++;
        }
        if (m) {
                memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m);
                le16_add_cpu(&neh->eh_entries, m);
        }

        set_buffer_uptodate(bh);
        unlock_buffer(bh);

        err = ext4_handle_dirty_metadata(handle, inode, bh);
        if (err)
                goto cleanup;
        brelse(bh);
        bh = NULL;

        /* correct old leaf */
        if (m) {
                err = ext4_ext_get_access(handle, inode, path + depth);
                if (err)
                        goto cleanup;
                le16_add_cpu(&path[depth].p_hdr->eh_entries, -m);
                err = ext4_ext_dirty(handle, inode, path + depth);
                if (err)
                        goto cleanup;

        }

        /* create intermediate indexes */
        k = depth - at - 1;
        BUG_ON(k < 0);
        if (k)
                ext_debug("create %d intermediate indices\n", k);
        /* insert new index into current index block */
        /* current depth stored in i var */
        i = depth - 1;
        while (k--) {
                oldblock = newblock;
                newblock = ablocks[--a];
                bh = sb_getblk(inode->i_sb, newblock);
                if (!bh) {
                        err = -EIO;
                        goto cleanup;
                }
                lock_buffer(bh);

                err = ext4_journal_get_create_access(handle, bh);
                if (err)
                        goto cleanup;

                neh = ext_block_hdr(bh);
                neh->eh_entries = cpu_to_le16(1);
                neh->eh_magic = EXT4_EXT_MAGIC;
                neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode));
                neh->eh_depth = cpu_to_le16(depth - i);
                fidx = EXT_FIRST_INDEX(neh);
                fidx->ei_block = border;
                ext4_idx_store_pblock(fidx, oldblock);

                ext_debug("int.index at %d (block %llu): %u -> %llu\n",
                                i, newblock, le32_to_cpu(border), oldblock);
                /* copy indexes */
                m = 0;
                path[i].p_idx++;

                ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx,
                                EXT_MAX_INDEX(path[i].p_hdr));
                BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) !=
                                EXT_LAST_INDEX(path[i].p_hdr));
                while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
                        ext_debug("%d: move %d:%llu in new index %llu\n", i,
                                        le32_to_cpu(path[i].p_idx->ei_block),
                                        idx_pblock(path[i].p_idx),
                                        newblock);
                        /*memmove(++fidx, path[i].p_idx++,
                                        sizeof(struct ext4_extent_idx));
                        neh->eh_entries++;
                        BUG_ON(neh->eh_entries > neh->eh_max);*/
                        path[i].p_idx++;
                        m++;
                }
                if (m) {
                        memmove(++fidx, path[i].p_idx - m,
                                sizeof(struct ext4_extent_idx) * m);
                        le16_add_cpu(&neh->eh_entries, m);
                }
                set_buffer_uptodate(bh);
                unlock_buffer(bh);

                err = ext4_handle_dirty_metadata(handle, inode, bh);
                if (err)
                        goto cleanup;
                brelse(bh);
                bh = NULL;

                /* correct old index */
                if (m) {
                        err = ext4_ext_get_access(handle, inode, path + i);
                        if (err)
                                goto cleanup;
                        le16_add_cpu(&path[i].p_hdr->eh_entries, -m);
                        err = ext4_ext_dirty(handle, inode, path + i);
                        if (err)
                                goto cleanup;
                }

                i--;
        }

        /* insert new index */
        err = ext4_ext_insert_index(handle, inode, path + at,
                                    le32_to_cpu(border), newblock);

cleanup:
        if (bh) {
                if (buffer_locked(bh))
                        unlock_buffer(bh);
                brelse(bh);
        }

        if (err) {
                /* free all allocated blocks in error case */
                for (i = 0; i < depth; i++) {
                        if (!ablocks[i])
                                continue;
                        ext4_free_blocks(handle, inode, ablocks[i], 1, 1);
                }
        }
        kfree(ablocks);

        return err;
}

/*
 * ext4_ext_grow_indepth:
 * implements tree growing procedure:
 * - allocates new block
 * - moves top-level data (index block or leaf) into the new block
 * - initializes new top-level, creating index that points to the
 *   just created block
 */
static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
                                        struct ext4_ext_path *path,
                                        struct ext4_extent *newext)
{
        struct ext4_ext_path *curp = path;
        struct ext4_extent_header *neh;
        struct ext4_extent_idx *fidx;
        struct buffer_head *bh;
        ext4_fsblk_t newblock;
        int err = 0;

        newblock = ext4_ext_new_meta_block(handle, inode, path, newext, &err);
        if (newblock == 0)
                return err;

        bh = sb_getblk(inode->i_sb, newblock);
        if (!bh) {
                err = -EIO;
                ext4_std_error(inode->i_sb, err);
                return err;
        }
        lock_buffer(bh);

        err = ext4_journal_get_create_access(handle, bh);
        if (err) {
                unlock_buffer(bh);
                goto out;
        }

        /* move top-level index/leaf into new block */
        memmove(bh->b_data, curp->p_hdr, sizeof(EXT4_I(inode)->i_data));

        /* set size of new block */
        neh = ext_block_hdr(bh);
        /* old root could have indexes or leaves
         * so calculate eh_max the right way */
        if (ext_depth(inode))
                neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode));
        else
                neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode));
        neh->eh_magic = EXT4_EXT_MAGIC;
        set_buffer_uptodate(bh);
        unlock_buffer(bh);

        err = ext4_handle_dirty_metadata(handle, inode, bh);
        if (err)
                goto out;

        /* create index in new top-level index: num,max,pointer */
        err = ext4_ext_get_access(handle, inode, curp);
        if (err)
                goto out;

        curp->p_hdr->eh_magic = EXT4_EXT_MAGIC;
        curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode));
        curp->p_hdr->eh_entries = cpu_to_le16(1);
        curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);

        if (path[0].p_hdr->eh_depth)
                curp->p_idx->ei_block =
                        EXT_FIRST_INDEX(path[0].p_hdr)->ei_block;
        else
                curp->p_idx->ei_block =
                        EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block;
        ext4_idx_store_pblock(curp->p_idx, newblock);

        neh = ext_inode_hdr(inode);
        fidx = EXT_FIRST_INDEX(neh);
        ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
                  le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
                  le32_to_cpu(fidx->ei_block), idx_pblock(fidx));

        neh->eh_depth = cpu_to_le16(path->p_depth + 1);
        err = ext4_ext_dirty(handle, inode, curp);
out:
        brelse(bh);

        return err;
}

/*
 * ext4_ext_create_new_leaf:
 * finds empty index and adds new leaf.
 * if no free index is found, then it requests in-depth growing.
 */
static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
                                        struct ext4_ext_path *path,
                                        struct ext4_extent *newext)
{
        struct ext4_ext_path *curp;
        int depth, i, err = 0;

repeat:
        i = depth = ext_depth(inode);

        /* walk up the tree and look for a free index entry */
        curp = path + depth;
        while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
                i--;
                curp--;
        }

        /* we use already allocated block for index block,
         * so subsequent data blocks should be contiguous */
        if (EXT_HAS_FREE_INDEX(curp)) {
                /* if we found index with free entry, then use that
                 * entry: create all needed subtree and add new leaf */
                err = ext4_ext_split(handle, inode, path, newext, i);
                if (err)
                        goto out;

                /* refill path */
                ext4_ext_drop_refs(path);
                path = ext4_ext_find_extent(inode,
                                    (ext4_lblk_t)le32_to_cpu(newext->ee_block),
                                    path);
                if (IS_ERR(path))
                        err = PTR_ERR(path);
        } else {
                /* tree is full, time to grow in depth */
                err = ext4_ext_grow_indepth(handle, inode, path, newext);
                if (err)
                        goto out;

                /* refill path */
                ext4_ext_drop_refs(path);
                path = ext4_ext_find_extent(inode,
                                   (ext4_lblk_t)le32_to_cpu(newext->ee_block),
                                    path);
                if (IS_ERR(path)) {
                        err = PTR_ERR(path);
                        goto out;
                }

                /*
                 * only first (depth 0 -> 1) produces free space;
                 * in all other cases we have to split the grown tree
                 */
                depth = ext_depth(inode);
                if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) {
                        /* now we need to split */
                        goto repeat;
                }
        }

out:
        return err;
}

/*
 * search the closest allocated block to the left for *logical
 * and returns it at @logical + its physical address at @phys
 * if *logical is the smallest allocated block, the function
 * returns 0 at @phys
 * return value contains 0 (success) or error code
 */
int
ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
                        ext4_lblk_t *logical, ext4_fsblk_t *phys)
{
        struct ext4_extent_idx *ix;
        struct ext4_extent *ex;
        int depth, ee_len;

        BUG_ON(path == NULL);
        depth = path->p_depth;
        *phys = 0;

        if (depth == 0 && path->p_ext == NULL)
                return 0;

        /* usually the extent in the path covers blocks smaller
         * than *logical, but it can be that the extent is the
         * first one in the file */

        ex = path[depth].p_ext;
        ee_len = ext4_ext_get_actual_len(ex);
        if (*logical < le32_to_cpu(ex->ee_block)) {
                BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex);
                while (--depth >= 0) {
                        ix = path[depth].p_idx;
                        BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr));
                }
                return 0;
        }

        BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len));

        *logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
        *phys = ext_pblock(ex) + ee_len - 1;
        return 0;
}
/*
 * search the closest allocated block to the right for *logical
 * and returns it at @logical + its physical address at @phys
 * if *logical is the largest allocated block, the function
 * returns 0 at @phys
 * return value contains 0 (success) or error code
 */
int
ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
                        ext4_lblk_t *logical, ext4_fsblk_t *phys)
{
        struct buffer_head *bh = NULL;
        struct ext4_extent_header *eh;
        struct ext4_extent_idx *ix;
        struct ext4_extent *ex;
        ext4_fsblk_t block;
        int depth, ee_len;

        BUG_ON(path == NULL);
        depth = path->p_depth;
        *phys = 0;

        if (depth == 0 && path->p_ext == NULL)
                return 0;

        /* usually the extent in the path covers blocks smaller
         * than *logical, but it can be that the extent is the
         * first one in the file */

        ex = path[depth].p_ext;
        ee_len = ext4_ext_get_actual_len(ex);
        if (*logical < le32_to_cpu(ex->ee_block)) {
                BUG_ON(EXT_FIRST_EXTENT(path[depth].p_hdr) != ex);
                while (--depth >= 0) {
                        ix = path[depth].p_idx;
                        BUG_ON(ix != EXT_FIRST_INDEX(path[depth].p_hdr));
                }
                *logical = le32_to_cpu(ex->ee_block);
                *phys = ext_pblock(ex);
                return 0;
        }

        BUG_ON(*logical < (le32_to_cpu(ex->ee_block) + ee_len));

        if (ex != EXT_LAST_EXTENT(path[depth].p_hdr)) {
                /* next allocated block in this leaf */
                ex++;
                *logical = le32_to_cpu(ex->ee_block);
                *phys = ext_pblock(ex);
                return 0;
        }

        /* go up and search for index to the right */
        while (--depth >= 0) {
                ix = path[depth].p_idx;
                if (ix != EXT_LAST_INDEX(path[depth].p_hdr))
                        goto got_index;
        }

        /* we've gone up to the root and found no index to the right */
        return 0;

got_index:
        /* we've found index to the right, let's
         * follow it and find the closest allocated
         * block to the right */
        ix++;
        block = idx_pblock(ix);
        while (++depth < path->p_depth) {
                bh = sb_bread(inode->i_sb, block);
                if (bh == NULL)
                        return -EIO;
                eh = ext_block_hdr(bh);
                if (ext4_ext_check_header(inode, eh, depth)) {
                        put_bh(bh);
                        return -EIO;
                }
                ix = EXT_FIRST_INDEX(eh);
                block = idx_pblock(ix);
                put_bh(bh);
        }

        bh = sb_bread(inode->i_sb, block);
        if (bh == NULL)
                return -EIO;
        eh = ext_block_hdr(bh);
        if (ext4_ext_check_header(inode, eh, path->p_depth - depth)) {
                put_bh(bh);
                return -EIO;
        }
        ex = EXT_FIRST_EXTENT(eh);
        *logical = le32_to_cpu(ex->ee_block);
        *phys = ext_pblock(ex);
        put_bh(bh);
        return 0;
}

/*
 * ext4_ext_next_allocated_block:
 * returns allocated block in subsequent extent or EXT_MAX_BLOCK.
 * NOTE: it considers block number from index entry as
 * allocated block. Thus, index entries have to be consistent
 * with leaves.
 */
static ext4_lblk_t
ext4_ext_next_allocated_block(struct ext4_ext_path *path)
{
        int depth;

        BUG_ON(path == NULL);
        depth = path->p_depth;

        if (depth == 0 && path->p_ext == NULL)
                return EXT_MAX_BLOCK;

        while (depth >= 0) {
                if (depth == path->p_depth) {
                        /* leaf */
                        if (path[depth].p_ext !=
                                        EXT_LAST_EXTENT(path[depth].p_hdr))
                                return le32_to_cpu(path[depth].p_ext[1].ee_block);
                } else {
                        /* index */
                        if (path[depth].p_idx !=
                                        EXT_LAST_INDEX(path[depth].p_hdr))
                                return le32_to_cpu(path[depth].p_idx[1].ei_block);
                }
                depth--;
        }

        return EXT_MAX_BLOCK;
}

/*
 * ext4_ext_next_leaf_block:
 * returns first allocated block from next leaf or EXT_MAX_BLOCK
 */
static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode,
                                        struct ext4_ext_path *path)
{
        int depth;

        BUG_ON(path == NULL);
        depth = path->p_depth;

        /* zero-tree has no leaf blocks at all */
        if (depth == 0)
                return EXT_MAX_BLOCK;

        /* go to index block */
        depth--;

        while (depth >= 0) {
                if (path[depth].p_idx !=
                                EXT_LAST_INDEX(path[depth].p_hdr))
                        return (ext4_lblk_t)
                                le32_to_cpu(path[depth].p_idx[1].ei_block);
                depth--;
        }

        return EXT_MAX_BLOCK;
}

/*
 * ext4_ext_correct_indexes:
 * if leaf gets modified and modified extent is first in the leaf,
 * then we have to correct all indexes above.
 * TODO: do we need to correct tree in all cases?
 */
static int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode,
                                struct ext4_ext_path *path)
{
        struct ext4_extent_header *eh;
        int depth = ext_depth(inode);
        struct ext4_extent *ex;
        __le32 border;
        int k, err = 0;

        eh = path[depth].p_hdr;
        ex = path[depth].p_ext;
        BUG_ON(ex == NULL);
        BUG_ON(eh == NULL);

        if (depth == 0) {
                /* there is no tree at all */
                return 0;
        }

        if (ex != EXT_FIRST_EXTENT(eh)) {
                /* we correct tree if first leaf got modified only */
                return 0;
        }

        /*
         * TODO: we need correction if border is smaller than current one
         */
        k = depth - 1;
        border = path[depth].p_ext->ee_block;
        err = ext4_ext_get_access(handle, inode, path + k);
        if (err)
                return err;
        path[k].p_idx->ei_block = border;
        err = ext4_ext_dirty(handle, inode, path + k);
        if (err)
                return err;

        while (k--) {
                /* change all left-side indexes */
                if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
                        break;
                err = ext4_ext_get_access(handle, inode, path + k);
                if (err)
                        break;
                path[k].p_idx->ei_block = border;
                err = ext4_ext_dirty(handle, inode, path + k);
                if (err)
                        break;
        }

        return err;
}

static int
ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
                                struct ext4_extent *ex2)
{
        unsigned short ext1_ee_len, ext2_ee_len, max_len;

        /*
         * Make sure that either both extents are uninitialized, or
         * both are _not_.
         */
        if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2))
                return 0;

        if (ext4_ext_is_uninitialized(ex1))
                max_len = EXT_UNINIT_MAX_LEN;
        else
                max_len = EXT_INIT_MAX_LEN;

        ext1_ee_len = ext4_ext_get_actual_len(ex1);
        ext2_ee_len = ext4_ext_get_actual_len(ex2);

        if (le32_to_cpu(ex1->ee_block) + ext1_ee_len !=
                        le32_to_cpu(ex2->ee_block))
                return 0;

        /*
         * To allow future support for preallocated extents to be added
         * as an RO_COMPAT feature, refuse to merge two extents if
         * this can result in the top bit of ee_len being set.
         */
        if (ext1_ee_len + ext2_ee_len > max_len)
                return 0;
#ifdef AGGRESSIVE_TEST
        if (ext1_ee_len >= 4)
                return 0;
#endif

        if (ext_pblock(ex1) + ext1_ee_len == ext_pblock(ex2))
                return 1;
        return 0;
}
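
/*
 * Worked example: ex1 = [logical 100, len 8, physical 5000] and
 * ex2 = [logical 108, len 4, physical 5008] are mergeable, since
 * 100 + 8 == 108 (logically contiguous), 5000 + 8 == 5008 (physically
 * contiguous) and the combined length 12 stays within max_len; the
 * merged extent is [logical 100, len 12, physical 5000].
 */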

/*
 * This function tries to merge the "ex" extent to the next extent in the tree.
 * It always tries to merge towards right. If you want to merge towards
 * left, pass "ex - 1" as argument instead of "ex".
 * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
 * 1 if they got merged.
 */
int ext4_ext_try_to_merge(struct inode *inode,
                          struct ext4_ext_path *path,
                          struct ext4_extent *ex)
{
        struct ext4_extent_header *eh;
        unsigned int depth, len;
        int merge_done = 0;
        int uninitialized = 0;

        depth = ext_depth(inode);
        BUG_ON(path[depth].p_hdr == NULL);
        eh = path[depth].p_hdr;

        while (ex < EXT_LAST_EXTENT(eh)) {
                if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
                        break;
                /* merge with next extent! */
                if (ext4_ext_is_uninitialized(ex))
                        uninitialized = 1;
                ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
                                + ext4_ext_get_actual_len(ex + 1));
                if (uninitialized)
                        ext4_ext_mark_uninitialized(ex);

                if (ex + 1 < EXT_LAST_EXTENT(eh)) {
                        len = (EXT_LAST_EXTENT(eh) - ex - 1)
                                * sizeof(struct ext4_extent);
                        memmove(ex + 1, ex + 2, len);
                }
                le16_add_cpu(&eh->eh_entries, -1);
                merge_done = 1;
                WARN_ON(eh->eh_entries == 0);
                if (!eh->eh_entries)
                        ext4_error(inode->i_sb, "ext4_ext_try_to_merge",
                           "inode#%lu, eh->eh_entries = 0!", inode->i_ino);
        }

        return merge_done;
}

/*
 * check if a portion of the "newext" extent overlaps with an
 * existing extent.
 *
 * If there is an overlap discovered, it updates the length of the newext
 * such that there will be no overlap, and then returns 1.
 * If there is no overlap found, it returns 0.
 */
unsigned int ext4_ext_check_overlap(struct inode *inode,
                                    struct ext4_extent *newext,
                                    struct ext4_ext_path *path)
{
        ext4_lblk_t b1, b2;
        unsigned int depth, len1;
        unsigned int ret = 0;

        b1 = le32_to_cpu(newext->ee_block);
        len1 = ext4_ext_get_actual_len(newext);
        depth = ext_depth(inode);
        if (!path[depth].p_ext)
                goto out;
        b2 = le32_to_cpu(path[depth].p_ext->ee_block);

        /*
         * get the next allocated block if the extent in the path
         * is before the requested block(s)
         */
        if (b2 < b1) {
                b2 = ext4_ext_next_allocated_block(path);
                if (b2 == EXT_MAX_BLOCK)
                        goto out;
        }

        /* check for wrap through zero on extent logical start block */
        if (b1 + len1 < b1) {
                len1 = EXT_MAX_BLOCK - b1;
                newext->ee_len = cpu_to_le16(len1);
                ret = 1;
        }

        /* check for overlap */
        if (b1 + len1 > b2) {
                newext->ee_len = cpu_to_le16(b2 - b1);
                ret = 1;
        }
out:
        return ret;
}
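
/*
 * For example, if newext starts at logical block 50 with length 20
 * (covering 50..69) and the next allocated block is b2 = 60, the ranges
 * overlap; ee_len is trimmed to b2 - b1 = 10 so that newext covers only
 * 50..59, and the function returns 1.
 */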
1468
1469 /*
1470  * ext4_ext_insert_extent:
1471  * tries to merge requsted extent into the existing extent or
1472  * inserts requested extent as new one into the tree,
1473  * creating new leaf in the no-space case.
1474  */
1475 int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1476                                 struct ext4_ext_path *path,
1477                                 struct ext4_extent *newext)
1478 {
1479         struct ext4_extent_header *eh;
1480         struct ext4_extent *ex, *fex;
1481         struct ext4_extent *nearex; /* nearest extent */
1482         struct ext4_ext_path *npath = NULL;
1483         int depth, len, err;
1484         ext4_lblk_t next;
1485         unsigned uninitialized = 0;
1486
1487         BUG_ON(ext4_ext_get_actual_len(newext) == 0);
1488         depth = ext_depth(inode);
1489         ex = path[depth].p_ext;
1490         BUG_ON(path[depth].p_hdr == NULL);
1491
1492         /* try to insert block into found extent and return */
1493         if (ex && ext4_can_extents_be_merged(inode, ex, newext)) {
1494                 ext_debug("append %d block to %d:%d (from %llu)\n",
1495                                 ext4_ext_get_actual_len(newext),
1496                                 le32_to_cpu(ex->ee_block),
1497                                 ext4_ext_get_actual_len(ex), ext_pblock(ex));
1498                 err = ext4_ext_get_access(handle, inode, path + depth);
1499                 if (err)
1500                         return err;
1501
1502                 /*
1503                  * ext4_can_extents_be_merged should have checked that either
1504                  * both extents are uninitialized, or both aren't. Thus we
1505                  * need to check only one of them here.
1506                  */
1507                 if (ext4_ext_is_uninitialized(ex))
1508                         uninitialized = 1;
1509                 ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
1510                                         + ext4_ext_get_actual_len(newext));
1511                 if (uninitialized)
1512                         ext4_ext_mark_uninitialized(ex);
1513                 eh = path[depth].p_hdr;
1514                 nearex = ex;
1515                 goto merge;
1516         }
1517
1518 repeat:
1519         depth = ext_depth(inode);
1520         eh = path[depth].p_hdr;
1521         if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max))
1522                 goto has_space;
1523
1524         /* probably next leaf has space for us? */
1525         fex = EXT_LAST_EXTENT(eh);
1526         next = ext4_ext_next_leaf_block(inode, path);
1527         if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)
1528             && next != EXT_MAX_BLOCK) {
1529                 ext_debug("next leaf block - %d\n", next);
1530                 BUG_ON(npath != NULL);
1531                 npath = ext4_ext_find_extent(inode, next, NULL);
1532                 if (IS_ERR(npath))
1533                         return PTR_ERR(npath);
1534                 BUG_ON(npath->p_depth != path->p_depth);
1535                 eh = npath[depth].p_hdr;
1536                 if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
1537                         ext_debug("next leaf isn't full(%d)\n",
1538                                   le16_to_cpu(eh->eh_entries));
1539                         path = npath;
1540                         goto repeat;
1541                 }
1542                 ext_debug("next leaf has no free space(%d,%d)\n",
1543                           le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
1544         }
1545
1546         /*
1547          * There is no free space in the found leaf.
1548          * We're going to add a new leaf to the tree.
1549          */
1550         err = ext4_ext_create_new_leaf(handle, inode, path, newext);
1551         if (err)
1552                 goto cleanup;
1553         depth = ext_depth(inode);
1554         eh = path[depth].p_hdr;
1555
1556 has_space:
1557         nearex = path[depth].p_ext;
1558
1559         err = ext4_ext_get_access(handle, inode, path + depth);
1560         if (err)
1561                 goto cleanup;
1562
1563         if (!nearex) {
1564                 /* there is no extent in this leaf, create first one */
1565                 ext_debug("first extent in the leaf: %d:%llu:%d\n",
1566                                 le32_to_cpu(newext->ee_block),
1567                                 ext_pblock(newext),
1568                                 ext4_ext_get_actual_len(newext));
1569                 path[depth].p_ext = EXT_FIRST_EXTENT(eh);
1570         } else if (le32_to_cpu(newext->ee_block)
1571                            > le32_to_cpu(nearex->ee_block)) {
1572 /*              BUG_ON(newext->ee_block == nearex->ee_block); */
1573                 if (nearex != EXT_LAST_EXTENT(eh)) {
1574                         len = EXT_MAX_EXTENT(eh) - nearex;
1575                         len = (len - 1) * sizeof(struct ext4_extent);
1576                         len = len < 0 ? 0 : len;
1577                         ext_debug("insert %d:%llu:%d after: nearest 0x%p, "
1578                                         "move %d from 0x%p to 0x%p\n",
1579                                         le32_to_cpu(newext->ee_block),
1580                                         ext_pblock(newext),
1581                                         ext4_ext_get_actual_len(newext),
1582                                         nearex, len, nearex + 1, nearex + 2);
1583                         memmove(nearex + 2, nearex + 1, len);
1584                 }
1585                 path[depth].p_ext = nearex + 1;
1586         } else {
1587                 BUG_ON(newext->ee_block == nearex->ee_block);
1588                 len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent);
1589                 len = len < 0 ? 0 : len;
1590                 ext_debug("insert %d:%llu:%d before: nearest 0x%p, "
1591                                 "move %d from 0x%p to 0x%p\n",
1592                                 le32_to_cpu(newext->ee_block),
1593                                 ext_pblock(newext),
1594                                 ext4_ext_get_actual_len(newext),
1595                                 nearex, len, nearex + 1, nearex + 2);
1596                 memmove(nearex + 1, nearex, len);
1597                 path[depth].p_ext = nearex;
1598         }
1599
1600         le16_add_cpu(&eh->eh_entries, 1);
1601         nearex = path[depth].p_ext;
1602         nearex->ee_block = newext->ee_block;
1603         ext4_ext_store_pblock(nearex, ext_pblock(newext));
1604         nearex->ee_len = newext->ee_len;
1605
1606 merge:
1607         /* try to merge extents to the right */
1608         ext4_ext_try_to_merge(inode, path, nearex);
1609
1610         /* TODO: try to merge extents to the left */
1611
1612         /* time to correct all indexes above */
1613         err = ext4_ext_correct_indexes(handle, inode, path);
1614         if (err)
1615                 goto cleanup;
1616
1617         err = ext4_ext_dirty(handle, inode, path + depth);
1618
1619 cleanup:
1620         if (npath) {
1621                 ext4_ext_drop_refs(npath);
1622                 kfree(npath);
1623         }
1624         ext4_ext_invalidate_cache(inode);
1625         return err;
1626 }
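/*
 * Illustrative sketch (mirrors the caller in ext4_ext_get_blocks()
 * below, not new functionality): the usual calling pattern is to fill
 * in a struct ext4_extent and pass the path that was looked up for the
 * same logical block:
 *
 *	struct ext4_extent newex;
 *
 *	newex.ee_block = cpu_to_le32(iblock);
 *	ext4_ext_store_pblock(&newex, newblock);
 *	newex.ee_len = cpu_to_le16(len);
 *	err = ext4_ext_insert_extent(handle, inode, path, &newex);
 *
 * Afterwards the caller re-reads the start and length from newex, as
 * ext4_ext_get_blocks() does below.
 */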
1627
1628 int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
1629                         ext4_lblk_t num, ext_prepare_callback func,
1630                         void *cbdata)
1631 {
1632         struct ext4_ext_path *path = NULL;
1633         struct ext4_ext_cache cbex;
1634         struct ext4_extent *ex;
1635         ext4_lblk_t next, start = 0, end = 0;
1636         ext4_lblk_t last = block + num;
1637         int depth, exists, err = 0;
1638
1639         BUG_ON(func == NULL);
1640         BUG_ON(inode == NULL);
1641
1642         while (block < last && block != EXT_MAX_BLOCK) {
1643                 num = last - block;
1644                 /* find extent for this block */
1645                 path = ext4_ext_find_extent(inode, block, path);
1646                 if (IS_ERR(path)) {
1647                         err = PTR_ERR(path);
1648                         path = NULL;
1649                         break;
1650                 }
1651
1652                 depth = ext_depth(inode);
1653                 BUG_ON(path[depth].p_hdr == NULL);
1654                 ex = path[depth].p_ext;
1655                 next = ext4_ext_next_allocated_block(path);
1656
1657                 exists = 0;
1658                 if (!ex) {
1659                         /* there is no extent yet, so try to allocate
1660                          * all requested space */
1661                         start = block;
1662                         end = block + num;
1663                 } else if (le32_to_cpu(ex->ee_block) > block) {
1664                         /* need to allocate space before found extent */
1665                         start = block;
1666                         end = le32_to_cpu(ex->ee_block);
1667                         if (block + num < end)
1668                                 end = block + num;
1669                 } else if (block >= le32_to_cpu(ex->ee_block)
1670                                         + ext4_ext_get_actual_len(ex)) {
1671                         /* need to allocate space after found extent */
1672                         start = block;
1673                         end = block + num;
1674                         if (end >= next)
1675                                 end = next;
1676                 } else if (block >= le32_to_cpu(ex->ee_block)) {
1677                         /*
1678                          * some part of requested space is covered
1679                          * by found extent
1680                          */
1681                         start = block;
1682                         end = le32_to_cpu(ex->ee_block)
1683                                 + ext4_ext_get_actual_len(ex);
1684                         if (block + num < end)
1685                                 end = block + num;
1686                         exists = 1;
1687                 } else {
1688                         BUG();
1689                 }
1690                 BUG_ON(end <= start);
1691
1692                 if (!exists) {
1693                         cbex.ec_block = start;
1694                         cbex.ec_len = end - start;
1695                         cbex.ec_start = 0;
1696                         cbex.ec_type = EXT4_EXT_CACHE_GAP;
1697                 } else {
1698                         cbex.ec_block = le32_to_cpu(ex->ee_block);
1699                         cbex.ec_len = ext4_ext_get_actual_len(ex);
1700                         cbex.ec_start = ext_pblock(ex);
1701                         cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
1702                 }
1703
1704                 BUG_ON(cbex.ec_len == 0);
1705                 err = func(inode, path, &cbex, ex, cbdata);
1706                 ext4_ext_drop_refs(path);
1707
1708                 if (err < 0)
1709                         break;
1710
1711                 if (err == EXT_REPEAT)
1712                         continue;
1713                 else if (err == EXT_BREAK) {
1714                         err = 0;
1715                         break;
1716                 }
1717
1718                 if (ext_depth(inode) != depth) {
1719                         /* depth was changed. we have to realloc path */
1720                         kfree(path);
1721                         path = NULL;
1722                 }
1723
1724                 block = cbex.ec_block + cbex.ec_len;
1725         }
1726
1727         if (path) {
1728                 ext4_ext_drop_refs(path);
1729                 kfree(path);
1730         }
1731
1732         return err;
1733 }
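/*
 * Illustrative sketch (hypothetical helper, not part of the original
 * source): a minimal ext_prepare_callback for ext4_ext_walk_space(),
 * assuming the callback signature used by the call above.  It just
 * counts allocated regions; a real user fills in user-visible buffers
 * instead.  EXT_CONTINUE asks for the next region, EXT_BREAK stops the
 * walk, and EXT_REPEAT re-examines the current block:
 *
 *	static int count_extents_cb(struct inode *inode,
 *				    struct ext4_ext_path *path,
 *				    struct ext4_ext_cache *cbex,
 *				    struct ext4_extent *ex, void *cbdata)
 *	{
 *		unsigned long *nr = cbdata;
 *
 *		if (cbex->ec_type == EXT4_EXT_CACHE_EXTENT)
 *			(*nr)++;
 *		return EXT_CONTINUE;
 *	}
 */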
1734
1735 static void
1736 ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block,
1737                         __u32 len, ext4_fsblk_t start, int type)
1738 {
1739         struct ext4_ext_cache *cex;
1740         BUG_ON(len == 0);
1741         cex = &EXT4_I(inode)->i_cached_extent;
1742         cex->ec_type = type;
1743         cex->ec_block = block;
1744         cex->ec_len = len;
1745         cex->ec_start = start;
1746 }
1747
1748 /*
1749  * ext4_ext_put_gap_in_cache:
1750  * calculate boundaries of the gap that the requested block fits into
1751  * and cache this gap
1752  */
1753 static void
1754 ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
1755                                 ext4_lblk_t block)
1756 {
1757         int depth = ext_depth(inode);
1758         unsigned long len;
1759         ext4_lblk_t lblock;
1760         struct ext4_extent *ex;
1761
1762         ex = path[depth].p_ext;
1763         if (ex == NULL) {
1764                 /* there is no extent yet, so the gap spans the whole file: [0; EXT_MAX_BLOCK] */
1765                 lblock = 0;
1766                 len = EXT_MAX_BLOCK;
1767                 ext_debug("cache gap(whole file):");
1768         } else if (block < le32_to_cpu(ex->ee_block)) {
1769                 lblock = block;
1770                 len = le32_to_cpu(ex->ee_block) - block;
1771                 ext_debug("cache gap(before): %u [%u:%u]",
1772                                 block,
1773                                 le32_to_cpu(ex->ee_block),
1774                                  ext4_ext_get_actual_len(ex));
1775         } else if (block >= le32_to_cpu(ex->ee_block)
1776                         + ext4_ext_get_actual_len(ex)) {
1777                 ext4_lblk_t next;
1778                 lblock = le32_to_cpu(ex->ee_block)
1779                         + ext4_ext_get_actual_len(ex);
1780
1781                 next = ext4_ext_next_allocated_block(path);
1782                 ext_debug("cache gap(after): [%u:%u] %u",
1783                                 le32_to_cpu(ex->ee_block),
1784                                 ext4_ext_get_actual_len(ex),
1785                                 block);
1786                 BUG_ON(next == lblock);
1787                 len = next - lblock;
1788         } else {
1789                 lblock = len = 0;
1790                 BUG();
1791         }
1792
1793         ext_debug(" -> %u:%lu\n", lblock, len);
1794         ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP);
1795 }
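/*
 * Worked example (illustrative): if the file has extents covering
 * logical blocks 100-119 and 200-299 and a lookup for block 150
 * misses, the "after" case above computes lblock = 120 and next = 200,
 * so the gap [120; len 80] is cached and later lookups in 120-199 are
 * answered without another tree walk.
 */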
1796
1797 static int
1798 ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
1799                         struct ext4_extent *ex)
1800 {
1801         struct ext4_ext_cache *cex;
1802
1803         cex = &EXT4_I(inode)->i_cached_extent;
1804
1805         /* does the cache hold valid data? */
1806         if (cex->ec_type == EXT4_EXT_CACHE_NO)
1807                 return EXT4_EXT_CACHE_NO;
1808
1809         BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP &&
1810                         cex->ec_type != EXT4_EXT_CACHE_EXTENT);
1811         if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) {
1812                 ex->ee_block = cpu_to_le32(cex->ec_block);
1813                 ext4_ext_store_pblock(ex, cex->ec_start);
1814                 ex->ee_len = cpu_to_le16(cex->ec_len);
1815                 ext_debug("%u cached by %u:%u:%llu\n",
1816                                 block,
1817                                 cex->ec_block, cex->ec_len, cex->ec_start);
1818                 return cex->ec_type;
1819         }
1820
1821         /* not in cache */
1822         return EXT4_EXT_CACHE_NO;
1823 }
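/*
 * Illustrative note: i_cached_extent is a single-slot cache, so a hit
 * requires 'block' to fall inside the one region stored by the last
 * ext4_ext_put_in_cache()/ext4_ext_put_gap_in_cache() call.  This is
 * why ext4_ext_get_blocks() below consults it before paying for a full
 * tree walk with ext4_ext_find_extent().
 */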
1824
1825 /*
1826  * ext4_ext_rm_idx:
1827  * removes the index from the index block.
1828  * It is used only for truncate, so all requests are for the
1829  * last index in the block.
1830  */
1831 static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
1832                         struct ext4_ext_path *path)
1833 {
1834         struct buffer_head *bh;
1835         int err;
1836         ext4_fsblk_t leaf;
1837
1838         /* free index block */
1839         path--;
1840         leaf = idx_pblock(path->p_idx);
1841         BUG_ON(path->p_hdr->eh_entries == 0);
1842         err = ext4_ext_get_access(handle, inode, path);
1843         if (err)
1844                 return err;
1845         le16_add_cpu(&path->p_hdr->eh_entries, -1);
1846         err = ext4_ext_dirty(handle, inode, path);
1847         if (err)
1848                 return err;
1849         ext_debug("index is empty, remove it, free block %llu\n", leaf);
1850         bh = sb_find_get_block(inode->i_sb, leaf);
1851         ext4_forget(handle, 1, inode, bh, leaf);
1852         ext4_free_blocks(handle, inode, leaf, 1, 1);
1853         return err;
1854 }
1855
1856 /*
1857  * ext4_ext_calc_credits_for_single_extent:
1858  * This routine returns the maximum number of credits needed to insert
1859  * an extent into the extent tree.
1860  * When passing an actual path, the caller should compute the credits
1861  * under i_data_sem.
1862  */
1863 int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
1864                                                 struct ext4_ext_path *path)
1865 {
1866         if (path) {
1867                 int depth = ext_depth(inode);
1868                 int ret = 0;
1869
1870                 /* is there space left in the leaf? */
1871                 if (le16_to_cpu(path[depth].p_hdr->eh_entries)
1872                                 < le16_to_cpu(path[depth].p_hdr->eh_max)) {
1873
1874                         /*
1875                          * There is some space in the leaf; no need
1876                          * to account for the leaf block credit.
1877                          *
1878                          * Bitmaps, block group descriptor blocks
1879                          * and other metadata blocks still need to
1880                          * be accounted for.
1881                          */
1882                         /* 1 bitmap, 1 block group descriptor */
1883                         ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
                             return ret;
1884                 }
1885         }
1886
1887         return ext4_chunk_trans_blocks(inode, nrblocks);
1888 }
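/*
 * Worked example (illustrative): when the path shows free room in the
 * leaf, the estimate above is 2 + EXT4_META_TRANS_BLOCKS(sb) - one
 * block bitmap, one group descriptor, plus the usual metadata
 * overhead.  Without a path (or with a full leaf) the caller falls
 * back to ext4_chunk_trans_blocks(), which sizes the transaction for
 * the general case.
 */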
1889
1890 /*
1891  * How many index/leaf blocks need to be changed/allocated to modify nrblocks?
1892  *
1893  * If nrblocks fit in a single extent (chunk flag is 1), then in the
1894  * worst case each index/leaf at every tree level needs to be changed;
1895  * if the tree splits due to inserting a new extent, then the old
1896  * index/leaf blocks need to be updated too.
1897  *
1898  * If the nrblocks are discontiguous, they could cause
1899  * the whole tree to split more than once, but this is really rare.
1900  */
1901 int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
1902 {
1903         int index;
1904         int depth = ext_depth(inode);
1905
1906         if (chunk)
1907                 index = depth * 2;
1908         else
1909                 index = depth * 3;
1910
1911         return index;
1912 }
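/*
 * Worked example (illustrative): for a tree of depth 2, a single
 * contiguous chunk is estimated at 2 * 2 = 4 index/leaf blocks (the
 * split path plus the old path that must be updated), while
 * discontiguous nrblocks are estimated at 3 * 2 = 6.
 */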
1913
1914 static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
1915                                 struct ext4_extent *ex,
1916                                 ext4_lblk_t from, ext4_lblk_t to)
1917 {
1918         struct buffer_head *bh;
1919         unsigned short ee_len =  ext4_ext_get_actual_len(ex);
1920         int i, metadata = 0;
1921
1922         if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
1923                 metadata = 1;
1924 #ifdef EXTENTS_STATS
1925         {
1926                 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1927                 spin_lock(&sbi->s_ext_stats_lock);
1928                 sbi->s_ext_blocks += ee_len;
1929                 sbi->s_ext_extents++;
1930                 if (ee_len < sbi->s_ext_min)
1931                         sbi->s_ext_min = ee_len;
1932                 if (ee_len > sbi->s_ext_max)
1933                         sbi->s_ext_max = ee_len;
1934                 if (ext_depth(inode) > sbi->s_depth_max)
1935                         sbi->s_depth_max = ext_depth(inode);
1936                 spin_unlock(&sbi->s_ext_stats_lock);
1937         }
1938 #endif
1939         if (from >= le32_to_cpu(ex->ee_block)
1940             && to == le32_to_cpu(ex->ee_block) + ee_len - 1) {
1941                 /* tail removal */
1942                 ext4_lblk_t num;
1943                 ext4_fsblk_t start;
1944
1945                 num = le32_to_cpu(ex->ee_block) + ee_len - from;
1946                 start = ext_pblock(ex) + ee_len - num;
1947                 ext_debug("free last %u blocks starting %llu\n", num, start);
1948                 for (i = 0; i < num; i++) {
1949                         bh = sb_find_get_block(inode->i_sb, start + i);
1950                         ext4_forget(handle, 0, inode, bh, start + i);
1951                 }
1952                 ext4_free_blocks(handle, inode, start, num, metadata);
1953         } else if (from == le32_to_cpu(ex->ee_block)
1954                    && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
1955                 printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
1956                         from, to, le32_to_cpu(ex->ee_block), ee_len);
1957         } else {
1958                 printk(KERN_INFO "strange request: removal(2) "
1959                                 "%u-%u from %u:%u\n",
1960                                 from, to, le32_to_cpu(ex->ee_block), ee_len);
1961         }
1962         return 0;
1963 }
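/*
 * Worked example (illustrative) for the tail-removal branch above:
 * with ee_block = 100, ee_len = 20 and from = 110, to = 119, we get
 * num = 100 + 20 - 110 = 10 blocks to free, starting at physical block
 * start = ext_pblock(ex) + 20 - 10, i.e. the last 10 blocks of the
 * extent.
 */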
1964
1965 static int
1966 ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
1967                 struct ext4_ext_path *path, ext4_lblk_t start)
1968 {
1969         int err = 0, correct_index = 0;
1970         int depth = ext_depth(inode), credits;
1971         struct ext4_extent_header *eh;
1972         ext4_lblk_t a, b, block;
1973         unsigned num;
1974         ext4_lblk_t ex_ee_block;
1975         unsigned short ex_ee_len;
1976         unsigned uninitialized = 0;
1977         struct ext4_extent *ex;
1978
1979         /* the header must be checked already in ext4_ext_remove_space() */
1980         ext_debug("truncate since %u in leaf\n", start);
1981         if (!path[depth].p_hdr)
1982                 path[depth].p_hdr = ext_block_hdr(path[depth].p_bh);
1983         eh = path[depth].p_hdr;
1984         BUG_ON(eh == NULL);
1985
1986         /* find where to start removing */
1987         ex = EXT_LAST_EXTENT(eh);
1988
1989         ex_ee_block = le32_to_cpu(ex->ee_block);
1990         if (ext4_ext_is_uninitialized(ex))
1991                 uninitialized = 1;
1992         ex_ee_len = ext4_ext_get_actual_len(ex);
1993
1994         while (ex >= EXT_FIRST_EXTENT(eh) &&
1995                         ex_ee_block + ex_ee_len > start) {
1996                 ext_debug("remove ext %u:%u\n", ex_ee_block, ex_ee_len);
1997                 path[depth].p_ext = ex;
1998
1999                 a = ex_ee_block > start ? ex_ee_block : start;
2000                 b = ex_ee_block + ex_ee_len - 1 < EXT_MAX_BLOCK ?
2001                         ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCK;
2002
2003                 ext_debug("  border %u:%u\n", a, b);
2004
2005                 if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) {
2006                         block = 0;
2007                         num = 0;
2008                         BUG();
2009                 } else if (a != ex_ee_block) {
2010                         /* remove tail of the extent */
2011                         block = ex_ee_block;
2012                         num = a - block;
2013                 } else if (b != ex_ee_block + ex_ee_len - 1) {
2014                         /* remove head of the extent */
2015                         block = a;
2016                         num = b - a;
2017                         /* there is no "make a hole" API yet */
2018                         BUG();
2019                 } else {
2020                         /* remove whole extent: excellent! */
2021                         block = ex_ee_block;
2022                         num = 0;
2023                         BUG_ON(a != ex_ee_block);
2024                         BUG_ON(b != ex_ee_block + ex_ee_len - 1);
2025                 }
2026
2027                 /*
2028                  * 3 for leaf, sb, and inode plus 2 (bitmap and group
2029                  * descriptor) for each block group; assume two block
2030                  * groups plus ex_ee_len/blocks_per_block_group for
2031                  * the worst case
2032                  */
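                     /*
                      * Worked example (illustrative): with 32768 blocks
                      * per group and ex_ee_len = 70000, the line below
                      * yields 7 + 2 * (70000 / 32768) = 7 + 4 = 11
                      * credits, before the index-depth and quota
                      * credits are added further down.
                      */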
2033                 credits = 7 + 2*(ex_ee_len/EXT4_BLOCKS_PER_GROUP(inode->i_sb));
2034                 if (ex == EXT_FIRST_EXTENT(eh)) {
2035                         correct_index = 1;
2036                         credits += (ext_depth(inode)) + 1;
2037                 }
2038                 credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb);
2039
2040                 err = ext4_ext_journal_restart(handle, credits);
2041                 if (err)
2042                         goto out;
2043
2044                 err = ext4_ext_get_access(handle, inode, path + depth);
2045                 if (err)
2046                         goto out;
2047
2048                 err = ext4_remove_blocks(handle, inode, ex, a, b);
2049                 if (err)
2050                         goto out;
2051
2052                 if (num == 0) {
2053                         /* this extent is removed; mark slot entirely unused */
2054                         ext4_ext_store_pblock(ex, 0);
2055                         le16_add_cpu(&eh->eh_entries, -1);
2056                 }
2057
2058                 ex->ee_block = cpu_to_le32(block);
2059                 ex->ee_len = cpu_to_le16(num);
2060                 /*
2061                  * Do not mark uninitialized if all the blocks in the
2062                  * extent have been removed.
2063                  */
2064                 if (uninitialized && num)
2065                         ext4_ext_mark_uninitialized(ex);
2066
2067                 err = ext4_ext_dirty(handle, inode, path + depth);
2068                 if (err)
2069                         goto out;
2070
2071                 ext_debug("new extent: %u:%u:%llu\n", block, num,
2072                                 ext_pblock(ex));
2073                 ex--;
2074                 ex_ee_block = le32_to_cpu(ex->ee_block);
2075                 ex_ee_len = ext4_ext_get_actual_len(ex);
2076         }
2077
2078         if (correct_index && eh->eh_entries)
2079                 err = ext4_ext_correct_indexes(handle, inode, path);
2080
2081         /* if this leaf is free, then we should
2082          * remove it from index block above */
2083         if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
2084                 err = ext4_ext_rm_idx(handle, inode, path + depth);
2085
2086 out:
2087         return err;
2088 }
2089
2090 /*
2091  * ext4_ext_more_to_rm:
2092  * returns 1 if the current index has to be freed (even partially)
2093  */
2094 static int
2095 ext4_ext_more_to_rm(struct ext4_ext_path *path)
2096 {
2097         BUG_ON(path->p_idx == NULL);
2098
2099         if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr))
2100                 return 0;
2101
2102         /*
2103          * if truncation at a deeper level happened, it wasn't partial,
2104          * so we have to consider the current index for truncation
2105          */
2106         if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block)
2107                 return 0;
2108         return 1;
2109 }
2110
2111 static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start)
2112 {
2113         struct super_block *sb = inode->i_sb;
2114         int depth = ext_depth(inode);
2115         struct ext4_ext_path *path;
2116         handle_t *handle;
2117         int i = 0, err = 0;
2118
2119         ext_debug("truncate since %u\n", start);
2120
2121         /* the first extent we free will probably be the last one in its block */
2122         handle = ext4_journal_start(inode, depth + 1);
2123         if (IS_ERR(handle))
2124                 return PTR_ERR(handle);
2125
2126         ext4_ext_invalidate_cache(inode);
2127
2128         /*
2129          * We start scanning from right side, freeing all the blocks
2130          * after i_size and walking into the tree depth-wise.
2131          */
2132         path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_NOFS);
2133         if (path == NULL) {
2134                 ext4_journal_stop(handle);
2135                 return -ENOMEM;
2136         }
2137         path[0].p_hdr = ext_inode_hdr(inode);
2138         if (ext4_ext_check_header(inode, path[0].p_hdr, depth)) {
2139                 err = -EIO;
2140                 goto out;
2141         }
2142         path[0].p_depth = depth;
2143
2144         while (i >= 0 && err == 0) {
2145                 if (i == depth) {
2146                         /* this is leaf block */
2147                         err = ext4_ext_rm_leaf(handle, inode, path, start);
2148                         /* root level has p_bh == NULL, brelse() eats this */
2149                         brelse(path[i].p_bh);
2150                         path[i].p_bh = NULL;
2151                         i--;
2152                         continue;
2153                 }
2154
2155                 /* this is index block */
2156                 if (!path[i].p_hdr) {
2157                         ext_debug("initialize header\n");
2158                         path[i].p_hdr = ext_block_hdr(path[i].p_bh);
2159                 }
2160
2161                 if (!path[i].p_idx) {
2162                         /* this level hasn't been touched yet */
2163                         path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
2164                         path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1;
2165                         ext_debug("init index ptr: hdr 0x%p, num %d\n",
2166                                   path[i].p_hdr,
2167                                   le16_to_cpu(path[i].p_hdr->eh_entries));
2168                 } else {
2169                         /* we were already here; move on to the next index */
2170                         path[i].p_idx--;
2171                 }
2172
2173                 ext_debug("level %d - index, first 0x%p, cur 0x%p\n",
2174                                 i, EXT_FIRST_INDEX(path[i].p_hdr),
2175                                 path[i].p_idx);
2176                 if (ext4_ext_more_to_rm(path + i)) {
2177                         struct buffer_head *bh;
2178                         /* go to the next level */
2179                         ext_debug("move to level %d (block %llu)\n",
2180                                   i + 1, idx_pblock(path[i].p_idx));
2181                         memset(path + i + 1, 0, sizeof(*path));
2182                         bh = sb_bread(sb, idx_pblock(path[i].p_idx));
2183                         if (!bh) {
2184                                 /* should we reset i_size? */
2185                                 err = -EIO;
2186                                 break;
2187                         }
2188                         if (WARN_ON(i + 1 > depth)) {
2189                                 err = -EIO;
2190                                 break;
2191                         }
2192                         if (ext4_ext_check_header(inode, ext_block_hdr(bh),
2193                                                         depth - i - 1)) {
2194                                 err = -EIO;
2195                                 break;
2196                         }
2197                         path[i + 1].p_bh = bh;
2198
2199                         /* save the actual number of indexes, since
2200                          * this number changes at the next iteration */
2201                         path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries);
2202                         i++;
2203                 } else {
2204                         /* we finished processing this index, go up */
2205                         if (path[i].p_hdr->eh_entries == 0 && i > 0) {
2206                                 /* index is empty, remove it;
2207                                  * handle must be already prepared by
2208                                  * ext4_ext_rm_leaf() */
2209                                 err = ext4_ext_rm_idx(handle, inode, path + i);
2210                         }
2211                         /* root level has p_bh == NULL, brelse() eats this */
2212                         brelse(path[i].p_bh);
2213                         path[i].p_bh = NULL;
2214                         i--;
2215                         ext_debug("return to level %d\n", i);
2216                 }
2217         }
2218
2219         /* TODO: flexible tree reduction should be here */
2220         if (path->p_hdr->eh_entries == 0) {
2221                 /*
2222                  * truncating to zero freed the whole tree,
2223                  * so we need to correct eh_depth
2224                  */
2225                 err = ext4_ext_get_access(handle, inode, path);
2226                 if (err == 0) {
2227                         ext_inode_hdr(inode)->eh_depth = 0;
2228                         ext_inode_hdr(inode)->eh_max =
2229                                 cpu_to_le16(ext4_ext_space_root(inode));
2230                         err = ext4_ext_dirty(handle, inode, path);
2231                 }
2232         }
2233 out:
2234         ext4_ext_drop_refs(path);
2235         kfree(path);
2236         ext4_journal_stop(handle);
2237
2238         return err;
2239 }
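/*
 * Illustrative note: the loop above is an iterative depth-first walk.
 * 'i' is the current level (0 = root); path[i].p_idx scans each index
 * block from the right, ext4_ext_more_to_rm() decides whether to
 * descend, leaves are truncated by ext4_ext_rm_leaf(), and index
 * blocks that become empty are removed on the way back up.
 */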
2240
2241 /*
2242  * called at mount time
2243  */
2244 void ext4_ext_init(struct super_block *sb)
2245 {
2246         /*
2247          * possible initialization would be here
2248          */
2249
2250         if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
2251                 printk(KERN_INFO "EXT4-fs: file extents enabled");
2252 #ifdef AGGRESSIVE_TEST
2253                 printk(", aggressive tests");
2254 #endif
2255 #ifdef CHECK_BINSEARCH
2256                 printk(", check binsearch");
2257 #endif
2258 #ifdef EXTENTS_STATS
2259                 printk(", stats");
2260 #endif
2261                 printk("\n");
2262 #ifdef EXTENTS_STATS
2263                 spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock);
2264                 EXT4_SB(sb)->s_ext_min = 1 << 30;
2265                 EXT4_SB(sb)->s_ext_max = 0;
2266 #endif
2267         }
2268 }
2269
2270 /*
2271  * called at umount time
2272  */
2273 void ext4_ext_release(struct super_block *sb)
2274 {
2275         if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
2276                 return;
2277
2278 #ifdef EXTENTS_STATS
2279         if (EXT4_SB(sb)->s_ext_blocks && EXT4_SB(sb)->s_ext_extents) {
2280                 struct ext4_sb_info *sbi = EXT4_SB(sb);
2281                 printk(KERN_ERR "EXT4-fs: %lu blocks in %lu extents (%lu ave)\n",
2282                         sbi->s_ext_blocks, sbi->s_ext_extents,
2283                         sbi->s_ext_blocks / sbi->s_ext_extents);
2284                 printk(KERN_ERR "EXT4-fs: extents: %lu min, %lu max, max depth %lu\n",
2285                         sbi->s_ext_min, sbi->s_ext_max, sbi->s_depth_max);
2286         }
2287 #endif
2288 }
2289
2290 static void bi_complete(struct bio *bio, int error)
2291 {
2292         complete((struct completion *)bio->bi_private);
2293 }
2294
2295 /* FIXME!! we need to try to merge to left or right after zero-out  */
2296 static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
2297 {
2298         int ret = -EIO;
2299         struct bio *bio;
2300         int blkbits, blocksize;
2301         sector_t ee_pblock;
2302         struct completion event;
2303         unsigned int ee_len, len, done, offset;
2304
2305
2306         blkbits   = inode->i_blkbits;
2307         blocksize = inode->i_sb->s_blocksize;
2308         ee_len    = ext4_ext_get_actual_len(ex);
2309         ee_pblock = ext_pblock(ex);
2310
2311         /* convert ee_pblock to 512 byte sectors */
2312         ee_pblock = ee_pblock << (blkbits - 9);
2313
2314         while (ee_len > 0) {
2315
2316                 if (ee_len > BIO_MAX_PAGES)
2317                         len = BIO_MAX_PAGES;
2318                 else
2319                         len = ee_len;
2320
2321                 bio = bio_alloc(GFP_NOIO, len);
2322                 if (!bio)
2323                         return -ENOMEM;
2324                 bio->bi_sector = ee_pblock;
2325                 bio->bi_bdev   = inode->i_sb->s_bdev;
2326
2327                 done = 0;
2328                 offset = 0;
2329                 while (done < len) {
2330                         ret = bio_add_page(bio, ZERO_PAGE(0),
2331                                                         blocksize, offset);
2332                         if (ret != blocksize) {
2333                                 /*
2334                                  * We can't add any more pages because of
2335                                  * hardware limitations.  Start a new bio.
2336                                  */
2337                                 break;
2338                         }
2339                         done++;
2340                         offset += blocksize;
2341                         if (offset >= PAGE_CACHE_SIZE)
2342                                 offset = 0;
2343                 }
2344
2345                 init_completion(&event);
2346                 bio->bi_private = &event;
2347                 bio->bi_end_io = bi_complete;
2348                 submit_bio(WRITE, bio);
2349                 wait_for_completion(&event);
2350
2351                 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
2352                         /* don't leak the bio on the I/O error path */
2353                         bio_put(bio);
2354                         return -EIO;
2355                 }
2356                 ret = 0;
2357                 bio_put(bio);
2358                 ee_len    -= done;
2359                 ee_pblock += done  << (blkbits - 9);
2360         }
2361         return ret;
2362 }
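/*
 * Worked example (illustrative): with a 4KiB block size, blkbits is
 * 12, so the shift above is 12 - 9 = 3 and physical block 100 maps to
 * 512-byte sector 800; each successful bio_add_page() call then covers
 * one 4096-byte filesystem block of zeroes from ZERO_PAGE(0).
 */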
2363
2364 #define EXT4_EXT_ZERO_LEN 7
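/*
 * Illustrative note: EXT4_EXT_ZERO_LEN is the threshold at or below
 * which ext4_ext_convert_to_initialized() zeroes blocks in place
 * rather than splitting the extent - writing a few blocks of zeroes is
 * cheaper than the extra tree modifications (and possible ENOSPC
 * handling) a split would take.
 */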
2365
2366 /*
2367  * This function is called by ext4_ext_get_blocks() if someone tries to write
2368  * to an uninitialized extent. It may result in splitting the uninitialized
2369  * extent into multiple extents (up to three - one initialized and two
2370  * uninitialized).
2371  * There are three possibilities:
2372  *   a> No split required: the entire extent should be initialized
2373  *   b> Split into two extents: the write happens at either end of the extent
2374  *   c> Split into three extents: someone is writing in the middle of the extent
2375  */
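/*
 * Illustrative sketch of case c> above: a write of max_blocks starting
 * at iblock in the middle of uninitialized extent ex:
 *
 *	ee_block         iblock     iblock+max_blocks     ee_block+ee_len
 *	   |-- ex1 uninit --|----- ex2 init -----|--- ex3 uninit ---|
 *
 * ex1 keeps the head uninitialized in place; the tail ex3 and then the
 * newly written middle ex2 are inserted via ext4_ext_insert_extent().
 */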
2376 static int ext4_ext_convert_to_initialized(handle_t *handle,
2377                                                 struct inode *inode,
2378                                                 struct ext4_ext_path *path,
2379                                                 ext4_lblk_t iblock,
2380                                                 unsigned int max_blocks)
2381 {
2382         struct ext4_extent *ex, newex, orig_ex;
2383         struct ext4_extent *ex1 = NULL;
2384         struct ext4_extent *ex2 = NULL;
2385         struct ext4_extent *ex3 = NULL;
2386         struct ext4_extent_header *eh;
2387         ext4_lblk_t ee_block;
2388         unsigned int allocated, ee_len, depth;
2389         ext4_fsblk_t newblock;
2390         int err = 0;
2391         int ret = 0;
2392
2393         depth = ext_depth(inode);
2394         eh = path[depth].p_hdr;
2395         ex = path[depth].p_ext;
2396         ee_block = le32_to_cpu(ex->ee_block);
2397         ee_len = ext4_ext_get_actual_len(ex);
2398         allocated = ee_len - (iblock - ee_block);
2399         newblock = iblock - ee_block + ext_pblock(ex);
2400         ex2 = ex;
2401         orig_ex.ee_block = ex->ee_block;
2402         orig_ex.ee_len   = cpu_to_le16(ee_len);
2403         ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
2404
2405         err = ext4_ext_get_access(handle, inode, path + depth);
2406         if (err)
2407                 goto out;
2408         /* If the extent is at most 2*EXT4_EXT_ZERO_LEN blocks, zero it out directly */
2409         if (ee_len <= 2*EXT4_EXT_ZERO_LEN) {
2410                 err =  ext4_ext_zeroout(inode, &orig_ex);
2411                 if (err)
2412                         goto fix_extent_len;
2413                 /* update the extent length and mark as initialized */
2414                 ex->ee_block = orig_ex.ee_block;
2415                 ex->ee_len   = orig_ex.ee_len;
2416                 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2417                 ext4_ext_dirty(handle, inode, path + depth);
2418                 /* zeroed the full extent */
2419                 return allocated;
2420         }
2421
2422         /* ex1: ee_block to iblock - 1 : uninitialized */
2423         if (iblock > ee_block) {
2424                 ex1 = ex;
2425                 ex1->ee_len = cpu_to_le16(iblock - ee_block);
2426                 ext4_ext_mark_uninitialized(ex1);
2427                 ex2 = &newex;
2428         }
2429         /*
2430          * for sanity, update the length of the ex2 extent before
2431          * we insert ex3, if ex1 is NULL. This is to avoid temporary
2432          * overlap of blocks.
2433          */
2434         if (!ex1 && allocated > max_blocks)
2435                 ex2->ee_len = cpu_to_le16(max_blocks);
2436         /* ex3: iblock + max_blocks to ee_block + ee_len : uninitialized */
2437         if (allocated > max_blocks) {
2438                 unsigned int newdepth;
2439                 /* If the tail from iblock is at most EXT4_EXT_ZERO_LEN blocks, zero it out directly */
2440                 if (allocated <= EXT4_EXT_ZERO_LEN) {
2441                         /*
2442                          * iblock == ee_block is handled by the zeroout
2443                          * at the beginning.
2444                          * Mark the first half uninitialized.
2445                          * Mark the second half initialized and zero out
2446                          * the initialized extent.
2447                          */
2448                         ex->ee_block = orig_ex.ee_block;
2449                         ex->ee_len   = cpu_to_le16(ee_len - allocated);
2450                         ext4_ext_mark_uninitialized(ex);
2451                         ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2452                         ext4_ext_dirty(handle, inode, path + depth);
2453
2454                         ex3 = &newex;
2455                         ex3->ee_block = cpu_to_le32(iblock);
2456                         ext4_ext_store_pblock(ex3, newblock);
2457                         ex3->ee_len = cpu_to_le16(allocated);
2458                         err = ext4_ext_insert_extent(handle, inode, path, ex3);
2459                         if (err == -ENOSPC) {
2460                                 err =  ext4_ext_zeroout(inode, &orig_ex);
2461                                 if (err)
2462                                         goto fix_extent_len;
2463                                 ex->ee_block = orig_ex.ee_block;
2464                                 ex->ee_len   = orig_ex.ee_len;
2465                                 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2466                                 ext4_ext_dirty(handle, inode, path + depth);
2467                                 /* blocks available from iblock */
2468                                 return allocated;
2469
2470                         } else if (err)
2471                                 goto fix_extent_len;
2472
2473                         /*
2474                          * We need to zero out the second half because
2475                          * a fallocate request can update the file size and
2476                          * converting the second half to an initialized
2477                          * extent implies that we can leak some junk data
2478                          * to user space.
2479                          */
2480                         err =  ext4_ext_zeroout(inode, ex3);
2481                         if (err) {
2482                                 /*
2483                                  * We should actually mark the
2484                                  * second half as uninitialized and return
2485                                  * an error; the insert may have changed
2486                                  * the extent tree.
2487                                  */
2487                                 depth = ext_depth(inode);
2488                                 ext4_ext_drop_refs(path);
2489                                 path = ext4_ext_find_extent(inode,
2490                                                                 iblock, path);
2491                                 if (IS_ERR(path)) {
2492                                         err = PTR_ERR(path);
2493                                         return err;
2494                                 }
2495                                 /* get the second half extent details */
2496                                 ex = path[depth].p_ext;
2497                                 err = ext4_ext_get_access(handle, inode,
2498                                                                 path + depth);
2499                                 if (err)
2500                                         return err;
2501                                 ext4_ext_mark_uninitialized(ex);
2502                                 ext4_ext_dirty(handle, inode, path + depth);
2503                                 return err;
2504                         }
2505
2506                         /* zeroed the second half */
2507                         return allocated;
2508                 }
2509                 ex3 = &newex;
2510                 ex3->ee_block = cpu_to_le32(iblock + max_blocks);
2511                 ext4_ext_store_pblock(ex3, newblock + max_blocks);
2512                 ex3->ee_len = cpu_to_le16(allocated - max_blocks);
2513                 ext4_ext_mark_uninitialized(ex3);
2514                 err = ext4_ext_insert_extent(handle, inode, path, ex3);
2515                 if (err == -ENOSPC) {
2516                         err =  ext4_ext_zeroout(inode, &orig_ex);
2517                         if (err)
2518                                 goto fix_extent_len;
2519                         /* update the extent length and mark as initialized */
2520                         ex->ee_block = orig_ex.ee_block;
2521                         ex->ee_len   = orig_ex.ee_len;
2522                         ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2523                         ext4_ext_dirty(handle, inode, path + depth);
2524                         /* zeroed the full extent */
2525                         /* blocks available from iblock */
2526                         return allocated;
2527
2528                 } else if (err)
2529                         goto fix_extent_len;
2530                 /*
2531                  * The depth, and hence eh & ex might change
2532                  * as part of the insert above.
2533                  */
2534                 newdepth = ext_depth(inode);
2535                 /*
2536                  * update the extent length after successful insert of the
2537                  * split extent
2538                  */
2539                 orig_ex.ee_len = cpu_to_le16(ee_len -
2540                                                 ext4_ext_get_actual_len(ex3));
2541                 depth = newdepth;
2542                 ext4_ext_drop_refs(path);
2543                 path = ext4_ext_find_extent(inode, iblock, path);
2544                 if (IS_ERR(path)) {
2545                         err = PTR_ERR(path);
2546                         goto out;
2547                 }
2548                 eh = path[depth].p_hdr;
2549                 ex = path[depth].p_ext;
2550                 if (ex2 != &newex)
2551                         ex2 = ex;
2552
2553                 err = ext4_ext_get_access(handle, inode, path + depth);
2554                 if (err)
2555                         goto out;
2556
2557                 allocated = max_blocks;
2558
2559                 /* If the first part is at most EXT4_EXT_ZERO_LEN blocks and
2560                  * we are inserting an extent in the middle, zero it out directly;
2561                  * otherwise give the extent a chance to merge to the left.
2562                  */
2563                 if (le16_to_cpu(orig_ex.ee_len) <= EXT4_EXT_ZERO_LEN &&
2564                                                         iblock != ee_block) {
2565                         err =  ext4_ext_zeroout(inode, &orig_ex);
2566                         if (err)
2567                                 goto fix_extent_len;
2568                         /* update the extent length and mark as initialized */
2569                         ex->ee_block = orig_ex.ee_block;
2570                         ex->ee_len   = orig_ex.ee_len;
2571                         ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2572                         ext4_ext_dirty(handle, inode, path + depth);
2573                         /* zero out the first half */
2574                         /* blocks available from iblock */
2575                         return allocated;
2576                 }
2577         }
2578         /*
2579          * If there was a change of depth as part of the
2580          * insertion of ex3 above, we need to update the length
2581          * of the ex1 extent again here
2582          */
2583         if (ex1 && ex1 != ex) {
2584                 ex1 = ex;
2585                 ex1->ee_len = cpu_to_le16(iblock - ee_block);
2586                 ext4_ext_mark_uninitialized(ex1);
2587                 ex2 = &newex;
2588         }
2589         /* ex2: iblock to iblock + max_blocks - 1 : initialized */
2590         ex2->ee_block = cpu_to_le32(iblock);
2591         ext4_ext_store_pblock(ex2, newblock);
2592         ex2->ee_len = cpu_to_le16(allocated);
2593         if (ex2 != ex)
2594                 goto insert;
2595         /*
2596          * The new (initialized) extent starts from the first block
2597          * of the current extent, i.e. ex2 == ex.
2598          * We have to see if it can be merged with the extent
2599          * on the left.
2600          */
2601         if (ex2 > EXT_FIRST_EXTENT(eh)) {
2602                 /*
2603                  * To merge left, pass "ex2 - 1" to try_to_merge(),
2604                  * since it merges towards the right _only_.
2605                  */
2606                 ret = ext4_ext_try_to_merge(inode, path, ex2 - 1);
2607                 if (ret) {
2608                         err = ext4_ext_correct_indexes(handle, inode, path);
2609                         if (err)
2610                                 goto out;
2611                         depth = ext_depth(inode);
2612                         ex2--;
2613                 }
2614         }
2615         /*
2616          * Try to merge towards the right. This might be required
2617          * only when the whole extent is being written to,
2618          * i.e. ex2 == ex and ex3 == NULL.
2619          */
2620         if (!ex3) {
2621                 ret = ext4_ext_try_to_merge(inode, path, ex2);
2622                 if (ret) {
2623                         err = ext4_ext_correct_indexes(handle, inode, path);
2624                         if (err)
2625                                 goto out;
2626                 }
2627         }
2628         /* Mark modified extent as dirty */
2629         err = ext4_ext_dirty(handle, inode, path + depth);
2630         goto out;
2631 insert:
2632         err = ext4_ext_insert_extent(handle, inode, path, &newex);
2633         if (err == -ENOSPC) {
2634                 err =  ext4_ext_zeroout(inode, &orig_ex);
2635                 if (err)
2636                         goto fix_extent_len;
2637                 /* update the extent length and mark as initialized */
2638                 ex->ee_block = orig_ex.ee_block;
2639                 ex->ee_len   = orig_ex.ee_len;
2640                 ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2641                 ext4_ext_dirty(handle, inode, path + depth);
2642                 /* zero out the first half */
2643                 return allocated;
2644         } else if (err)
2645                 goto fix_extent_len;
2646 out:
2647         return err ? err : allocated;
2648
2649 fix_extent_len:
2650         ex->ee_block = orig_ex.ee_block;
2651         ex->ee_len   = orig_ex.ee_len;
2652         ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
2653         ext4_ext_mark_uninitialized(ex);
2654         ext4_ext_dirty(handle, inode, path + depth);
2655         return err;
2656 }
2657
2658 /*
2659  * Block allocation/map/preallocation routine for extent-based files
2660  *
2661  *
2662  * Must be called with
2663  * down_read(&EXT4_I(inode)->i_data_sem) if not allocating file system blocks
2664  * (i.e., create is zero); otherwise with down_write(&EXT4_I(inode)->i_data_sem)
2665  *
2666  * return > 0, number of blocks already mapped/allocated
2667  *          if create == 0 and these are pre-allocated blocks
2668  *              buffer head is unmapped
2669  *          otherwise blocks are mapped
2670  *
2671  * return = 0, if plain lookup failed (blocks have not been allocated)
2672  *          buffer head is unmapped
2673  *
2674  * return < 0, error case.
2675  */
2676 int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
2677                         ext4_lblk_t iblock,
2678                         unsigned int max_blocks, struct buffer_head *bh_result,
2679                         int create, int extend_disksize)
2680 {
2681         struct ext4_ext_path *path = NULL;
2682         struct ext4_extent_header *eh;
2683         struct ext4_extent newex, *ex;
2684         ext4_fsblk_t newblock;
2685         int err = 0, depth, ret, cache_type;
2686         unsigned int allocated = 0;
2687         struct ext4_allocation_request ar;
2688         loff_t disksize;
2689
2690         __clear_bit(BH_New, &bh_result->b_state);
2691         ext_debug("blocks %u/%u requested for inode %lu\n",
2692                         iblock, max_blocks, inode->i_ino);
2693
2694         /* check in cache */
2695         cache_type = ext4_ext_in_cache(inode, iblock, &newex);
2696         if (cache_type) {
2697                 if (cache_type == EXT4_EXT_CACHE_GAP) {
2698                         if (!create) {
2699                                 /*
2700                                  * block isn't allocated yet and
2701                                  * user doesn't want to allocate it
2702                                  */
2703                                 goto out2;
2704                         }
2705                         /* we should allocate requested block */
2706                 } else if (cache_type == EXT4_EXT_CACHE_EXTENT) {
2707                         /* block is already allocated */
2708                         newblock = iblock
2709                                    - le32_to_cpu(newex.ee_block)
2710                                    + ext_pblock(&newex);
2711                         /* number of remaining blocks in the extent */
2712                         allocated = ext4_ext_get_actual_len(&newex) -
2713                                         (iblock - le32_to_cpu(newex.ee_block));
2714                         goto out;
2715                 } else {
2716                         BUG();
2717                 }
2718         }
2719
2720         /* find extent for this block */
2721         path = ext4_ext_find_extent(inode, iblock, NULL);
2722         if (IS_ERR(path)) {
2723                 err = PTR_ERR(path);
2724                 path = NULL;
2725                 goto out2;
2726         }
2727
2728         depth = ext_depth(inode);
2729
2730         /*
2731          * a consistent leaf must not be empty;
2732          * this situation is possible, though, _during_ tree modification;
2733          * this is why the assert can't be put in ext4_ext_find_extent()
2734          */
2735         BUG_ON(path[depth].p_ext == NULL && depth != 0);
2736         eh = path[depth].p_hdr;
2737
2738         ex = path[depth].p_ext;
2739         if (ex) {
2740                 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
2741                 ext4_fsblk_t ee_start = ext_pblock(ex);
2742                 unsigned short ee_len;
2743
2744                 /*
2745                  * Uninitialized extents are treated as holes, except that
2746                  * we split out initialized portions during a write.
2747                  */
2748                 ee_len = ext4_ext_get_actual_len(ex);
2749                 /* if found extent covers block, simply return it */
2750                 if (iblock >= ee_block && iblock < ee_block + ee_len) {
2751                         newblock = iblock - ee_block + ee_start;
2752                         /* number of remaining blocks in the extent */
2753                         allocated = ee_len - (iblock - ee_block);
2754                         ext_debug("%u fit into %u:%d -> %llu\n", iblock,
2755                                         ee_block, ee_len, newblock);
2756
2757                         /* Do not put uninitialized extent in the cache */
2758                         if (!ext4_ext_is_uninitialized(ex)) {
2759                                 ext4_ext_put_in_cache(inode, ee_block,
2760                                                         ee_len, ee_start,
2761                                                         EXT4_EXT_CACHE_EXTENT);
2762                                 goto out;
2763                         }
2764                         if (create == EXT4_CREATE_UNINITIALIZED_EXT)
2765                                 goto out;
2766                         if (!create) {
2767                                 /*
2768                                  * We have blocks reserved already.  We
2769                                  * return allocated blocks so that delalloc
2770                                  * won't do block reservation for us.  But
2771                                  * the buffer head will be unmapped so that
2772                                  * a read from the block returns 0s.
2773                                  */
2774                                 if (allocated > max_blocks)
2775                                         allocated = max_blocks;
2776                                 set_buffer_unwritten(bh_result);
2777                                 goto out2;
2778                         }
2779
2780                         ret = ext4_ext_convert_to_initialized(handle, inode,
2781                                                                 path, iblock,
2782                                                                 max_blocks);
2783                         if (ret <= 0) {
2784                                 err = ret;
2785                                 goto out2;
2786                         } else
2787                                 allocated = ret;
2788                         goto outnew;
2789                 }
2790         }
2791
2792         /*
2793          * requested block isn't allocated yet;
2794          * we can't create blocks if the create flag is zero
2795          */
2796         if (!create) {
2797                 /*
2798                  * put the gap we just found into the cache to speed up
2799                  * subsequent requests
2800                  */
2801                 ext4_ext_put_gap_in_cache(inode, path, iblock);
2802                 goto out2;
2803         }
2804         /*
2805          * Okay, we need to do block allocation.
2806          */
2807
2808         /* find neighbour allocated blocks */
2809         ar.lleft = iblock;
2810         err = ext4_ext_search_left(inode, path, &ar.lleft, &ar.pleft);
2811         if (err)
2812                 goto out2;
2813         ar.lright = iblock;
2814         err = ext4_ext_search_right(inode, path, &ar.lright, &ar.pright);
2815         if (err)
2816                 goto out2;
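             /*
              * The neighbours found above (ar.lleft/ar.pleft and
              * ar.lright/ar.pright) let mballoc place and size the request
              * around what is already allocated on either side of the hole.
              */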
2817
2818         /*
2819          * See if request is beyond maximum number of blocks we can have in
2820          * a single extent. For an initialized extent this limit is
2821          * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is
2822          * EXT_UNINIT_MAX_LEN.
2823          */
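             /*
              * (ee_len is 16 bits and its top bit flags "uninitialized", so
              * an initialized extent holds at most 2^15 = 32768 blocks and
              * an uninitialized one at most 32767.)
              */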
2824         if (max_blocks > EXT_INIT_MAX_LEN &&
2825             create != EXT4_CREATE_UNINITIALIZED_EXT)
2826                 max_blocks = EXT_INIT_MAX_LEN;
2827         else if (max_blocks > EXT_UNINIT_MAX_LEN &&
2828                  create == EXT4_CREATE_UNINITIALIZED_EXT)
2829                 max_blocks = EXT_UNINIT_MAX_LEN;
2830
2831         /* Check if we can really insert (iblock)::(iblock+max_blocks) extent */
2832         newex.ee_block = cpu_to_le32(iblock);
2833         newex.ee_len = cpu_to_le16(max_blocks);
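             /*
              * ext4_ext_check_overlap() returns nonzero when the proposed
              * range would collide with an existing extent; in that case it
              * has already trimmed newex.ee_len to stop at the neighbour.
              */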
2834         err = ext4_ext_check_overlap(inode, &newex, path);
2835         if (err)
2836                 allocated = ext4_ext_get_actual_len(&newex);
2837         else
2838                 allocated = max_blocks;
2839
2840         /* allocate new block */
2841         ar.inode = inode;
2842         ar.goal = ext4_ext_find_goal(inode, path, iblock);
2843         ar.logical = iblock;
2844         ar.len = allocated;
2845         if (S_ISREG(inode->i_mode))
2846                 ar.flags = EXT4_MB_HINT_DATA;
2847         else
2848                 /* disable in-core preallocation for non-regular files */
2849                 ar.flags = 0;
2850         newblock = ext4_mb_new_blocks(handle, &ar, &err);
2851         if (!newblock)
2852                 goto out2;
2853         ext_debug("allocate new block: goal %llu, found %llu/%u\n",
2854                   ar.goal, newblock, allocated);
2855
2856         /* try to insert new extent into found leaf and return */
2857         ext4_ext_store_pblock(&newex, newblock);
2858         newex.ee_len = cpu_to_le16(ar.len);
2859         if (create == EXT4_CREATE_UNINITIALIZED_EXT)  /* Mark uninitialized */
2860                 ext4_ext_mark_uninitialized(&newex);
2861         err = ext4_ext_insert_extent(handle, inode, path, &newex);
2862         if (err) {
2863                 /* free data blocks we just allocated */
2864                 /* not a good idea to call discard here directly,
2865                  * but otherwise we'd need to call it on every free() */
2866                 ext4_discard_preallocations(inode);
2867                 ext4_free_blocks(handle, inode, ext_pblock(&newex),
2868                                         ext4_ext_get_actual_len(&newex), 0);
2869                 goto out2;
2870         }
2871
2872         /* previous routine could use block we allocated */
2873         newblock = ext_pblock(&newex);
2874         allocated = ext4_ext_get_actual_len(&newex);
2875 outnew:
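             /*
              * i_disksize is the inode size as recorded on disk; grow it to
              * cover the new allocation, but never past the in-core i_size.
              */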
2876         if (extend_disksize) {
2877                 disksize = ((loff_t) iblock + ar.len) << inode->i_blkbits;
2878                 if (disksize > i_size_read(inode))
2879                         disksize = i_size_read(inode);
2880                 if (disksize > EXT4_I(inode)->i_disksize)
2881                         EXT4_I(inode)->i_disksize = disksize;
2882         }
2883
2884         set_buffer_new(bh_result);
2885
2886         /* Cache only when it is _not_ an uninitialized extent */
2887         if (create != EXT4_CREATE_UNINITIALIZED_EXT)
2888                 ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
2889                                                 EXT4_EXT_CACHE_EXTENT);
2890 out:
2891         if (allocated > max_blocks)
2892                 allocated = max_blocks;
2893         ext4_ext_show_leaf(inode, path);
2894         set_buffer_mapped(bh_result);
2895         bh_result->b_bdev = inode->i_sb->s_bdev;
2896         bh_result->b_blocknr = newblock;
2897 out2:
2898         if (path) {
2899                 ext4_ext_drop_refs(path);
2900                 kfree(path);
2901         }
2902         return err ? err : allocated;
2903 }
2904
2905 void ext4_ext_truncate(struct inode *inode)
2906 {
2907         struct address_space *mapping = inode->i_mapping;
2908         struct super_block *sb = inode->i_sb;
2909         ext4_lblk_t last_block;
2910         handle_t *handle;
2911         int err = 0;
2912
2913         /*
2914          * probably the first extent we are going to free will be the last in its block
2915          */
2916         err = ext4_writepage_trans_blocks(inode);
2917         handle = ext4_journal_start(inode, err);
2918         if (IS_ERR(handle))
2919                 return;
2920
2921         if (inode->i_size & (sb->s_blocksize - 1))
2922                 ext4_block_truncate_page(handle, mapping, inode->i_size);
2923
2924         if (ext4_orphan_add(handle, inode))
2925                 goto out_stop;
2926
2927         down_write(&EXT4_I(inode)->i_data_sem);
2928         ext4_ext_invalidate_cache(inode);
2929
2930         ext4_discard_preallocations(inode);
2931
2932         /*
2933          * TODO: optimization is possible here.
2934          * Probably we need not scan at all,
2935          * because page truncation is enough.
2936          */
2937
2938         /* we have to know where to truncate from in crash case */
2939         EXT4_I(inode)->i_disksize = inode->i_size;
2940         ext4_mark_inode_dirty(handle, inode);
2941
2942         last_block = (inode->i_size + sb->s_blocksize - 1)
2943                         >> EXT4_BLOCK_SIZE_BITS(sb);
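             /*
              * e.g. with 4KiB blocks and i_size = 5000, last_block = 2:
              * blocks 0 and 1 survive, removal starts at logical block 2.
              */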
2944         err = ext4_ext_remove_space(inode, last_block);
2945
2946         /* In a multi-transaction truncate, we only make the final
2947          * transaction synchronous.
2948          */
2949         if (IS_SYNC(inode))
2950                 ext4_handle_sync(handle);
2951
2952 out_stop:
2953         up_write(&EXT4_I(inode)->i_data_sem);
2954         /*
2955          * If this was a simple ftruncate() and the file will remain alive,
2956          * then we need to clear up the orphan record which we created above.
2957          * However, if this was a real unlink then we were called by
2958          * ext4_delete_inode(), and we allow that function to clean up the
2959          * orphan info for us.
2960          */
2961         if (inode->i_nlink)
2962                 ext4_orphan_del(handle, inode);
2963
2964         inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
2965         ext4_mark_inode_dirty(handle, inode);
2966         ext4_journal_stop(handle);
2967 }
2968
2969 static void ext4_falloc_update_inode(struct inode *inode,
2970                                 int mode, loff_t new_size, int update_ctime)
2971 {
2972         struct timespec now;
2973
2974         if (update_ctime) {
2975                 now = current_fs_time(inode->i_sb);
2976                 if (!timespec_equal(&inode->i_ctime, &now))
2977                         inode->i_ctime = now;
2978         }
2979         /*
2980          * Update only when preallocation was requested beyond
2981          * the file size.
2982          */
2983         if (!(mode & FALLOC_FL_KEEP_SIZE)) {
2984                 if (new_size > i_size_read(inode))
2985                         i_size_write(inode, new_size);
2986                 if (new_size > EXT4_I(inode)->i_disksize)
2987                         ext4_update_i_disksize(inode, new_size);
2988         }
2989
2990 }
2991
2992 /*
2993  * Preallocate space for a file. This implements ext4's fallocate inode
2994  * operation, which is called from the sys_fallocate system call.
2995  * For block-mapped files, posix_fallocate should fall back to writing
2996  * zeroes to the required new blocks (the same behavior expected of
2997  * file systems that do not support the fallocate() system call).
2998  */
2999 long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
3000 {
3001         handle_t *handle;
3002         ext4_lblk_t block;
3003         loff_t new_size;
3004         unsigned int max_blocks;
3005         int ret = 0;
3006         int ret2 = 0;
3007         int retries = 0;
3008         struct buffer_head map_bh;
3009         unsigned int credits, blkbits = inode->i_blkbits;
3010
3011         /*
3012          * currently supporting (pre)allocate mode for extent-based
3013          * files _only_
3014          */
3015         if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
3016                 return -EOPNOTSUPP;
3017
3018         /* preallocation to directories is currently not supported */
3019         if (S_ISDIR(inode->i_mode))
3020                 return -ENODEV;
3021
3022         block = offset >> blkbits;
3023         /*
3024          * We can't just convert len to max_blocks: with blocksize = 4096,
3025          * offset = 3072 and len = 2048, the range spans two blocks yet len >> blkbits is zero.
3026          */
3027         max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
3028                                                         - block;
3029         /*
3030          * credits to insert 1 extent into extent tree
3031          */
3032         credits = ext4_chunk_trans_blocks(inode, max_blocks);
3033         mutex_lock(&inode->i_mutex);
3034 retry:
3035         while (ret >= 0 && ret < max_blocks) {
3036                 block = block + ret;
3037                 max_blocks = max_blocks - ret;
3038                 handle = ext4_journal_start(inode, credits);
3039                 if (IS_ERR(handle)) {
3040                         ret = PTR_ERR(handle);
3041                         break;
3042                 }
3043                 ret = ext4_get_blocks_wrap(handle, inode, block,
3044                                           max_blocks, &map_bh,
3045                                           EXT4_CREATE_UNINITIALIZED_EXT, 0, 0);
3046                 if (ret <= 0) {
3047 #ifdef EXT4FS_DEBUG
3048                         WARN_ON(ret <= 0);
3049                         printk(KERN_ERR "%s: ext4_ext_get_blocks "
3050                                     "returned error inode#%lu, block=%u, "
3051                                     "max_blocks=%u\n", __func__,
3052                                     inode->i_ino, block, max_blocks);
3053 #endif
3054                         ext4_mark_inode_dirty(handle, inode);
3055                         ret2 = ext4_journal_stop(handle);
3056                         break;
3057                 }
3058                 if ((block + ret) >= (EXT4_BLOCK_ALIGN(offset + len,
3059                                                 blkbits) >> blkbits))
3060                         new_size = offset + len;
3061                 else
3062                         new_size = ((loff_t)block + ret) << blkbits;
3063
3064                 ext4_falloc_update_inode(inode, mode, new_size,
3065                                                 buffer_new(&map_bh));
3066                 ext4_mark_inode_dirty(handle, inode);
3067                 ret2 = ext4_journal_stop(handle);
3068                 if (ret2)
3069                         break;
3070         }
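             /*
              * ENOSPC can be transient: blocks freed by the committing
              * transaction are not reusable until the commit finishes, so
              * ext4_should_retry_alloc() forces a commit and reports
              * whether another pass is worthwhile.
              */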
3071         if (ret == -ENOSPC &&
3072                         ext4_should_retry_alloc(inode->i_sb, &retries)) {
3073                 ret = 0;
3074                 goto retry;
3075         }
3076         mutex_unlock(&inode->i_mutex);
3077         return ret > 0 ? ret2 : ret;
3078 }
3079
3080 /*
3081  * Callback function called for each extent to gather FIEMAP information.
3082  */
3083 static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
3084                        struct ext4_ext_cache *newex, struct ext4_extent *ex,
3085                        void *data)
3086 {
3087         struct fiemap_extent_info *fieinfo = data;
3088         unsigned long blksize_bits = inode->i_sb->s_blocksize_bits;
3089         __u64   logical;
3090         __u64   physical;
3091         __u64   length;
3092         __u32   flags = 0;
3093         int     error;
3094
3095         logical =  (__u64)newex->ec_block << blksize_bits;
3096
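             /*
              * A cached "gap" may still be dirty delalloc data with no
              * on-disk extent yet; probe the page cache for a delayed
              * buffer so such ranges are reported as DELALLOC.
              */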
3097         if (newex->ec_type == EXT4_EXT_CACHE_GAP) {
3098                 pgoff_t offset;
3099                 struct page *page;
3100                 struct buffer_head *bh = NULL;
3101
3102                 offset = logical >> PAGE_SHIFT;
3103                 page = find_get_page(inode->i_mapping, offset);
3104                 if (!page)
3105                         return EXT_CONTINUE;
3106                 if (!page_has_buffers(page)) {
3107                         page_cache_release(page); /* don't leak the ref */
3108                         return EXT_CONTINUE;
3109                 }
3110                 /* page_has_buffers() guarantees a non-NULL head */
3111                 bh = page_buffers(page);
3112                 if (buffer_delay(bh)) {
3113                         flags |= FIEMAP_EXTENT_DELALLOC;
3114                         page_cache_release(page);
3115                 } else {
3116                         page_cache_release(page);
3117                         return EXT_CONTINUE;
3118                 }
3119         }
3120
3121         physical = (__u64)newex->ec_start << blksize_bits;
3122         length =   (__u64)newex->ec_len << blksize_bits;
3123
3124         if (ex && ext4_ext_is_uninitialized(ex))
3125                 flags |= FIEMAP_EXTENT_UNWRITTEN;
3126
3127         /*
3128          * If this extent reaches EXT_MAX_BLOCK, it must be last.
3129          *
3130          * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK,
3131          * this also indicates no more allocated blocks.
3132          *
3133          * XXX this might miss a single-block extent at EXT_MAX_BLOCK
3134          */
3135         if (logical + length - 1 == EXT_MAX_BLOCK ||
3136             ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK)
3137                 flags |= FIEMAP_EXTENT_LAST;
3138
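             /*
              * fiemap_fill_next_extent() returns 1 once the caller's extent
              * array is full; map that onto EXT_BREAK to stop the walk.
              */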
3139         error = fiemap_fill_next_extent(fieinfo, logical, physical,
3140                                         length, flags);
3141         if (error < 0)
3142                 return error;
3143         if (error == 1)
3144                 return EXT_BREAK;
3145
3146         return EXT_CONTINUE;
3147 }
3148
3149 /* fiemap flags we can handle specified here */
3150 #define EXT4_FIEMAP_FLAGS       (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
3151
3152 static int ext4_xattr_fiemap(struct inode *inode,
3153                                 struct fiemap_extent_info *fieinfo)
3154 {
3155         __u64 physical = 0;
3156         __u64 length;
3157         __u32 flags = FIEMAP_EXTENT_LAST;
3158         int blockbits = inode->i_sb->s_blocksize_bits;
3159         int error = 0;
3160
3161         /* in-inode? */
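             /*
              * In-inode xattrs live in the inode-table block, immediately
              * after the fixed inode fields plus i_extra_isize; report
              * that byte range directly.
              */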
3162         if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) {
3163                 struct ext4_iloc iloc;
3164                 int offset;     /* offset of xattr in inode */
3165
3166                 error = ext4_get_inode_loc(inode, &iloc);
3167                 if (error)
3168                         return error;
3169                 physical = (__u64)iloc.bh->b_blocknr << blockbits;
3170                 offset = EXT4_GOOD_OLD_INODE_SIZE +
3171                                 EXT4_I(inode)->i_extra_isize;
3172                 physical += offset;
3173                 length = EXT4_SB(inode->i_sb)->s_inode_size - offset;
3174                 flags |= FIEMAP_EXTENT_DATA_INLINE;
3175         } else { /* external block */
3176                 physical = EXT4_I(inode)->i_file_acl << blockbits;
3177                 length = inode->i_sb->s_blocksize;
3178         }
3179
3180         if (physical)
3181                 error = fiemap_fill_next_extent(fieinfo, 0, physical,
3182                                                 length, flags);
3183         return (error < 0 ? error : 0);
3184 }
3185
3186 int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3187                 __u64 start, __u64 len)
3188 {
3189         ext4_lblk_t start_blk;
3190         ext4_lblk_t len_blks;
3191         int error = 0;
3192
3193         /* fallback to generic here if not in extents fmt */
3194         if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL))
3195                 return generic_block_fiemap(inode, fieinfo, start, len,
3196                         ext4_get_block);
3197
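             /*
              * fiemap_check_flags() fails with -EBADR if the request holds
              * flags we do not support, leaving the offending bits in
              * fi_flags for userspace to inspect.
              */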
3198         if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
3199                 return -EBADR;
3200
3201         if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
3202                 error = ext4_xattr_fiemap(inode, fieinfo);
3203         } else {
3204                 start_blk = start >> inode->i_sb->s_blocksize_bits;
3205                 len_blks = len >> inode->i_sb->s_blocksize_bits;
3206
3207                 /*
3208                  * Walk the extent tree gathering extent information.
3209                  * ext4_ext_fiemap_cb will push extents back to user.
3210                  */
3211                 down_write(&EXT4_I(inode)->i_data_sem);
3212                 error = ext4_ext_walk_space(inode, start_blk, len_blks,
3213                                           ext4_ext_fiemap_cb, fieinfo);
3214                 up_write(&EXT4_I(inode)->i_data_sem);
3215         }
3216
3217         return error;
3218 }
3219