diff options
author | Filipe Manana <fdmanana@suse.com> | 2019-07-05 11:09:50 +0100 |
---|---|---|
committer | David Sterba <dsterba@suse.com> | 2019-09-09 14:58:58 +0200 |
commit | 690a5dbfc5131572910e6350d65d7b9d55439817 (patch) | |
tree | b0c45a23efc2fc41a5ace2b9f4b343a65e576166 /fs/btrfs/ioctl.c | |
parent | 9cba40a693e69badb567d6ce0eaa0150f25c3d39 (diff) | |
download | linux-690a5dbfc5131572910e6350d65d7b9d55439817.tar.gz |
Btrfs: fix ENOSPC errors, leading to transaction aborts, when cloning extents
When cloning extents (or deduplicating) we create a transaction with a
space reservation that considers we will drop or update a single file
extent item of the destination inode (that we modify a single leaf). That
is fine for the vast majority of scenarios, however it might happen that
we need to drop many file extent items, and adjust at most two file extent
items, in the destination root, which can span multiple leafs. This will
lead to either the call to btrfs_drop_extents() to fail with ENOSPC or
the subsequent calls to btrfs_insert_empty_item() or btrfs_update_inode()
(called through clone_finish_inode_update()) to fail with ENOSPC. Such
failure results in a transaction abort, leaving the filesystem in a
read-only mode.
In order to fix this we need to follow the same approach as the hole
punching code, where we create a local reservation with 1 unit and keep
ending and starting transactions, after balancing the btree inode,
when __btrfs_drop_extents() returns ENOSPC. So fix this by making the
extent cloning call calls the recently added btrfs_punch_hole_range()
helper, which is what does the mentioned work for hole punching, and
make sure whenever we drop extent items in a transaction, we also add a
replacing file extent item, to avoid corruption (a hole) if after ending
a transaction and before starting a new one, the old transaction gets
committed and a power failure happens before we finish cloning.
A test case for fstests follows soon.
Reported-by: David Goodwin <david@codepoets.co.uk>
Link: https://lore.kernel.org/linux-btrfs/a4a4cf31-9cf4-e52c-1f86-c62d336c9cd1@codepoets.co.uk/
Reported-by: Sam Tygier <sam@tygier.co.uk>
Link: https://lore.kernel.org/linux-btrfs/82aace9f-a1e3-1f0b-055f-3ea75f7a41a0@tygier.co.uk/
Fixes: b6f3409b2197e8f ("Btrfs: reserve sufficient space for ioctl clone")
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/ioctl.c')
-rw-r--r-- | fs/btrfs/ioctl.c | 195 |
1 files changed, 43 insertions, 152 deletions
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 818f7ec8bb0e..0a0c54e99b22 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3324,61 +3324,6 @@ out: return ret; } -static void clone_update_extent_map(struct btrfs_inode *inode, - const struct btrfs_trans_handle *trans, - const struct btrfs_path *path, - const u64 hole_offset, - const u64 hole_len) -{ - struct extent_map_tree *em_tree = &inode->extent_tree; - struct extent_map *em; - int ret; - - em = alloc_extent_map(); - if (!em) { - set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags); - return; - } - - if (path) { - struct btrfs_file_extent_item *fi; - - fi = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_file_extent_item); - btrfs_extent_item_to_extent_map(inode, path, fi, false, em); - em->generation = -1; - if (btrfs_file_extent_type(path->nodes[0], fi) == - BTRFS_FILE_EXTENT_INLINE) - set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, - &inode->runtime_flags); - } else { - em->start = hole_offset; - em->len = hole_len; - em->ram_bytes = em->len; - em->orig_start = hole_offset; - em->block_start = EXTENT_MAP_HOLE; - em->block_len = 0; - em->orig_block_len = 0; - em->compress_type = BTRFS_COMPRESS_NONE; - em->generation = trans->transid; - } - - while (1) { - write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em, 1); - write_unlock(&em_tree->lock); - if (ret != -EEXIST) { - free_extent_map(em); - break; - } - btrfs_drop_extent_cache(inode, em->start, - em->start + em->len - 1, 0); - } - - if (ret) - set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags); -} - /* * Make sure we do not end up inserting an inline extent into a file that has * already other (non-inline) extents. If a file has an inline extent it can @@ -3519,6 +3464,7 @@ copy_inline_extent: path->slots[0]), size); inode_add_bytes(dst, datal); + set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(dst)->runtime_flags); return 0; } @@ -3678,19 +3624,10 @@ process_slot: else drop_start = new_key.offset; - /* - * 1 - adjusting old extent (we may have to split it) - * 1 - add new extent - * 1 - inode update - */ - trans = btrfs_start_transaction(root, 3); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto out; - } - if (type == BTRFS_FILE_EXTENT_REG || type == BTRFS_FILE_EXTENT_PREALLOC) { + struct btrfs_clone_extent_info clone_info; + /* * a | --- range to clone ---| b * | ------------- extent ------------- | @@ -3706,63 +3643,19 @@ process_slot: datal -= off - key.offset; } - ret = btrfs_drop_extents(trans, root, inode, - drop_start, - new_key.offset + datal, - 1); - if (ret) { - if (ret != -EOPNOTSUPP) - btrfs_abort_transaction(trans, - ret); - btrfs_end_transaction(trans); + clone_info.disk_offset = disko; + clone_info.disk_len = diskl; + clone_info.data_offset = datao; + clone_info.data_len = datal; + clone_info.file_offset = new_key.offset; + clone_info.extent_buf = buf; + clone_info.item_size = size; + ret = btrfs_punch_hole_range(inode, path, + drop_start, + new_key.offset + datal - 1, + &clone_info, &trans); + if (ret) goto out; - } - - ret = btrfs_insert_empty_item(trans, root, path, - &new_key, size); - if (ret) { - btrfs_abort_transaction(trans, ret); - btrfs_end_transaction(trans); - goto out; - } - - leaf = path->nodes[0]; - slot = path->slots[0]; - write_extent_buffer(leaf, buf, - btrfs_item_ptr_offset(leaf, slot), - size); - - extent = btrfs_item_ptr(leaf, slot, - struct btrfs_file_extent_item); - - /* disko == 0 means it's a hole */ - if (!disko) - datao = 0; - - btrfs_set_file_extent_offset(leaf, extent, - datao); - btrfs_set_file_extent_num_bytes(leaf, extent, - datal); - - if (disko) { - struct btrfs_ref ref = { 0 }; - inode_add_bytes(inode, datal); - btrfs_init_generic_ref(&ref, - BTRFS_ADD_DELAYED_REF, disko, - diskl, 0); - btrfs_init_data_ref(&ref, - root->root_key.objectid, - btrfs_ino(BTRFS_I(inode)), - new_key.offset - datao); - ret = btrfs_inc_extent_ref(trans, &ref); - if (ret) { - btrfs_abort_transaction(trans, - ret); - btrfs_end_transaction(trans); - goto out; - - } - } } else if (type == BTRFS_FILE_EXTENT_INLINE) { u64 skip = 0; u64 trim = 0; @@ -3777,12 +3670,27 @@ process_slot: if (comp && (skip || trim)) { ret = -EINVAL; - btrfs_end_transaction(trans); goto out; } size -= skip + trim; datal -= skip + trim; + /* + * If our extent is inline, we know we will drop + * or adjust at most 1 extent item in the + * destination root. + * + * 1 - adjusting old extent (we may have to + * split it) + * 1 - add new extent + * 1 - inode update + */ + trans = btrfs_start_transaction(root, 3); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + ret = clone_copy_inline_extent(inode, trans, path, &new_key, @@ -3796,20 +3704,8 @@ process_slot: btrfs_end_transaction(trans); goto out; } - leaf = path->nodes[0]; - slot = path->slots[0]; } - /* If we have an implicit hole (NO_HOLES feature). */ - if (drop_start < new_key.offset) - clone_update_extent_map(BTRFS_I(inode), trans, - NULL, drop_start, - new_key.offset - drop_start); - - clone_update_extent_map(BTRFS_I(inode), trans, - path, 0, 0); - - btrfs_mark_buffer_dirty(leaf); btrfs_release_path(path); last_dest_end = ALIGN(new_key.offset + datal, @@ -3834,32 +3730,27 @@ process_slot: ret = 0; if (last_dest_end < destoff + len) { + struct btrfs_clone_extent_info clone_info = { 0 }; /* * We have an implicit hole (NO_HOLES feature is enabled) that * fully or partially overlaps our cloning range at its end. */ btrfs_release_path(path); + path->leave_spinning = 0; /* - * 1 - remove extent(s) - * 1 - inode update + * We are dealing with a hole and our clone_info already has a + * disk_offset of 0, we only need to fill the data length and + * file offset. */ - trans = btrfs_start_transaction(root, 2); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - goto out; - } - ret = btrfs_drop_extents(trans, root, inode, - last_dest_end, destoff + len, 1); - if (ret) { - if (ret != -EOPNOTSUPP) - btrfs_abort_transaction(trans, ret); - btrfs_end_transaction(trans); + clone_info.data_len = destoff + len - last_dest_end; + clone_info.file_offset = last_dest_end; + ret = btrfs_punch_hole_range(inode, path, + last_dest_end, destoff + len - 1, + &clone_info, &trans); + if (ret) goto out; - } - clone_update_extent_map(BTRFS_I(inode), trans, NULL, - last_dest_end, - destoff + len - last_dest_end); + ret = clone_finish_inode_update(trans, inode, destoff + len, destoff, olen, no_time_update); } |