summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Rini <trini@konsulko.com>2020-09-07 21:00:47 -0400
committerTom Rini <trini@konsulko.com>2020-09-07 21:00:47 -0400
commit2a9f9d633d2e069e5d5e7acde050b338ec803692 (patch)
tree40c97310da83790361c498e158ee3814e8e140a7
parent314b9b4a38befd120ce1c566d9eea8e0ec9d8336 (diff)
parentb737c822c0c4f1210568f61e743236bb980ca645 (diff)
downloadu-boot-WIP/08Sep2020-next.tar.gz
Merge branch '2020-09-14-btrfs-rewrite' into nextWIP/08Sep2020-next
- Bring in the update to btrfs support that rewrites it based on btrfs-progs.
-rw-r--r--MAINTAINERS2
-rw-r--r--fs/btrfs/Makefile5
-rw-r--r--fs/btrfs/btrfs.c319
-rw-r--r--fs/btrfs/btrfs.h67
-rw-r--r--fs/btrfs/btrfs_tree.h766
-rw-r--r--fs/btrfs/chunk-map.c178
-rw-r--r--fs/btrfs/common/rbtree-utils.c83
-rw-r--r--fs/btrfs/common/rbtree-utils.h53
-rw-r--r--fs/btrfs/compat.h82
-rw-r--r--fs/btrfs/compression.c2
-rw-r--r--fs/btrfs/crypto/hash.c55
-rw-r--r--fs/btrfs/crypto/hash.h17
-rw-r--r--fs/btrfs/ctree.c859
-rw-r--r--fs/btrfs/ctree.h1453
-rw-r--r--fs/btrfs/dir-item.c192
-rw-r--r--fs/btrfs/disk-io.c1062
-rw-r--r--fs/btrfs/disk-io.h50
-rw-r--r--fs/btrfs/extent-cache.c318
-rw-r--r--fs/btrfs/extent-cache.h104
-rw-r--r--fs/btrfs/extent-io.c847
-rw-r--r--fs/btrfs/extent-io.h164
-rw-r--r--fs/btrfs/hash.c38
-rw-r--r--fs/btrfs/inode.c884
-rw-r--r--fs/btrfs/kernel-shared/btrfs_tree.h1333
-rw-r--r--fs/btrfs/root-tree.c47
-rw-r--r--fs/btrfs/root.c92
-rw-r--r--fs/btrfs/subvolume.c310
-rw-r--r--fs/btrfs/super.c257
-rw-r--r--fs/btrfs/volumes.c1173
-rw-r--r--fs/btrfs/volumes.h204
30 files changed, 8525 insertions, 2491 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 101f4e185d..332112f6fd 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -599,6 +599,8 @@ F: tools/binman/
BTRFS
M: Marek Behun <marek.behun@nic.cz>
+R: Qu Wenruo <wqu@suse.com>
+L: linux-btrfs@vger.kernel.org
S: Maintained
F: cmd/btrfs.c
F: fs/btrfs/
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 9b3159296f..fc074c84d2 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -2,5 +2,6 @@
#
# 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
-obj-y := btrfs.o chunk-map.o compression.o ctree.o dev.o dir-item.o \
- extent-io.o hash.o inode.o root.o subvolume.o super.o
+obj-y := btrfs.o compression.o ctree.o dev.o dir-item.o \
+ extent-io.o inode.o subvolume.o crypto/hash.o disk-io.o \
+ common/rbtree-utils.o extent-cache.o volumes.o root-tree.o
diff --git a/fs/btrfs/btrfs.c b/fs/btrfs/btrfs.c
index de16217d0d..cbf9dcffeb 100644
--- a/fs/btrfs/btrfs.c
+++ b/fs/btrfs/btrfs.c
@@ -5,219 +5,280 @@
* 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
*/
-#include "btrfs.h"
#include <config.h>
#include <malloc.h>
#include <uuid.h>
#include <linux/time.h>
+#include "btrfs.h"
+#include "crypto/hash.h"
+#include "disk-io.h"
-struct btrfs_info btrfs_info;
+struct btrfs_fs_info *current_fs_info;
-static int readdir_callback(const struct btrfs_root *root,
- struct btrfs_dir_item *item)
+static int show_dir(struct btrfs_root *root, struct extent_buffer *eb,
+ struct btrfs_dir_item *di)
{
- static const char typestr[BTRFS_FT_MAX][4] = {
- [BTRFS_FT_UNKNOWN] = " ? ",
- [BTRFS_FT_REG_FILE] = " ",
- [BTRFS_FT_DIR] = "DIR",
- [BTRFS_FT_CHRDEV] = "CHR",
- [BTRFS_FT_BLKDEV] = "BLK",
- [BTRFS_FT_FIFO] = "FIF",
- [BTRFS_FT_SOCK] = "SCK",
- [BTRFS_FT_SYMLINK] = "SYM",
- [BTRFS_FT_XATTR] = " ? ",
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_inode_item ii;
+ struct btrfs_key key;
+ static const char* dir_item_str[] = {
+ [BTRFS_FT_REG_FILE] = "FILE",
+ [BTRFS_FT_DIR] = "DIR",
+ [BTRFS_FT_CHRDEV] = "CHRDEV",
+ [BTRFS_FT_BLKDEV] = "BLKDEV",
+ [BTRFS_FT_FIFO] = "FIFO",
+ [BTRFS_FT_SOCK] = "SOCK",
+ [BTRFS_FT_SYMLINK] = "SYMLINK",
+ [BTRFS_FT_XATTR] = "XATTR"
};
- struct btrfs_inode_item inode;
- const char *name = (const char *) (item + 1);
- char filetime[32], *target = NULL;
+ u8 type = btrfs_dir_type(eb, di);
+ char namebuf[BTRFS_NAME_LEN];
+ char *target = NULL;
+ char filetime[32];
time_t mtime;
+ int ret;
- if (btrfs_lookup_inode(root, &item->location, &inode, NULL)) {
- printf("%s: Cannot find inode item for directory entry %.*s!\n",
- __func__, item->name_len, name);
- return 0;
- }
-
- mtime = inode.mtime.sec;
- ctime_r(&mtime, filetime);
+ btrfs_dir_item_key_to_cpu(eb, di, &key);
- if (item->type == BTRFS_FT_SYMLINK) {
- target = malloc(min(inode.size + 1,
- (u64) btrfs_info.sb.sectorsize));
+ if (key.type == BTRFS_ROOT_ITEM_KEY) {
+ struct btrfs_root *subvol;
- if (target && btrfs_readlink(root, item->location.objectid,
- target)) {
- free(target);
- target = NULL;
+ /* It's a subvolume, get its mtime from root item */
+ subvol = btrfs_read_fs_root(fs_info, &key);
+ if (IS_ERR(subvol)) {
+ ret = PTR_ERR(subvol);
+ error("Can't find root %llu", key.objectid);
+ return ret;
}
+ mtime = btrfs_stack_timespec_sec(&subvol->root_item.otime);
+ } else {
+ struct btrfs_path path;
+
+ /* It's regular inode, get its mtime from inode item */
+ btrfs_init_path(&path);
+ ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+ if (ret > 0)
+ ret = -ENOENT;
+ if (ret < 0) {
+ error("Can't find inode %llu", key.objectid);
+ btrfs_release_path(&path);
+ return ret;
+ }
+ read_extent_buffer(path.nodes[0], &ii,
+ btrfs_item_ptr_offset(path.nodes[0], path.slots[0]),
+ sizeof(ii));
+ btrfs_release_path(&path);
+ mtime = btrfs_stack_timespec_sec(&ii.mtime);
+ }
+ ctime_r(&mtime, filetime);
- if (!target)
- printf("%s: Cannot read symlink target!\n", __func__);
+ if (type == BTRFS_FT_SYMLINK) {
+ target = malloc(fs_info->sectorsize);
+ if (!target) {
+ error("Can't alloc memory for symlink %llu",
+ key.objectid);
+ return -ENOMEM;
+ }
+ ret = btrfs_readlink(root, key.objectid, target);
+ if (ret < 0) {
+ error("Failed to read symlink %llu", key.objectid);
+ goto out;
+ }
+ target[ret] = '\0';
}
- printf("<%s> ", typestr[item->type]);
- if (item->type == BTRFS_FT_CHRDEV || item->type == BTRFS_FT_BLKDEV)
- printf("%4u,%5u ", (unsigned int) (inode.rdev >> 20),
- (unsigned int) (inode.rdev & 0xfffff));
+ if (type < ARRAY_SIZE(dir_item_str) && dir_item_str[type])
+ printf("<%s> ", dir_item_str[type]);
else
- printf("%10llu ", inode.size);
-
- printf("%24.24s %.*s", filetime, item->name_len, name);
-
- if (item->type == BTRFS_FT_SYMLINK) {
- printf(" -> %s", target ? target : "?");
- if (target)
- free(target);
+ printf("DIR_ITEM.%u", type);
+ if (type == BTRFS_FT_CHRDEV || type == BTRFS_FT_BLKDEV) {
+ ASSERT(key.type == BTRFS_INODE_ITEM_KEY);
+ printf("%4llu,%5llu ", btrfs_stack_inode_rdev(&ii) >> 20,
+ btrfs_stack_inode_rdev(&ii) & 0xfffff);
+ } else {
+ if (key.type == BTRFS_INODE_ITEM_KEY)
+ printf("%10llu ", btrfs_stack_inode_size(&ii));
+ else
+ printf("%10llu ", 0ULL);
}
+ read_extent_buffer(eb, namebuf, (unsigned long)(di + 1),
+ btrfs_dir_name_len(eb, di));
+ printf("%24.24s %.*s", filetime, btrfs_dir_name_len(eb, di), namebuf);
+ if (type == BTRFS_FT_SYMLINK)
+ printf(" -> %s", target ? target : "?");
printf("\n");
-
- return 0;
+out:
+ free(target);
+ return ret;
}
int btrfs_probe(struct blk_desc *fs_dev_desc,
struct disk_partition *fs_partition)
{
- btrfs_blk_desc = fs_dev_desc;
- btrfs_part_info = fs_partition;
-
- memset(&btrfs_info, 0, sizeof(btrfs_info));
+ struct btrfs_fs_info *fs_info;
+ int ret = -1;
btrfs_hash_init();
- if (btrfs_read_superblock())
- return -1;
-
- if (btrfs_chunk_map_init()) {
- printf("%s: failed to init chunk map\n", __func__);
- return -1;
+ fs_info = open_ctree_fs_info(fs_dev_desc, fs_partition);
+ if (fs_info) {
+ current_fs_info = fs_info;
+ ret = 0;
}
-
- btrfs_info.tree_root.objectid = 0;
- btrfs_info.tree_root.bytenr = btrfs_info.sb.root;
- btrfs_info.chunk_root.objectid = 0;
- btrfs_info.chunk_root.bytenr = btrfs_info.sb.chunk_root;
-
- if (btrfs_read_chunk_tree()) {
- printf("%s: failed to read chunk tree\n", __func__);
- return -1;
- }
-
- if (btrfs_find_root(btrfs_get_default_subvol_objectid(),
- &btrfs_info.fs_root, NULL)) {
- printf("%s: failed to find default subvolume\n", __func__);
- return -1;
- }
-
- return 0;
+ return ret;
}
int btrfs_ls(const char *path)
{
- struct btrfs_root root = btrfs_info.fs_root;
- u64 inr;
+ struct btrfs_fs_info *fs_info = current_fs_info;
+ struct btrfs_root *root = fs_info->fs_root;
+ u64 ino = BTRFS_FIRST_FREE_OBJECTID;
u8 type;
+ int ret;
- inr = btrfs_lookup_path(&root, root.root_dirid, path, &type, NULL, 40);
-
- if (inr == -1ULL) {
+ ASSERT(fs_info);
+ ret = btrfs_lookup_path(fs_info->fs_root, BTRFS_FIRST_FREE_OBJECTID,
+ path, &root, &ino, &type, 40);
+ if (ret < 0) {
printf("Cannot lookup path %s\n", path);
- return -1;
+ return ret;
}
if (type != BTRFS_FT_DIR) {
- printf("Not a directory: %s\n", path);
- return -1;
+ error("Not a directory: %s", path);
+ return -ENOENT;
}
-
- if (btrfs_readdir(&root, inr, readdir_callback)) {
- printf("An error occured while listing directory %s\n", path);
- return -1;
+ ret = btrfs_iter_dir(root, ino, show_dir);
+ if (ret < 0) {
+ error("An error occured while listing directory %s", path);
+ return ret;
}
-
return 0;
}
int btrfs_exists(const char *file)
{
- struct btrfs_root root = btrfs_info.fs_root;
- u64 inr;
+ struct btrfs_fs_info *fs_info = current_fs_info;
+ struct btrfs_root *root;
+ u64 ino;
u8 type;
+ int ret;
- inr = btrfs_lookup_path(&root, root.root_dirid, file, &type, NULL, 40);
+ ASSERT(fs_info);
- return (inr != -1ULL && type == BTRFS_FT_REG_FILE);
+ ret = btrfs_lookup_path(fs_info->fs_root, BTRFS_FIRST_FREE_OBJECTID,
+ file, &root, &ino, &type, 40);
+ if (ret < 0)
+ return 0;
+
+ if (type == BTRFS_FT_REG_FILE)
+ return 1;
+ return 0;
}
int btrfs_size(const char *file, loff_t *size)
{
- struct btrfs_root root = btrfs_info.fs_root;
- struct btrfs_inode_item inode;
- u64 inr;
+ struct btrfs_fs_info *fs_info = current_fs_info;
+ struct btrfs_inode_item *ii;
+ struct btrfs_root *root;
+ struct btrfs_path path;
+ struct btrfs_key key;
+ u64 ino;
u8 type;
+ int ret;
- inr = btrfs_lookup_path(&root, root.root_dirid, file, &type, &inode,
- 40);
-
- if (inr == -1ULL) {
+ ret = btrfs_lookup_path(fs_info->fs_root, BTRFS_FIRST_FREE_OBJECTID,
+ file, &root, &ino, &type, 40);
+ if (ret < 0) {
printf("Cannot lookup file %s\n", file);
- return -1;
+ return ret;
}
-
if (type != BTRFS_FT_REG_FILE) {
printf("Not a regular file: %s\n", file);
- return -1;
+ return -ENOENT;
}
-
- *size = inode.size;
- return 0;
+ btrfs_init_path(&path);
+ key.objectid = ino;
+ key.type = BTRFS_INODE_ITEM_KEY;
+ key.offset = 0;
+
+ ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+ if (ret < 0) {
+ printf("Cannot lookup ino %llu\n", ino);
+ return ret;
+ }
+ if (ret > 0) {
+ printf("Ino %llu does not exist\n", ino);
+ ret = -ENOENT;
+ goto out;
+ }
+ ii = btrfs_item_ptr(path.nodes[0], path.slots[0],
+ struct btrfs_inode_item);
+ *size = btrfs_inode_size(path.nodes[0], ii);
+out:
+ btrfs_release_path(&path);
+ return ret;
}
int btrfs_read(const char *file, void *buf, loff_t offset, loff_t len,
loff_t *actread)
{
- struct btrfs_root root = btrfs_info.fs_root;
- struct btrfs_inode_item inode;
- u64 inr, rd;
+ struct btrfs_fs_info *fs_info = current_fs_info;
+ struct btrfs_root *root;
+ loff_t real_size = 0;
+ u64 ino;
u8 type;
-
- inr = btrfs_lookup_path(&root, root.root_dirid, file, &type, &inode,
- 40);
-
- if (inr == -1ULL) {
- printf("Cannot lookup file %s\n", file);
- return -1;
+ int ret;
+
+ ASSERT(fs_info);
+ ret = btrfs_lookup_path(fs_info->fs_root, BTRFS_FIRST_FREE_OBJECTID,
+ file, &root, &ino, &type, 40);
+ if (ret < 0) {
+ error("Cannot lookup file %s", file);
+ return ret;
}
if (type != BTRFS_FT_REG_FILE) {
- printf("Not a regular file: %s\n", file);
- return -1;
+ error("Not a regular file: %s", file);
+ return -EINVAL;
}
- if (!len)
- len = inode.size;
+ if (!len) {
+ ret = btrfs_size(file, &real_size);
+ if (ret < 0) {
+ error("Failed to get inode size: %s", file);
+ return ret;
+ }
+ len = real_size;
+ }
- if (len > inode.size - offset)
- len = inode.size - offset;
+ if (len > real_size - offset)
+ len = real_size - offset;
- rd = btrfs_file_read(&root, inr, offset, len, buf);
- if (rd == -1ULL) {
- printf("An error occured while reading file %s\n", file);
- return -1;
+ ret = btrfs_file_read(root, ino, offset, len, buf);
+ if (ret < 0) {
+ error("An error occured while reading file %s", file);
+ return ret;
}
- *actread = rd;
+ *actread = len;
return 0;
}
void btrfs_close(void)
{
- btrfs_chunk_map_exit();
+ if (current_fs_info) {
+ close_ctree_fs_info(current_fs_info);
+ current_fs_info = NULL;
+ }
}
int btrfs_uuid(char *uuid_str)
{
#ifdef CONFIG_LIB_UUID
- uuid_bin_to_str(btrfs_info.sb.fsid, uuid_str, UUID_STR_FORMAT_STD);
+ if (current_fs_info)
+ uuid_bin_to_str(current_fs_info->super_copy->fsid, uuid_str,
+ UUID_STR_FORMAT_STD);
return 0;
#endif
return -ENOSYS;
diff --git a/fs/btrfs/btrfs.h b/fs/btrfs/btrfs.h
index 25a8cf6a87..7d8b395b26 100644
--- a/fs/btrfs/btrfs.h
+++ b/fs/btrfs/btrfs.h
@@ -11,77 +11,18 @@
#include <linux/rbtree.h>
#include "conv-funcs.h"
-struct btrfs_info {
- struct btrfs_super_block sb;
-
- struct btrfs_root tree_root;
- struct btrfs_root fs_root;
- struct btrfs_root chunk_root;
-
- struct rb_root chunks_root;
-};
-
extern struct btrfs_info btrfs_info;
-
-/* hash.c */
-void btrfs_hash_init(void);
-u32 btrfs_crc32c(u32, const void *, size_t);
-u32 btrfs_csum_data(char *, u32, size_t);
-void btrfs_csum_final(u32, void *);
-
-static inline u64 btrfs_name_hash(const char *name, int len)
-{
- return btrfs_crc32c((u32) ~1, name, len);
-}
-
-/* dev.c */
-extern struct blk_desc *btrfs_blk_desc;
-extern struct disk_partition *btrfs_part_info;
-
-int btrfs_devread(u64, int, void *);
-
-/* chunk-map.c */
-u64 btrfs_map_logical_to_physical(u64);
-int btrfs_chunk_map_init(void);
-void btrfs_chunk_map_exit(void);
-int btrfs_read_chunk_tree(void);
+extern struct btrfs_fs_info *current_fs_info;
/* compression.c */
u32 btrfs_decompress(u8 type, const char *, u32, char *, u32);
-/* super.c */
-int btrfs_read_superblock(void);
-
-/* dir-item.c */
-typedef int (*btrfs_readdir_callback_t)(const struct btrfs_root *,
- struct btrfs_dir_item *);
-
-int btrfs_lookup_dir_item(const struct btrfs_root *, u64, const char *, int,
- struct btrfs_dir_item *);
-int btrfs_readdir(const struct btrfs_root *, u64, btrfs_readdir_callback_t);
-
-/* root.c */
-int btrfs_find_root(u64, struct btrfs_root *, struct btrfs_root_item *);
-u64 btrfs_lookup_root_ref(u64, struct btrfs_root_ref *, char *);
-
/* inode.c */
-u64 btrfs_lookup_inode_ref(struct btrfs_root *, u64, struct btrfs_inode_ref *,
- char *);
-int btrfs_lookup_inode(const struct btrfs_root *, struct btrfs_key *,
- struct btrfs_inode_item *, struct btrfs_root *);
-int btrfs_readlink(const struct btrfs_root *, u64, char *);
-u64 btrfs_lookup_path(struct btrfs_root *, u64, const char *, u8 *,
- struct btrfs_inode_item *, int);
-u64 btrfs_file_read(const struct btrfs_root *, u64, u64, u64, char *);
+int btrfs_readlink(struct btrfs_root *root, u64 ino, char *target);
+int btrfs_file_read(struct btrfs_root *root, u64 ino, u64 file_offset, u64 len,
+ char *dest);
/* subvolume.c */
u64 btrfs_get_default_subvol_objectid(void);
-/* extent-io.c */
-u64 btrfs_read_extent_inline(struct btrfs_path *,
- struct btrfs_file_extent_item *, u64, u64,
- char *);
-u64 btrfs_read_extent_reg(struct btrfs_path *, struct btrfs_file_extent_item *,
- u64, u64, char *);
-
#endif /* !__BTRFS_BTRFS_H__ */
diff --git a/fs/btrfs/btrfs_tree.h b/fs/btrfs/btrfs_tree.h
deleted file mode 100644
index aa0f3d6c86..0000000000
--- a/fs/btrfs/btrfs_tree.h
+++ /dev/null
@@ -1,766 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0+ */
-/*
- * From linux/include/uapi/linux/btrfs_tree.h
- */
-
-#ifndef __BTRFS_BTRFS_TREE_H__
-#define __BTRFS_BTRFS_TREE_H__
-
-#include <common.h>
-
-#define BTRFS_VOL_NAME_MAX 255
-#define BTRFS_NAME_MAX 255
-#define BTRFS_LABEL_SIZE 256
-#define BTRFS_FSID_SIZE 16
-#define BTRFS_UUID_SIZE 16
-
-/*
- * This header contains the structure definitions and constants used
- * by file system objects that can be retrieved using
- * the BTRFS_IOC_SEARCH_TREE ioctl. That means basically anything that
- * is needed to describe a leaf node's key or item contents.
- */
-
-/* holds pointers to all of the tree roots */
-#define BTRFS_ROOT_TREE_OBJECTID 1ULL
-
-/* stores information about which extents are in use, and reference counts */
-#define BTRFS_EXTENT_TREE_OBJECTID 2ULL
-
-/*
- * chunk tree stores translations from logical -> physical block numbering
- * the super block points to the chunk tree
- */
-#define BTRFS_CHUNK_TREE_OBJECTID 3ULL
-
-/*
- * stores information about which areas of a given device are in use.
- * one per device. The tree of tree roots points to the device tree
- */
-#define BTRFS_DEV_TREE_OBJECTID 4ULL
-
-/* one per subvolume, storing files and directories */
-#define BTRFS_FS_TREE_OBJECTID 5ULL
-
-/* directory objectid inside the root tree */
-#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL
-
-/* holds checksums of all the data extents */
-#define BTRFS_CSUM_TREE_OBJECTID 7ULL
-
-/* holds quota configuration and tracking */
-#define BTRFS_QUOTA_TREE_OBJECTID 8ULL
-
-/* for storing items that use the BTRFS_UUID_KEY* types */
-#define BTRFS_UUID_TREE_OBJECTID 9ULL
-
-/* tracks free space in block groups. */
-#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
-
-/* device stats in the device tree */
-#define BTRFS_DEV_STATS_OBJECTID 0ULL
-
-/* for storing balance parameters in the root tree */
-#define BTRFS_BALANCE_OBJECTID -4ULL
-
-/* orhpan objectid for tracking unlinked/truncated files */
-#define BTRFS_ORPHAN_OBJECTID -5ULL
-
-/* does write ahead logging to speed up fsyncs */
-#define BTRFS_TREE_LOG_OBJECTID -6ULL
-#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL
-
-/* for space balancing */
-#define BTRFS_TREE_RELOC_OBJECTID -8ULL
-#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL
-
-/*
- * extent checksums all have this objectid
- * this allows them to share the logging tree
- * for fsyncs
- */
-#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
-
-/* For storing free space cache */
-#define BTRFS_FREE_SPACE_OBJECTID -11ULL
-
-/*
- * The inode number assigned to the special inode for storing
- * free ino cache
- */
-#define BTRFS_FREE_INO_OBJECTID -12ULL
-
-/* dummy objectid represents multiple objectids */
-#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
-
-/*
- * All files have objectids in this range.
- */
-#define BTRFS_FIRST_FREE_OBJECTID 256ULL
-#define BTRFS_LAST_FREE_OBJECTID -256ULL
-#define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL
-
-
-/*
- * the device items go into the chunk tree. The key is in the form
- * [ 1 BTRFS_DEV_ITEM_KEY device_id ]
- */
-#define BTRFS_DEV_ITEMS_OBJECTID 1ULL
-
-#define BTRFS_BTREE_INODE_OBJECTID 1
-
-#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
-
-#define BTRFS_DEV_REPLACE_DEVID 0ULL
-
-/*
- * inode items have the data typically returned from stat and store other
- * info about object characteristics. There is one for every file and dir in
- * the FS
- */
-#define BTRFS_INODE_ITEM_KEY 1
-#define BTRFS_INODE_REF_KEY 12
-#define BTRFS_INODE_EXTREF_KEY 13
-#define BTRFS_XATTR_ITEM_KEY 24
-#define BTRFS_ORPHAN_ITEM_KEY 48
-/* reserve 2-15 close to the inode for later flexibility */
-
-/*
- * dir items are the name -> inode pointers in a directory. There is one
- * for every name in a directory.
- */
-#define BTRFS_DIR_LOG_ITEM_KEY 60
-#define BTRFS_DIR_LOG_INDEX_KEY 72
-#define BTRFS_DIR_ITEM_KEY 84
-#define BTRFS_DIR_INDEX_KEY 96
-/*
- * extent data is for file data
- */
-#define BTRFS_EXTENT_DATA_KEY 108
-
-/*
- * extent csums are stored in a separate tree and hold csums for
- * an entire extent on disk.
- */
-#define BTRFS_EXTENT_CSUM_KEY 128
-
-/*
- * root items point to tree roots. They are typically in the root
- * tree used by the super block to find all the other trees
- */
-#define BTRFS_ROOT_ITEM_KEY 132
-
-/*
- * root backrefs tie subvols and snapshots to the directory entries that
- * reference them
- */
-#define BTRFS_ROOT_BACKREF_KEY 144
-
-/*
- * root refs make a fast index for listing all of the snapshots and
- * subvolumes referenced by a given root. They point directly to the
- * directory item in the root that references the subvol
- */
-#define BTRFS_ROOT_REF_KEY 156
-
-/*
- * extent items are in the extent map tree. These record which blocks
- * are used, and how many references there are to each block
- */
-#define BTRFS_EXTENT_ITEM_KEY 168
-
-/*
- * The same as the BTRFS_EXTENT_ITEM_KEY, except it's metadata we already know
- * the length, so we save the level in key->offset instead of the length.
- */
-#define BTRFS_METADATA_ITEM_KEY 169
-
-#define BTRFS_TREE_BLOCK_REF_KEY 176
-
-#define BTRFS_EXTENT_DATA_REF_KEY 178
-
-#define BTRFS_EXTENT_REF_V0_KEY 180
-
-#define BTRFS_SHARED_BLOCK_REF_KEY 182
-
-#define BTRFS_SHARED_DATA_REF_KEY 184
-
-/*
- * block groups give us hints into the extent allocation trees. Which
- * blocks are free etc etc
- */
-#define BTRFS_BLOCK_GROUP_ITEM_KEY 192
-
-/*
- * Every block group is represented in the free space tree by a free space info
- * item, which stores some accounting information. It is keyed on
- * (block_group_start, FREE_SPACE_INFO, block_group_length).
- */
-#define BTRFS_FREE_SPACE_INFO_KEY 198
-
-/*
- * A free space extent tracks an extent of space that is free in a block group.
- * It is keyed on (start, FREE_SPACE_EXTENT, length).
- */
-#define BTRFS_FREE_SPACE_EXTENT_KEY 199
-
-/*
- * When a block group becomes very fragmented, we convert it to use bitmaps
- * instead of extents. A free space bitmap is keyed on
- * (start, FREE_SPACE_BITMAP, length); the corresponding item is a bitmap with
- * (length / sectorsize) bits.
- */
-#define BTRFS_FREE_SPACE_BITMAP_KEY 200
-
-#define BTRFS_DEV_EXTENT_KEY 204
-#define BTRFS_DEV_ITEM_KEY 216
-#define BTRFS_CHUNK_ITEM_KEY 228
-
-/*
- * Records the overall state of the qgroups.
- * There's only one instance of this key present,
- * (0, BTRFS_QGROUP_STATUS_KEY, 0)
- */
-#define BTRFS_QGROUP_STATUS_KEY 240
-/*
- * Records the currently used space of the qgroup.
- * One key per qgroup, (0, BTRFS_QGROUP_INFO_KEY, qgroupid).
- */
-#define BTRFS_QGROUP_INFO_KEY 242
-/*
- * Contains the user configured limits for the qgroup.
- * One key per qgroup, (0, BTRFS_QGROUP_LIMIT_KEY, qgroupid).
- */
-#define BTRFS_QGROUP_LIMIT_KEY 244
-/*
- * Records the child-parent relationship of qgroups. For
- * each relation, 2 keys are present:
- * (childid, BTRFS_QGROUP_RELATION_KEY, parentid)
- * (parentid, BTRFS_QGROUP_RELATION_KEY, childid)
- */
-#define BTRFS_QGROUP_RELATION_KEY 246
-
-/*
- * Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY.
- */
-#define BTRFS_BALANCE_ITEM_KEY 248
-
-/*
- * The key type for tree items that are stored persistently, but do not need to
- * exist for extended period of time. The items can exist in any tree.
- *
- * [subtype, BTRFS_TEMPORARY_ITEM_KEY, data]
- *
- * Existing items:
- *
- * - balance status item
- * (BTRFS_BALANCE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, 0)
- */
-#define BTRFS_TEMPORARY_ITEM_KEY 248
-
-/*
- * Obsolete name, see BTRFS_PERSISTENT_ITEM_KEY
- */
-#define BTRFS_DEV_STATS_KEY 249
-
-/*
- * The key type for tree items that are stored persistently and usually exist
- * for a long period, eg. filesystem lifetime. The item kinds can be status
- * information, stats or preference values. The item can exist in any tree.
- *
- * [subtype, BTRFS_PERSISTENT_ITEM_KEY, data]
- *
- * Existing items:
- *
- * - device statistics, store IO stats in the device tree, one key for all
- * stats
- * (BTRFS_DEV_STATS_OBJECTID, BTRFS_DEV_STATS_KEY, 0)
- */
-#define BTRFS_PERSISTENT_ITEM_KEY 249
-
-/*
- * Persistantly stores the device replace state in the device tree.
- * The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0).
- */
-#define BTRFS_DEV_REPLACE_KEY 250
-
-/*
- * Stores items that allow to quickly map UUIDs to something else.
- * These items are part of the filesystem UUID tree.
- * The key is built like this:
- * (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits).
- */
-#if BTRFS_UUID_SIZE != 16
-#error "UUID items require BTRFS_UUID_SIZE == 16!"
-#endif
-#define BTRFS_UUID_KEY_SUBVOL 251 /* for UUIDs assigned to subvols */
-#define BTRFS_UUID_KEY_RECEIVED_SUBVOL 252 /* for UUIDs assigned to
- * received subvols */
-
-/*
- * string items are for debugging. They just store a short string of
- * data in the FS
- */
-#define BTRFS_STRING_ITEM_KEY 253
-
-
-
-/* 32 bytes in various csum fields */
-#define BTRFS_CSUM_SIZE 32
-
-/* csum types */
-#define BTRFS_CSUM_TYPE_CRC32 0
-
-/*
- * flags definitions for directory entry item type
- *
- * Used by:
- * struct btrfs_dir_item.type
- */
-#define BTRFS_FT_UNKNOWN 0
-#define BTRFS_FT_REG_FILE 1
-#define BTRFS_FT_DIR 2
-#define BTRFS_FT_CHRDEV 3
-#define BTRFS_FT_BLKDEV 4
-#define BTRFS_FT_FIFO 5
-#define BTRFS_FT_SOCK 6
-#define BTRFS_FT_SYMLINK 7
-#define BTRFS_FT_XATTR 8
-#define BTRFS_FT_MAX 9
-
-/*
- * The key defines the order in the tree, and so it also defines (optimal)
- * block layout.
- *
- * objectid corresponds to the inode number.
- *
- * type tells us things about the object, and is a kind of stream selector.
- * so for a given inode, keys with type of 1 might refer to the inode data,
- * type of 2 may point to file data in the btree and type == 3 may point to
- * extents.
- *
- * offset is the starting byte offset for this key in the stream.
- */
-
-struct btrfs_key {
- __u64 objectid;
- __u8 type;
- __u64 offset;
-} __attribute__ ((__packed__));
-
-struct btrfs_dev_item {
- /* the internal btrfs device id */
- __u64 devid;
-
- /* size of the device */
- __u64 total_bytes;
-
- /* bytes used */
- __u64 bytes_used;
-
- /* optimal io alignment for this device */
- __u32 io_align;
-
- /* optimal io width for this device */
- __u32 io_width;
-
- /* minimal io size for this device */
- __u32 sector_size;
-
- /* type and info about this device */
- __u64 type;
-
- /* expected generation for this device */
- __u64 generation;
-
- /*
- * starting byte of this partition on the device,
- * to allow for stripe alignment in the future
- */
- __u64 start_offset;
-
- /* grouping information for allocation decisions */
- __u32 dev_group;
-
- /* seek speed 0-100 where 100 is fastest */
- __u8 seek_speed;
-
- /* bandwidth 0-100 where 100 is fastest */
- __u8 bandwidth;
-
- /* btrfs generated uuid for this device */
- __u8 uuid[BTRFS_UUID_SIZE];
-
- /* uuid of FS who owns this device */
- __u8 fsid[BTRFS_UUID_SIZE];
-} __attribute__ ((__packed__));
-
-struct btrfs_stripe {
- __u64 devid;
- __u64 offset;
- __u8 dev_uuid[BTRFS_UUID_SIZE];
-} __attribute__ ((__packed__));
-
-struct btrfs_chunk {
- /* size of this chunk in bytes */
- __u64 length;
-
- /* objectid of the root referencing this chunk */
- __u64 owner;
-
- __u64 stripe_len;
- __u64 type;
-
- /* optimal io alignment for this chunk */
- __u32 io_align;
-
- /* optimal io width for this chunk */
- __u32 io_width;
-
- /* minimal io size for this chunk */
- __u32 sector_size;
-
- /* 2^16 stripes is quite a lot, a second limit is the size of a single
- * item in the btree
- */
- __u16 num_stripes;
-
- /* sub stripes only matter for raid10 */
- __u16 sub_stripes;
- struct btrfs_stripe stripe;
- /* additional stripes go here */
-} __attribute__ ((__packed__));
-
-#define BTRFS_FREE_SPACE_EXTENT 1
-#define BTRFS_FREE_SPACE_BITMAP 2
-
-struct btrfs_free_space_entry {
- __u64 offset;
- __u64 bytes;
- __u8 type;
-} __attribute__ ((__packed__));
-
-struct btrfs_free_space_header {
- struct btrfs_key location;
- __u64 generation;
- __u64 num_entries;
- __u64 num_bitmaps;
-} __attribute__ ((__packed__));
-
-#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0)
-#define BTRFS_HEADER_FLAG_RELOC (1ULL << 1)
-
-/* Super block flags */
-/* Errors detected */
-#define BTRFS_SUPER_FLAG_ERROR (1ULL << 2)
-
-#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)
-#define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33)
-
-
-/*
- * items in the extent btree are used to record the objectid of the
- * owner of the block and the number of references
- */
-
-struct btrfs_extent_item {
- __u64 refs;
- __u64 generation;
- __u64 flags;
-} __attribute__ ((__packed__));
-
-
-#define BTRFS_EXTENT_FLAG_DATA (1ULL << 0)
-#define BTRFS_EXTENT_FLAG_TREE_BLOCK (1ULL << 1)
-
-/* following flags only apply to tree blocks */
-
-/* use full backrefs for extent pointers in the block */
-#define BTRFS_BLOCK_FLAG_FULL_BACKREF (1ULL << 8)
-
-/*
- * this flag is only used internally by scrub and may be changed at any time
- * it is only declared here to avoid collisions
- */
-#define BTRFS_EXTENT_FLAG_SUPER (1ULL << 48)
-
-struct btrfs_tree_block_info {
- struct btrfs_key key;
- __u8 level;
-} __attribute__ ((__packed__));
-
-struct btrfs_extent_data_ref {
- __u64 root;
- __u64 objectid;
- __u64 offset;
- __u32 count;
-} __attribute__ ((__packed__));
-
-struct btrfs_shared_data_ref {
- __u32 count;
-} __attribute__ ((__packed__));
-
-struct btrfs_extent_inline_ref {
- __u8 type;
- __u64 offset;
-} __attribute__ ((__packed__));
-
-/* dev extents record free space on individual devices. The owner
- * field points back to the chunk allocation mapping tree that allocated
- * the extent. The chunk tree uuid field is a way to double check the owner
- */
-struct btrfs_dev_extent {
- __u64 chunk_tree;
- __u64 chunk_objectid;
- __u64 chunk_offset;
- __u64 length;
- __u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
-} __attribute__ ((__packed__));
-
-struct btrfs_inode_ref {
- __u64 index;
- __u16 name_len;
- /* name goes here */
-} __attribute__ ((__packed__));
-
-struct btrfs_inode_extref {
- __u64 parent_objectid;
- __u64 index;
- __u16 name_len;
- __u8 name[0];
- /* name goes here */
-} __attribute__ ((__packed__));
-
-struct btrfs_timespec {
- __u64 sec;
- __u32 nsec;
-} __attribute__ ((__packed__));
-
-struct btrfs_inode_item {
- /* nfs style generation number */
- __u64 generation;
- /* transid that last touched this inode */
- __u64 transid;
- __u64 size;
- __u64 nbytes;
- __u64 block_group;
- __u32 nlink;
- __u32 uid;
- __u32 gid;
- __u32 mode;
- __u64 rdev;
- __u64 flags;
-
- /* modification sequence number for NFS */
- __u64 sequence;
-
- /*
- * a little future expansion, for more than this we can
- * just grow the inode item and version it
- */
- __u64 reserved[4];
- struct btrfs_timespec atime;
- struct btrfs_timespec ctime;
- struct btrfs_timespec mtime;
- struct btrfs_timespec otime;
-} __attribute__ ((__packed__));
-
-struct btrfs_dir_log_item {
- __u64 end;
-} __attribute__ ((__packed__));
-
-struct btrfs_dir_item {
- struct btrfs_key location;
- __u64 transid;
- __u16 data_len;
- __u16 name_len;
- __u8 type;
-} __attribute__ ((__packed__));
-
-#define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0)
-
-/*
- * Internal in-memory flag that a subvolume has been marked for deletion but
- * still visible as a directory
- */
-#define BTRFS_ROOT_SUBVOL_DEAD (1ULL << 48)
-
-struct btrfs_root_item {
- struct btrfs_inode_item inode;
- __u64 generation;
- __u64 root_dirid;
- __u64 bytenr;
- __u64 byte_limit;
- __u64 bytes_used;
- __u64 last_snapshot;
- __u64 flags;
- __u32 refs;
- struct btrfs_key drop_progress;
- __u8 drop_level;
- __u8 level;
-
- /*
- * The following fields appear after subvol_uuids+subvol_times
- * were introduced.
- */
-
- /*
- * This generation number is used to test if the new fields are valid
- * and up to date while reading the root item. Every time the root item
- * is written out, the "generation" field is copied into this field. If
- * anyone ever mounted the fs with an older kernel, we will have
- * mismatching generation values here and thus must invalidate the
- * new fields. See btrfs_update_root and btrfs_find_last_root for
- * details.
- * the offset of generation_v2 is also used as the start for the memset
- * when invalidating the fields.
- */
- __u64 generation_v2;
- __u8 uuid[BTRFS_UUID_SIZE];
- __u8 parent_uuid[BTRFS_UUID_SIZE];
- __u8 received_uuid[BTRFS_UUID_SIZE];
- __u64 ctransid; /* updated when an inode changes */
- __u64 otransid; /* trans when created */
- __u64 stransid; /* trans when sent. non-zero for received subvol */
- __u64 rtransid; /* trans when received. non-zero for received subvol */
- struct btrfs_timespec ctime;
- struct btrfs_timespec otime;
- struct btrfs_timespec stime;
- struct btrfs_timespec rtime;
- __u64 reserved[8]; /* for future */
-} __attribute__ ((__packed__));
-
-/*
- * this is used for both forward and backward root refs
- */
-struct btrfs_root_ref {
- __u64 dirid;
- __u64 sequence;
- __u16 name_len;
-} __attribute__ ((__packed__));
-
-#define BTRFS_FILE_EXTENT_INLINE 0
-#define BTRFS_FILE_EXTENT_REG 1
-#define BTRFS_FILE_EXTENT_PREALLOC 2
-
-enum btrfs_compression_type {
- BTRFS_COMPRESS_NONE = 0,
- BTRFS_COMPRESS_ZLIB = 1,
- BTRFS_COMPRESS_LZO = 2,
- BTRFS_COMPRESS_ZSTD = 3,
- BTRFS_COMPRESS_TYPES = 3,
- BTRFS_COMPRESS_LAST = 4,
-};
-
-struct btrfs_file_extent_item {
- /*
- * transaction id that created this extent
- */
- __u64 generation;
- /*
- * max number of bytes to hold this extent in ram
- * when we split a compressed extent we can't know how big
- * each of the resulting pieces will be. So, this is
- * an upper limit on the size of the extent in ram instead of
- * an exact limit.
- */
- __u64 ram_bytes;
-
- /*
- * 32 bits for the various ways we might encode the data,
- * including compression and encryption. If any of these
- * are set to something a given disk format doesn't understand
- * it is treated like an incompat flag for reading and writing,
- * but not for stat.
- */
- __u8 compression;
- __u8 encryption;
- __u16 other_encoding; /* spare for later use */
-
- /* are we inline data or a real extent? */
- __u8 type;
-
- /*
- * disk space consumed by the extent, checksum blocks are included
- * in these numbers
- *
- * At this offset in the structure, the inline extent data start.
- */
- __u64 disk_bytenr;
- __u64 disk_num_bytes;
- /*
- * the logical offset in file blocks (no csums)
- * this extent record is for. This allows a file extent to point
- * into the middle of an existing extent on disk, sharing it
- * between two snapshots (useful if some bytes in the middle of the
- * extent have changed
- */
- __u64 offset;
- /*
- * the logical number of file blocks (no csums included). This
- * always reflects the size uncompressed and without encoding.
- */
- __u64 num_bytes;
-
-} __attribute__ ((__packed__));
-
-struct btrfs_csum_item {
- __u8 csum;
-} __attribute__ ((__packed__));
-
-/* different types of block groups (and chunks) */
-#define BTRFS_BLOCK_GROUP_DATA (1ULL << 0)
-#define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1)
-#define BTRFS_BLOCK_GROUP_METADATA (1ULL << 2)
-#define BTRFS_BLOCK_GROUP_RAID0 (1ULL << 3)
-#define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4)
-#define BTRFS_BLOCK_GROUP_DUP (1ULL << 5)
-#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6)
-#define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7)
-#define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8)
-#define BTRFS_BLOCK_GROUP_RESERVED (BTRFS_AVAIL_ALLOC_BIT_SINGLE | \
- BTRFS_SPACE_INFO_GLOBAL_RSV)
-
-enum btrfs_raid_types {
- BTRFS_RAID_RAID10,
- BTRFS_RAID_RAID1,
- BTRFS_RAID_DUP,
- BTRFS_RAID_RAID0,
- BTRFS_RAID_SINGLE,
- BTRFS_RAID_RAID5,
- BTRFS_RAID_RAID6,
- BTRFS_NR_RAID_TYPES
-};
-
-#define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \
- BTRFS_BLOCK_GROUP_SYSTEM | \
- BTRFS_BLOCK_GROUP_METADATA)
-
-#define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \
- BTRFS_BLOCK_GROUP_RAID1 | \
- BTRFS_BLOCK_GROUP_RAID5 | \
- BTRFS_BLOCK_GROUP_RAID6 | \
- BTRFS_BLOCK_GROUP_DUP | \
- BTRFS_BLOCK_GROUP_RAID10)
-#define BTRFS_BLOCK_GROUP_RAID56_MASK (BTRFS_BLOCK_GROUP_RAID5 | \
- BTRFS_BLOCK_GROUP_RAID6)
-
-/*
- * We need a bit for restriper to be able to tell when chunks of type
- * SINGLE are available. This "extended" profile format is used in
- * fs_info->avail_*_alloc_bits (in-memory) and balance item fields
- * (on-disk). The corresponding on-disk bit in chunk.type is reserved
- * to avoid remappings between two formats in future.
- */
-#define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48)
-
-/*
- * A fake block group type that is used to communicate global block reserve
- * size to userspace via the SPACE_INFO ioctl.
- */
-#define BTRFS_SPACE_INFO_GLOBAL_RSV (1ULL << 49)
-
-#define BTRFS_EXTENDED_PROFILE_MASK (BTRFS_BLOCK_GROUP_PROFILE_MASK | \
- BTRFS_AVAIL_ALLOC_BIT_SINGLE)
-
-#endif /* __BTRFS_BTRFS_TREE_H__ */
diff --git a/fs/btrfs/chunk-map.c b/fs/btrfs/chunk-map.c
deleted file mode 100644
index 2e5be65067..0000000000
--- a/fs/btrfs/chunk-map.c
+++ /dev/null
@@ -1,178 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * BTRFS filesystem implementation for U-Boot
- *
- * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
- */
-
-#include "btrfs.h"
-#include <log.h>
-#include <malloc.h>
-
-struct chunk_map_item {
- struct rb_node node;
- u64 logical;
- u64 length;
- u64 physical;
-};
-
-static int add_chunk_mapping(struct btrfs_key *key, struct btrfs_chunk *chunk)
-{
- struct btrfs_stripe *stripe;
- u64 block_profile = chunk->type & BTRFS_BLOCK_GROUP_PROFILE_MASK;
- struct rb_node **new = &(btrfs_info.chunks_root.rb_node), *prnt = NULL;
- struct chunk_map_item *map_item;
-
- if (block_profile && block_profile != BTRFS_BLOCK_GROUP_DUP) {
- printf("%s: unsupported chunk profile %llu\n", __func__,
- block_profile);
- return -1;
- } else if (!chunk->length) {
- printf("%s: zero length chunk\n", __func__);
- return -1;
- }
-
- stripe = &chunk->stripe;
- btrfs_stripe_to_cpu(stripe);
-
- while (*new) {
- struct chunk_map_item *this;
-
- this = rb_entry(*new, struct chunk_map_item, node);
-
- prnt = *new;
- if (key->offset < this->logical) {
- new = &((*new)->rb_left);
- } else if (key->offset > this->logical) {
- new = &((*new)->rb_right);
- } else {
- debug("%s: Logical address %llu already in map!\n",
- __func__, key->offset);
- return 0;
- }
- }
-
- map_item = malloc(sizeof(struct chunk_map_item));
- if (!map_item)
- return -1;
-
- map_item->logical = key->offset;
- map_item->length = chunk->length;
- map_item->physical = le64_to_cpu(chunk->stripe.offset);
- rb_link_node(&map_item->node, prnt, new);
- rb_insert_color(&map_item->node, &btrfs_info.chunks_root);
-
- debug("%s: Mapping %llu to %llu\n", __func__, map_item->logical,
- map_item->physical);
-
- return 0;
-}
-
-u64 btrfs_map_logical_to_physical(u64 logical)
-{
- struct rb_node *node = btrfs_info.chunks_root.rb_node;
-
- while (node) {
- struct chunk_map_item *item;
-
- item = rb_entry(node, struct chunk_map_item, node);
-
- if (item->logical > logical)
- node = node->rb_left;
- else if (logical >= item->logical + item->length)
- node = node->rb_right;
- else
- return item->physical + logical - item->logical;
- }
-
- printf("%s: Cannot map logical address %llu to physical\n", __func__,
- logical);
-
- return -1ULL;
-}
-
-void btrfs_chunk_map_exit(void)
-{
- struct rb_node *now, *next;
- struct chunk_map_item *item;
-
- for (now = rb_first_postorder(&btrfs_info.chunks_root); now; now = next)
- {
- item = rb_entry(now, struct chunk_map_item, node);
- next = rb_next_postorder(now);
- free(item);
- }
-}
-
-int btrfs_chunk_map_init(void)
-{
- u8 sys_chunk_array_copy[sizeof(btrfs_info.sb.sys_chunk_array)];
- u8 * const start = sys_chunk_array_copy;
- u8 * const end = start + btrfs_info.sb.sys_chunk_array_size;
- u8 *cur;
- struct btrfs_key *key;
- struct btrfs_chunk *chunk;
-
- btrfs_info.chunks_root = RB_ROOT;
-
- memcpy(sys_chunk_array_copy, btrfs_info.sb.sys_chunk_array,
- sizeof(sys_chunk_array_copy));
-
- for (cur = start; cur < end;) {
- key = (struct btrfs_key *) cur;
- cur += sizeof(struct btrfs_key);
- chunk = (struct btrfs_chunk *) cur;
-
- btrfs_key_to_cpu(key);
- btrfs_chunk_to_cpu(chunk);
-
- if (key->type != BTRFS_CHUNK_ITEM_KEY) {
- printf("%s: invalid key type %u\n", __func__,
- key->type);
- return -1;
- }
-
- if (add_chunk_mapping(key, chunk))
- return -1;
-
- cur += sizeof(struct btrfs_chunk);
- cur += sizeof(struct btrfs_stripe) * (chunk->num_stripes - 1);
- }
-
- return 0;
-}
-
-int btrfs_read_chunk_tree(void)
-{
- struct btrfs_path path;
- struct btrfs_key key, *found_key;
- struct btrfs_chunk *chunk;
- int res = 0;
-
- key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
- key.type = BTRFS_CHUNK_ITEM_KEY;
- key.offset = 0;
-
- if (btrfs_search_tree(&btrfs_info.chunk_root, &key, &path))
- return -1;
-
- do {
- found_key = btrfs_path_leaf_key(&path);
- if (btrfs_comp_keys_type(&key, found_key))
- continue;
-
- chunk = btrfs_path_item_ptr(&path, struct btrfs_chunk);
- btrfs_chunk_to_cpu(chunk);
- if (add_chunk_mapping(found_key, chunk)) {
- res = -1;
- break;
- }
- } while (!(res = btrfs_next_slot(&path)));
-
- btrfs_free_path(&path);
-
- if (res < 0)
- return -1;
-
- return 0;
-}
diff --git a/fs/btrfs/common/rbtree-utils.c b/fs/btrfs/common/rbtree-utils.c
new file mode 100644
index 0000000000..7a7d7e84e6
--- /dev/null
+++ b/fs/btrfs/common/rbtree-utils.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2014 Facebook. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/errno.h>
+#include "rbtree-utils.h"
+
+int rb_insert(struct rb_root *root, struct rb_node *node,
+ rb_compare_nodes comp)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ int ret;
+
+ while(*p) {
+ parent = *p;
+
+ ret = comp(parent, node);
+ if (ret < 0)
+ p = &(*p)->rb_left;
+ else if (ret > 0)
+ p = &(*p)->rb_right;
+ else
+ return -EEXIST;
+ }
+
+ rb_link_node(node, parent, p);
+ rb_insert_color(node, root);
+ return 0;
+}
+
+struct rb_node *rb_search(struct rb_root *root, void *key, rb_compare_keys comp,
+ struct rb_node **next_ret)
+{
+ struct rb_node *n = root->rb_node;
+ struct rb_node *parent = NULL;
+ int ret = 0;
+
+ while(n) {
+ parent = n;
+
+ ret = comp(n, key);
+ if (ret < 0)
+ n = n->rb_left;
+ else if (ret > 0)
+ n = n->rb_right;
+ else
+ return n;
+ }
+
+ if (!next_ret)
+ return NULL;
+
+ if (parent && ret > 0)
+ parent = rb_next(parent);
+
+ *next_ret = parent;
+ return NULL;
+}
+
+void rb_free_nodes(struct rb_root *root, rb_free_node free_node)
+{
+ struct rb_node *node;
+
+ while ((node = rb_first(root))) {
+ rb_erase(node, root);
+ free_node(node);
+ }
+}
diff --git a/fs/btrfs/common/rbtree-utils.h b/fs/btrfs/common/rbtree-utils.h
new file mode 100644
index 0000000000..d977cfd955
--- /dev/null
+++ b/fs/btrfs/common/rbtree-utils.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2014 Facebook. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __RBTREE_UTILS__
+#define __RBTREE_UTILS__
+
+#include <linux/rbtree.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* The common insert/search/free functions */
+typedef int (*rb_compare_nodes)(struct rb_node *node1, struct rb_node *node2);
+typedef int (*rb_compare_keys)(struct rb_node *node, void *key);
+typedef void (*rb_free_node)(struct rb_node *node);
+
+int rb_insert(struct rb_root *root, struct rb_node *node,
+ rb_compare_nodes comp);
+/*
+ * In some cases, we need return the next node if we don't find the node we
+ * specify. At this time, we can use next_ret.
+ */
+struct rb_node *rb_search(struct rb_root *root, void *key, rb_compare_keys comp,
+ struct rb_node **next_ret);
+void rb_free_nodes(struct rb_root *root, rb_free_node free_node);
+
+#define FREE_RB_BASED_TREE(name, free_func) \
+static void free_##name##_tree(struct rb_root *root) \
+{ \
+ rb_free_nodes(root, free_func); \
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/fs/btrfs/compat.h b/fs/btrfs/compat.h
new file mode 100644
index 0000000000..9cf8a10c76
--- /dev/null
+++ b/fs/btrfs/compat.h
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#ifndef __BTRFS_COMPAT_H__
+#define __BTRFS_COMPAT_H__
+
+#include <linux/errno.h>
+#include <fs_internal.h>
+#include <uuid.h>
+
+/* Provide a compatibility layer to make code syncing easier */
+
+/* A simple wraper to for error() used in btrfs-progs */
+#define error(fmt, ...) pr_err("BTRFS: " fmt "\n", ##__VA_ARGS__)
+
+#define ASSERT(c) assert(c)
+
+#define BTRFS_UUID_UNPARSED_SIZE 37
+
+/* No <linux/limits.h> so have to define it here */
+#define XATTR_NAME_MAX 255
+#define PATH_MAX 4096
+
+/*
+ * Macros to generate set/get funcs for the struct fields
+ * assume there is a lefoo_to_cpu for every type, so lets make a simple
+ * one for u8:
+ */
+#define le8_to_cpu(v) (v)
+#define cpu_to_le8(v) (v)
+#define __le8 u8
+
+/*
+ * Macros to generate set/get funcs for the struct fields
+ * assume there is a lefoo_to_cpu for every type, so lets make a simple
+ * one for u8:
+ */
+#define le8_to_cpu(v) (v)
+#define cpu_to_le8(v) (v)
+#define __le8 u8
+
+#define get_unaligned_le8(p) (*((u8 *)(p)))
+#define get_unaligned_8(p) (*((u8 *)(p)))
+#define put_unaligned_le8(val,p) ((*((u8 *)(p))) = (val))
+#define put_unaligned_8(val,p) ((*((u8 *)(p))) = (val))
+
+/*
+ * Read data from device specified by @desc and @part
+ *
+ * U-boot equivalent of pread().
+ *
+ * Return the bytes of data read.
+ * Return <0 for error.
+ */
+static inline int __btrfs_devread(struct blk_desc *desc,
+ struct disk_partition *part,
+ void *buf, size_t size, u64 offset)
+{
+ lbaint_t sector;
+ int byte_offset;
+ int ret;
+
+ sector = offset >> desc->log2blksz;
+ byte_offset = offset % desc->blksz;
+
+ /* fs_devread() return 0 for error, >0 for success */
+ ret = fs_devread(desc, part, sector, byte_offset, size, buf);
+ if (!ret)
+ return -EIO;
+ return size;
+}
+
+static inline void uuid_unparse(const u8 *uuid, char *out)
+{
+ return uuid_bin_to_str((unsigned char *)uuid, out, 0);
+}
+
+static inline int is_power_of_2(unsigned long n)
+{
+ return (n != 0 && ((n & (n - 1)) == 0));
+}
+
+#endif
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 59e4a94cb2..23efefa199 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -115,7 +115,7 @@ static u32 decompress_zlib(const u8 *_cbuf, u32 clen, u8 *dbuf, u32 dlen)
while (stream.total_in < clen) {
stream.next_in = cbuf + stream.total_in;
stream.avail_in = min((u32) (clen - stream.total_in),
- (u32) btrfs_info.sb.sectorsize);
+ current_fs_info->sectorsize);
ret = inflate(&stream, Z_NO_FLUSH);
if (ret != Z_OK)
diff --git a/fs/btrfs/crypto/hash.c b/fs/btrfs/crypto/hash.c
new file mode 100644
index 0000000000..fb51f6386c
--- /dev/null
+++ b/fs/btrfs/crypto/hash.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/xxhash.h>
+#include <linux/unaligned/access_ok.h>
+#include <linux/types.h>
+#include <u-boot/sha256.h>
+#include <u-boot/crc.h>
+
+static u32 btrfs_crc32c_table[256];
+
+void btrfs_hash_init(void)
+{
+ static int inited = 0;
+
+ if (!inited) {
+ crc32c_init(btrfs_crc32c_table, 0x82F63B78);
+ inited = 1;
+ }
+}
+
+int hash_sha256(const u8 *buf, size_t length, u8 *out)
+{
+ sha256_context ctx;
+
+ sha256_starts(&ctx);
+ sha256_update(&ctx, buf, length);
+ sha256_finish(&ctx, out);
+
+ return 0;
+}
+
+int hash_xxhash(const u8 *buf, size_t length, u8 *out)
+{
+ u64 hash;
+
+ hash = xxh64(buf, length, 0);
+ put_unaligned_le64(hash, out);
+
+ return 0;
+}
+
+int hash_crc32c(const u8 *buf, size_t length, u8 *out)
+{
+ u32 crc;
+
+ crc = crc32c_cal((u32)~0, (char *)buf, length, btrfs_crc32c_table);
+ put_unaligned_le32(~crc, out);
+
+ return 0;
+}
+
+u32 crc32c(u32 seed, const void * data, size_t len)
+{
+ return crc32c_cal(seed, data, len, btrfs_crc32c_table);
+}
diff --git a/fs/btrfs/crypto/hash.h b/fs/btrfs/crypto/hash.h
new file mode 100644
index 0000000000..d1ba1fa374
--- /dev/null
+++ b/fs/btrfs/crypto/hash.h
@@ -0,0 +1,17 @@
+#ifndef CRYPTO_HASH_H
+#define CRYPTO_HASH_H
+
+#include <linux/types.h>
+
+#define CRYPTO_HASH_SIZE_MAX 32
+
+void btrfs_hash_init(void);
+int hash_crc32c(const u8 *buf, size_t length, u8 *out);
+int hash_xxhash(const u8 *buf, size_t length, u8 *out);
+int hash_sha256(const u8 *buf, size_t length, u8 *out);
+
+u32 crc32c(u32 seed, const void * data, size_t len);
+
+/* Blake2B is not yet supported due to lack of library */
+
+#endif
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 28f98d43ad..5ffced9160 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -5,287 +5,738 @@
* 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
*/
-#include "btrfs.h"
+#include <linux/kernel.h>
#include <log.h>
#include <malloc.h>
#include <memalign.h>
+#include "btrfs.h"
+#include "disk-io.h"
+
+static const struct btrfs_csum {
+ u16 size;
+ const char name[14];
+} btrfs_csums[] = {
+ [BTRFS_CSUM_TYPE_CRC32] = { 4, "crc32c" },
+ [BTRFS_CSUM_TYPE_XXHASH] = { 8, "xxhash64" },
+ [BTRFS_CSUM_TYPE_SHA256] = { 32, "sha256" },
+ [BTRFS_CSUM_TYPE_BLAKE2] = { 32, "blake2" },
+};
+
+u16 btrfs_super_csum_size(const struct btrfs_super_block *sb)
+{
+ const u16 csum_type = btrfs_super_csum_type(sb);
+
+ return btrfs_csums[csum_type].size;
+}
+
+const char *btrfs_super_csum_name(u16 csum_type)
+{
+ return btrfs_csums[csum_type].name;
+}
+
+size_t btrfs_super_num_csums(void)
+{
+ return ARRAY_SIZE(btrfs_csums);
+}
+
+u16 btrfs_csum_type_size(u16 csum_type)
+{
+ return btrfs_csums[csum_type].size;
+}
+
+struct btrfs_path *btrfs_alloc_path(void)
+{
+ struct btrfs_path *path;
+ path = kzalloc(sizeof(struct btrfs_path), GFP_NOFS);
+ return path;
+}
+
+void btrfs_free_path(struct btrfs_path *p)
+{
+ if (!p)
+ return;
+ btrfs_release_path(p);
+ kfree(p);
+}
+
+void btrfs_release_path(struct btrfs_path *p)
+{
+ int i;
+ for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
+ if (!p->nodes[i])
+ continue;
+ free_extent_buffer(p->nodes[i]);
+ }
+ memset(p, 0, sizeof(*p));
+}
-int btrfs_comp_keys(struct btrfs_key *a, struct btrfs_key *b)
+int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2)
{
- if (a->objectid > b->objectid)
+ if (k1->objectid > k2->objectid)
return 1;
- if (a->objectid < b->objectid)
+ if (k1->objectid < k2->objectid)
return -1;
- if (a->type > b->type)
+ if (k1->type > k2->type)
return 1;
- if (a->type < b->type)
+ if (k1->type < k2->type)
return -1;
- if (a->offset > b->offset)
+ if (k1->offset > k2->offset)
return 1;
- if (a->offset < b->offset)
+ if (k1->offset < k2->offset)
return -1;
return 0;
}
-int btrfs_comp_keys_type(struct btrfs_key *a, struct btrfs_key *b)
+static int btrfs_comp_keys(struct btrfs_disk_key *disk,
+ const struct btrfs_key *k2)
{
- if (a->objectid > b->objectid)
- return 1;
- if (a->objectid < b->objectid)
- return -1;
- if (a->type > b->type)
- return 1;
- if (a->type < b->type)
- return -1;
- return 0;
+ struct btrfs_key k1;
+
+ btrfs_disk_key_to_cpu(&k1, disk);
+ return btrfs_comp_cpu_keys(&k1, k2);
}
-static int generic_bin_search(void *addr, int item_size, struct btrfs_key *key,
- int max, int *slot)
+enum btrfs_tree_block_status
+btrfs_check_node(struct btrfs_fs_info *fs_info,
+ struct btrfs_disk_key *parent_key, struct extent_buffer *buf)
{
- int low = 0, high = max, mid, ret;
- struct btrfs_key *tmp;
+ int i;
+ struct btrfs_key cpukey;
+ struct btrfs_disk_key key;
+ u32 nritems = btrfs_header_nritems(buf);
+ enum btrfs_tree_block_status ret = BTRFS_TREE_BLOCK_INVALID_NRITEMS;
+
+ if (nritems == 0 || nritems > BTRFS_NODEPTRS_PER_BLOCK(fs_info))
+ goto fail;
+
+ ret = BTRFS_TREE_BLOCK_INVALID_PARENT_KEY;
+ if (parent_key && parent_key->type) {
+ btrfs_node_key(buf, &key, 0);
+ if (memcmp(parent_key, &key, sizeof(key)))
+ goto fail;
+ }
+ ret = BTRFS_TREE_BLOCK_BAD_KEY_ORDER;
+ for (i = 0; nritems > 1 && i < nritems - 2; i++) {
+ btrfs_node_key(buf, &key, i);
+ btrfs_node_key_to_cpu(buf, &cpukey, i + 1);
+ if (btrfs_comp_keys(&key, &cpukey) >= 0)
+ goto fail;
+ }
+ return BTRFS_TREE_BLOCK_CLEAN;
+fail:
+ return ret;
+}
+
+enum btrfs_tree_block_status
+btrfs_check_leaf(struct btrfs_fs_info *fs_info,
+ struct btrfs_disk_key *parent_key, struct extent_buffer *buf)
+{
+ int i;
+ struct btrfs_key cpukey;
+ struct btrfs_disk_key key;
+ u32 nritems = btrfs_header_nritems(buf);
+ enum btrfs_tree_block_status ret = BTRFS_TREE_BLOCK_INVALID_NRITEMS;
+
+ if (nritems * sizeof(struct btrfs_item) > buf->len) {
+ fprintf(stderr, "invalid number of items %llu\n",
+ (unsigned long long)buf->start);
+ goto fail;
+ }
+
+ if (btrfs_header_level(buf) != 0) {
+ ret = BTRFS_TREE_BLOCK_INVALID_LEVEL;
+ fprintf(stderr, "leaf is not a leaf %llu\n",
+ (unsigned long long)btrfs_header_bytenr(buf));
+ goto fail;
+ }
+ if (btrfs_leaf_free_space(buf) < 0) {
+ ret = BTRFS_TREE_BLOCK_INVALID_FREE_SPACE;
+ fprintf(stderr, "leaf free space incorrect %llu %d\n",
+ (unsigned long long)btrfs_header_bytenr(buf),
+ btrfs_leaf_free_space(buf));
+ goto fail;
+ }
- while (low < high) {
+ if (nritems == 0)
+ return BTRFS_TREE_BLOCK_CLEAN;
+
+ btrfs_item_key(buf, &key, 0);
+ if (parent_key && parent_key->type &&
+ memcmp(parent_key, &key, sizeof(key))) {
+ ret = BTRFS_TREE_BLOCK_INVALID_PARENT_KEY;
+ fprintf(stderr, "leaf parent key incorrect %llu\n",
+ (unsigned long long)btrfs_header_bytenr(buf));
+ goto fail;
+ }
+ for (i = 0; nritems > 1 && i < nritems - 1; i++) {
+ btrfs_item_key(buf, &key, i);
+ btrfs_item_key_to_cpu(buf, &cpukey, i + 1);
+ if (btrfs_comp_keys(&key, &cpukey) >= 0) {
+ ret = BTRFS_TREE_BLOCK_BAD_KEY_ORDER;
+ fprintf(stderr, "bad key ordering %d %d\n", i, i+1);
+ goto fail;
+ }
+ if (btrfs_item_offset_nr(buf, i) !=
+ btrfs_item_end_nr(buf, i + 1)) {
+ ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS;
+ fprintf(stderr, "incorrect offsets %u %u\n",
+ btrfs_item_offset_nr(buf, i),
+ btrfs_item_end_nr(buf, i + 1));
+ goto fail;
+ }
+ if (i == 0 && btrfs_item_end_nr(buf, i) !=
+ BTRFS_LEAF_DATA_SIZE(fs_info)) {
+ ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS;
+ fprintf(stderr, "bad item end %u wanted %u\n",
+ btrfs_item_end_nr(buf, i),
+ (unsigned)BTRFS_LEAF_DATA_SIZE(fs_info));
+ goto fail;
+ }
+ }
+
+ for (i = 0; i < nritems; i++) {
+ if (btrfs_item_end_nr(buf, i) >
+ BTRFS_LEAF_DATA_SIZE(fs_info)) {
+ btrfs_item_key(buf, &key, 0);
+ ret = BTRFS_TREE_BLOCK_INVALID_OFFSETS;
+ fprintf(stderr, "slot end outside of leaf %llu > %llu\n",
+ (unsigned long long)btrfs_item_end_nr(buf, i),
+ (unsigned long long)BTRFS_LEAF_DATA_SIZE(
+ fs_info));
+ goto fail;
+ }
+ }
+
+ return BTRFS_TREE_BLOCK_CLEAN;
+fail:
+ return ret;
+}
+
+static int noinline check_block(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path, int level)
+{
+ struct btrfs_disk_key key;
+ struct btrfs_disk_key *key_ptr = NULL;
+ struct extent_buffer *parent;
+ enum btrfs_tree_block_status ret;
+
+ if (path->nodes[level + 1]) {
+ parent = path->nodes[level + 1];
+ btrfs_node_key(parent, &key, path->slots[level + 1]);
+ key_ptr = &key;
+ }
+ if (level == 0)
+ ret = btrfs_check_leaf(fs_info, key_ptr, path->nodes[0]);
+ else
+ ret = btrfs_check_node(fs_info, key_ptr, path->nodes[level]);
+ if (ret == BTRFS_TREE_BLOCK_CLEAN)
+ return 0;
+ return -EIO;
+}
+
+/*
+ * search for key in the extent_buffer. The items start at offset p,
+ * and they are item_size apart. There are 'max' items in p.
+ *
+ * the slot in the array is returned via slot, and it points to
+ * the place where you would insert key if it is not found in
+ * the array.
+ *
+ * slot may point to max if the key is bigger than all of the keys
+ */
+static int generic_bin_search(struct extent_buffer *eb, unsigned long p,
+ int item_size, const struct btrfs_key *key,
+ int max, int *slot)
+{
+ int low = 0;
+ int high = max;
+ int mid;
+ int ret;
+ unsigned long offset;
+ struct btrfs_disk_key *tmp;
+
+ while(low < high) {
mid = (low + high) / 2;
+ offset = p + mid * item_size;
- tmp = (struct btrfs_key *) ((u8 *) addr + mid*item_size);
+ tmp = (struct btrfs_disk_key *)(eb->data + offset);
ret = btrfs_comp_keys(tmp, key);
- if (ret < 0) {
+ if (ret < 0)
low = mid + 1;
- } else if (ret > 0) {
+ else if (ret > 0)
high = mid;
- } else {
+ else {
*slot = mid;
return 0;
}
}
-
*slot = low;
return 1;
}
-int btrfs_bin_search(union btrfs_tree_node *p, struct btrfs_key *key,
+/*
+ * simple bin_search frontend that does the right thing for
+ * leaves vs nodes
+ */
+int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
int *slot)
{
- void *addr;
- unsigned long size;
-
- if (p->header.level) {
- addr = p->node.ptrs;
- size = sizeof(struct btrfs_key_ptr);
- } else {
- addr = p->leaf.items;
- size = sizeof(struct btrfs_item);
- }
-
- return generic_bin_search(addr, size, key, p->header.nritems, slot);
+ if (btrfs_header_level(eb) == 0)
+ return generic_bin_search(eb,
+ offsetof(struct btrfs_leaf, items),
+ sizeof(struct btrfs_item),
+ key, btrfs_header_nritems(eb),
+ slot);
+ else
+ return generic_bin_search(eb,
+ offsetof(struct btrfs_node, ptrs),
+ sizeof(struct btrfs_key_ptr),
+ key, btrfs_header_nritems(eb),
+ slot);
}
-static void clear_path(struct btrfs_path *p)
+struct extent_buffer *read_node_slot(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *parent, int slot)
{
- int i;
-
- for (i = 0; i < BTRFS_MAX_LEVEL; ++i) {
- p->nodes[i] = NULL;
- p->slots[i] = 0;
+ struct extent_buffer *ret;
+ int level = btrfs_header_level(parent);
+
+ if (slot < 0)
+ return NULL;
+ if (slot >= btrfs_header_nritems(parent))
+ return NULL;
+
+ if (level == 0)
+ return NULL;
+
+ ret = read_tree_block(fs_info, btrfs_node_blockptr(parent, slot),
+ btrfs_node_ptr_generation(parent, slot));
+ if (!extent_buffer_uptodate(ret))
+ return ERR_PTR(-EIO);
+
+ if (btrfs_header_level(ret) != level - 1) {
+ error("child eb corrupted: parent bytenr=%llu item=%d parent level=%d child level=%d",
+ btrfs_header_bytenr(parent), slot,
+ btrfs_header_level(parent), btrfs_header_level(ret));
+ free_extent_buffer(ret);
+ return ERR_PTR(-EIO);
}
+ return ret;
}
-void btrfs_free_path(struct btrfs_path *p)
+int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path,
+ u64 iobjectid, u64 ioff, u8 key_type,
+ struct btrfs_key *found_key)
{
- int i;
+ int ret;
+ struct btrfs_key key;
+ struct extent_buffer *eb;
+ struct btrfs_path *path;
+
+ key.type = key_type;
+ key.objectid = iobjectid;
+ key.offset = ioff;
+
+ if (found_path == NULL) {
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ } else
+ path = found_path;
+
+ ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
+ if ((ret < 0) || (found_key == NULL))
+ goto out;
+
+ eb = path->nodes[0];
+ if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
+ ret = btrfs_next_leaf(fs_root, path);
+ if (ret)
+ goto out;
+ eb = path->nodes[0];
+ }
- for (i = 0; i < BTRFS_MAX_LEVEL; ++i) {
- if (p->nodes[i])
- free(p->nodes[i]);
+ btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
+ if (found_key->type != key.type ||
+ found_key->objectid != key.objectid) {
+ ret = 1;
+ goto out;
}
- clear_path(p);
+out:
+ if (path != found_path)
+ btrfs_free_path(path);
+ return ret;
}
-static int read_tree_node(u64 physical, union btrfs_tree_node **buf)
+/*
+ * look for key in the tree. path is filled in with nodes along the way
+ * if key is found, we return zero and you can find the item in the leaf
+ * level of the path (level 0)
+ *
+ * If the key isn't found, the path points to the slot where it should
+ * be inserted, and 1 is returned. If there are other errors during the
+ * search a negative error number is returned.
+ *
+ * if ins_len > 0, nodes and leaves will be split as we walk down the
+ * tree. if ins_len < 0, nodes will be merged as we walk down the tree (if
+ * possible)
+ *
+ * NOTE: This version has no COW ability, thus we expect trans == NULL,
+ * ins_len == 0 and cow == 0.
+ */
+int btrfs_search_slot(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, const struct btrfs_key *key,
+ struct btrfs_path *p, int ins_len, int cow)
{
- ALLOC_CACHE_ALIGN_BUFFER(struct btrfs_header, hdr,
- sizeof(struct btrfs_header));
- unsigned long size, offset = sizeof(*hdr);
- union btrfs_tree_node *res;
- u32 i;
-
- if (!btrfs_devread(physical, sizeof(*hdr), hdr))
- return -1;
-
- btrfs_header_to_cpu(hdr);
-
- if (hdr->level)
- size = sizeof(struct btrfs_node)
- + hdr->nritems * sizeof(struct btrfs_key_ptr);
- else
- size = btrfs_info.sb.nodesize;
-
- res = malloc_cache_aligned(size);
- if (!res) {
- debug("%s: malloc failed\n", __func__);
- return -1;
- }
-
- if (!btrfs_devread(physical + offset, size - offset,
- ((u8 *) res) + offset)) {
- free(res);
- return -1;
+ struct extent_buffer *b;
+ int slot;
+ int ret;
+ int level;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ u8 lowest_level = 0;
+
+ assert(trans == NULL && ins_len == 0 && cow == 0);
+ lowest_level = p->lowest_level;
+ WARN_ON(lowest_level && ins_len > 0);
+ WARN_ON(p->nodes[0] != NULL);
+
+ b = root->node;
+ extent_buffer_get(b);
+ while (b) {
+ level = btrfs_header_level(b);
+ /*
+ if (cow) {
+ int wret;
+ wret = btrfs_cow_block(trans, root, b,
+ p->nodes[level + 1],
+ p->slots[level + 1],
+ &b);
+ if (wret) {
+ free_extent_buffer(b);
+ return wret;
+ }
+ }
+ */
+ BUG_ON(!cow && ins_len);
+ if (level != btrfs_header_level(b))
+ WARN_ON(1);
+ level = btrfs_header_level(b);
+ p->nodes[level] = b;
+ ret = check_block(fs_info, p, level);
+ if (ret)
+ return -1;
+ ret = btrfs_bin_search(b, key, &slot);
+ if (level != 0) {
+ if (ret && slot > 0)
+ slot -= 1;
+ p->slots[level] = slot;
+ /*
+ if ((p->search_for_split || ins_len > 0) &&
+ btrfs_header_nritems(b) >=
+ BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 3) {
+ int sret = split_node(trans, root, p, level);
+ BUG_ON(sret > 0);
+ if (sret)
+ return sret;
+ b = p->nodes[level];
+ slot = p->slots[level];
+ } else if (ins_len < 0) {
+ int sret = balance_level(trans, root, p,
+ level);
+ if (sret)
+ return sret;
+ b = p->nodes[level];
+ if (!b) {
+ btrfs_release_path(p);
+ goto again;
+ }
+ slot = p->slots[level];
+ BUG_ON(btrfs_header_nritems(b) == 1);
+ }
+ */
+ /* this is only true while dropping a snapshot */
+ if (level == lowest_level)
+ break;
+
+ b = read_node_slot(fs_info, b, slot);
+ if (!extent_buffer_uptodate(b))
+ return -EIO;
+ } else {
+ p->slots[level] = slot;
+ /*
+ if (ins_len > 0 &&
+ ins_len > btrfs_leaf_free_space(b)) {
+ int sret = split_leaf(trans, root, key,
+ p, ins_len, ret == 0);
+ BUG_ON(sret > 0);
+ if (sret)
+ return sret;
+ }
+ */
+ return ret;
+ }
}
-
- memcpy(&res->header, hdr, sizeof(*hdr));
- if (hdr->level)
- for (i = 0; i < hdr->nritems; ++i)
- btrfs_key_ptr_to_cpu(&res->node.ptrs[i]);
- else
- for (i = 0; i < hdr->nritems; ++i)
- btrfs_item_to_cpu(&res->leaf.items[i]);
-
- *buf = res;
-
- return 0;
+ return 1;
}
-int btrfs_search_tree(const struct btrfs_root *root, struct btrfs_key *key,
- struct btrfs_path *p)
+/*
+ * Helper to use instead of search slot if no exact match is needed but
+ * instead the next or previous item should be returned.
+ * When find_higher is true, the next higher item is returned, the next lower
+ * otherwise.
+ * When return_any and find_higher are both true, and no higher item is found,
+ * return the next lower instead.
+ * When return_any is true and find_higher is false, and no lower item is found,
+ * return the next higher instead.
+ * It returns 0 if any item is found, 1 if none is found (tree empty), and
+ * < 0 on error
+ */
+int btrfs_search_slot_for_read(struct btrfs_root *root,
+ const struct btrfs_key *key,
+ struct btrfs_path *p, int find_higher,
+ int return_any)
{
- u8 lvl, prev_lvl;
- int i, slot, ret;
- u64 logical, physical;
- union btrfs_tree_node *buf;
-
- clear_path(p);
-
- logical = root->bytenr;
-
- for (i = 0; i < BTRFS_MAX_LEVEL; ++i) {
- physical = btrfs_map_logical_to_physical(logical);
- if (physical == -1ULL)
- goto err;
-
- if (read_tree_node(physical, &buf))
- goto err;
-
- lvl = buf->header.level;
- if (i && prev_lvl != lvl + 1) {
- printf("%s: invalid level in header at %llu\n",
- __func__, logical);
- goto err;
+ int ret;
+ struct extent_buffer *leaf;
+
+again:
+ ret = btrfs_search_slot(NULL, root, key, p, 0, 0);
+ if (ret <= 0)
+ return ret;
+ /*
+ * A return value of 1 means the path is at the position where the item
+ * should be inserted. Normally this is the next bigger item, but in
+ * case the previous item is the last in a leaf, path points to the
+ * first free slot in the previous leaf, i.e. at an invalid item.
+ */
+ leaf = p->nodes[0];
+
+ if (find_higher) {
+ if (p->slots[0] >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(root, p);
+ if (ret <= 0)
+ return ret;
+ if (!return_any)
+ return 1;
+ /*
+ * No higher item found, return the next lower instead
+ */
+ return_any = 0;
+ find_higher = 0;
+ btrfs_release_path(p);
+ goto again;
}
- prev_lvl = lvl;
-
- ret = btrfs_bin_search(buf, key, &slot);
- if (ret < 0)
- goto err;
- if (ret && slot > 0 && lvl)
- slot -= 1;
-
- p->slots[lvl] = slot;
- p->nodes[lvl] = buf;
-
- if (lvl) {
- logical = buf->node.ptrs[slot].blockptr;
- } else {
+ } else {
+ if (p->slots[0] == 0) {
+ ret = btrfs_prev_leaf(root, p);
+ if (ret < 0)
+ return ret;
+ if (!ret) {
+ leaf = p->nodes[0];
+ if (p->slots[0] == btrfs_header_nritems(leaf))
+ p->slots[0]--;
+ return 0;
+ }
+ if (!return_any)
+ return 1;
/*
- * The path might be invalid if:
- * cur leaf max < searched value < next leaf min
- *
- * Jump to the next valid element if it exists.
+ * No lower item found, return the next higher instead
*/
- if (slot >= buf->header.nritems)
- if (btrfs_next_slot(p) < 0)
- goto err;
- break;
+ return_any = 0;
+ find_higher = 1;
+ btrfs_release_path(p);
+ goto again;
+ } else {
+ --p->slots[0];
}
}
-
return 0;
-err:
- btrfs_free_path(p);
- return -1;
}
-static int jump_leaf(struct btrfs_path *path, int dir)
+/*
+ * how many bytes are required to store the items in a leaf. start
+ * and nr indicate which items in the leaf to check. This totals up the
+ * space used both by the item structs and the item data
+ */
+static int leaf_space_used(struct extent_buffer *l, int start, int nr)
{
- struct btrfs_path p;
- u32 slot;
- int level = 1, from_level, i;
-
- dir = dir >= 0 ? 1 : -1;
+ int data_len;
+ int nritems = btrfs_header_nritems(l);
+ int end = min(nritems, start + nr) - 1;
+
+ if (!nr)
+ return 0;
+ data_len = btrfs_item_end_nr(l, start);
+ data_len = data_len - btrfs_item_offset_nr(l, end);
+ data_len += sizeof(struct btrfs_item) * nr;
+ WARN_ON(data_len < 0);
+ return data_len;
+}
- p = *path;
+/*
+ * The space between the end of the leaf items and
+ * the start of the leaf data. IOW, how much room
+ * the leaf has left for both items and data
+ */
+int btrfs_leaf_free_space(struct extent_buffer *leaf)
+{
+ int nritems = btrfs_header_nritems(leaf);
+ u32 leaf_data_size;
+ int ret;
+
+ BUG_ON(leaf->fs_info && leaf->fs_info->nodesize != leaf->len);
+ leaf_data_size = __BTRFS_LEAF_DATA_SIZE(leaf->len);
+ ret = leaf_data_size - leaf_space_used(leaf, 0 ,nritems);
+ if (ret < 0) {
+ printk("leaf free space ret %d, leaf data size %u, used %d nritems %d\n",
+ ret, leaf_data_size, leaf_space_used(leaf, 0, nritems),
+ nritems);
+ }
+ return ret;
+}
- while (level < BTRFS_MAX_LEVEL) {
- if (!p.nodes[level])
+/*
+ * walk up the tree as far as required to find the previous leaf.
+ * returns 0 if it found something or 1 if there are no lesser leaves.
+ * returns < 0 on io errors.
+ */
+int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
+{
+ int slot;
+ int level = 1;
+ struct extent_buffer *c;
+ struct extent_buffer *next = NULL;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+
+ while(level < BTRFS_MAX_LEVEL) {
+ if (!path->nodes[level])
return 1;
- slot = p.slots[level];
- if ((dir > 0 && slot + dir >= p.nodes[level]->header.nritems)
- || (dir < 0 && !slot))
+ slot = path->slots[level];
+ c = path->nodes[level];
+ if (slot == 0) {
level++;
- else
- break;
- }
-
- if (level == BTRFS_MAX_LEVEL)
- return 1;
-
- p.slots[level] = slot + dir;
- level--;
- from_level = level;
-
- while (level >= 0) {
- u64 logical, physical;
-
- slot = p.slots[level + 1];
- logical = p.nodes[level + 1]->node.ptrs[slot].blockptr;
- physical = btrfs_map_logical_to_physical(logical);
- if (physical == -1ULL)
- goto err;
-
- if (read_tree_node(physical, &p.nodes[level]))
- goto err;
+ if (level == BTRFS_MAX_LEVEL)
+ return 1;
+ continue;
+ }
+ slot--;
- if (dir > 0)
- p.slots[level] = 0;
- else
- p.slots[level] = p.nodes[level]->header.nritems - 1;
+ next = read_node_slot(fs_info, c, slot);
+ if (!extent_buffer_uptodate(next)) {
+ if (IS_ERR(next))
+ return PTR_ERR(next);
+ return -EIO;
+ }
+ break;
+ }
+ path->slots[level] = slot;
+ while(1) {
level--;
+ c = path->nodes[level];
+ free_extent_buffer(c);
+ slot = btrfs_header_nritems(next);
+ if (slot != 0)
+ slot--;
+ path->nodes[level] = next;
+ path->slots[level] = slot;
+ if (!level)
+ break;
+ next = read_node_slot(fs_info, next, slot);
+ if (!extent_buffer_uptodate(next)) {
+ if (IS_ERR(next))
+ return PTR_ERR(next);
+ return -EIO;
+ }
}
-
- /* Free rewritten nodes in path */
- for (i = 0; i <= from_level; ++i)
- free(path->nodes[i]);
-
- *path = p;
return 0;
-
-err:
- /* Free rewritten nodes in p */
- for (i = level + 1; i <= from_level; ++i)
- free(p.nodes[i]);
- return -1;
}
-int btrfs_prev_slot(struct btrfs_path *p)
+/*
+ * Walk up the tree as far as necessary to find the next sibling tree block.
+ * More generic version of btrfs_next_leaf(), as it could find sibling nodes
+ * if @path->lowest_level is not 0.
+ *
+ * returns 0 if it found something or 1 if there are no greater leaves.
+ * returns < 0 on io errors.
+ */
+int btrfs_next_sibling_tree_block(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path)
{
- if (!p->slots[0])
- return jump_leaf(p, -1);
+ int slot;
+ int level = path->lowest_level + 1;
+ struct extent_buffer *c;
+ struct extent_buffer *next = NULL;
+
+ BUG_ON(path->lowest_level + 1 >= BTRFS_MAX_LEVEL);
+ do {
+ if (!path->nodes[level])
+ return 1;
- p->slots[0]--;
+ slot = path->slots[level] + 1;
+ c = path->nodes[level];
+ if (slot >= btrfs_header_nritems(c)) {
+ level++;
+ if (level == BTRFS_MAX_LEVEL)
+ return 1;
+ continue;
+ }
+
+ next = read_node_slot(fs_info, c, slot);
+ if (!extent_buffer_uptodate(next))
+ return -EIO;
+ break;
+ } while (level < BTRFS_MAX_LEVEL);
+ path->slots[level] = slot;
+ while(1) {
+ level--;
+ c = path->nodes[level];
+ free_extent_buffer(c);
+ path->nodes[level] = next;
+ path->slots[level] = 0;
+ if (level == path->lowest_level)
+ break;
+ next = read_node_slot(fs_info, next, 0);
+ if (!extent_buffer_uptodate(next))
+ return -EIO;
+ }
return 0;
}
-int btrfs_next_slot(struct btrfs_path *p)
+int btrfs_previous_item(struct btrfs_root *root,
+ struct btrfs_path *path, u64 min_objectid,
+ int type)
{
- struct btrfs_leaf *leaf = &p->nodes[0]->leaf;
-
- if (p->slots[0] + 1 >= leaf->header.nritems)
- return jump_leaf(p, 1);
+ struct btrfs_key found_key;
+ struct extent_buffer *leaf;
+ u32 nritems;
+ int ret;
+
+ while(1) {
+ if (path->slots[0] == 0) {
+ ret = btrfs_prev_leaf(root, path);
+ if (ret != 0)
+ return ret;
+ } else {
+ path->slots[0]--;
+ }
+ leaf = path->nodes[0];
+ nritems = btrfs_header_nritems(leaf);
+ if (nritems == 0)
+ return 1;
+ if (path->slots[0] == nritems)
+ path->slots[0]--;
- p->slots[0]++;
- return 0;
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+ if (found_key.objectid < min_objectid)
+ break;
+ if (found_key.type == type)
+ return 0;
+ if (found_key.objectid == min_objectid &&
+ found_key.type < type)
+ break;
+ }
+ return 1;
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 65c152a52f..219c410b18 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -11,16 +11,17 @@
#include <common.h>
#include <compiler.h>
-#include "btrfs_tree.h"
-
-#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
+#include <linux/rbtree.h>
+#include <linux/bug.h>
+#include <linux/unaligned/le_byteshift.h>
+#include <u-boot/crc.h>
+#include "kernel-shared/btrfs_tree.h"
+#include "crypto/hash.h"
+#include "compat.h"
+#include "extent-io.h"
#define BTRFS_MAX_MIRRORS 3
-#define BTRFS_MAX_LEVEL 8
-
-#define BTRFS_COMPAT_EXTENT_TREE_V0
-
/*
* the max metadata block size. This limit is somewhat artificial,
* but the memmove costs go through the roof for larger blocks.
@@ -28,23 +29,38 @@
#define BTRFS_MAX_METADATA_BLOCKSIZE 65536
/*
- * we can actually store much bigger names, but lets not confuse the rest
- * of linux
- */
-#define BTRFS_NAME_LEN 255
-
-/*
* Theoretical limit is larger, but we keep this down to a sane
* value. That should limit greatly the possibility of collisions on
* inode ref items.
*/
#define BTRFS_LINK_MAX 65535U
-static const int btrfs_csum_sizes[] = { 4 };
-
/* four bytes for CRC32 */
#define BTRFS_EMPTY_DIR_SIZE 0
+struct btrfs_mapping_tree {
+ struct cache_tree cache_tree;
+};
+
+static inline unsigned long btrfs_chunk_item_size(int num_stripes)
+{
+ BUG_ON(num_stripes == 0);
+ return sizeof(struct btrfs_chunk) +
+ sizeof(struct btrfs_stripe) * (num_stripes - 1);
+}
+
+#define __BTRFS_LEAF_DATA_SIZE(bs) ((bs) - sizeof(struct btrfs_header))
+#define BTRFS_LEAF_DATA_SIZE(fs_info) \
+ (__BTRFS_LEAF_DATA_SIZE(fs_info->nodesize))
+
+struct btrfs_path {
+ struct extent_buffer *nodes[BTRFS_MAX_LEVEL];
+ int slots[BTRFS_MAX_LEVEL];
+
+ /* keep some upper locks as we walk down */
+ u8 lowest_level;
+};
+
/* ioprio of readahead is set to idle */
#define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0))
@@ -52,6 +68,92 @@ static const int btrfs_csum_sizes[] = { 4 };
#define BTRFS_MAX_EXTENT_SIZE SZ_128M
+enum btrfs_tree_block_status {
+ BTRFS_TREE_BLOCK_CLEAN,
+ BTRFS_TREE_BLOCK_INVALID_NRITEMS,
+ BTRFS_TREE_BLOCK_INVALID_PARENT_KEY,
+ BTRFS_TREE_BLOCK_BAD_KEY_ORDER,
+ BTRFS_TREE_BLOCK_INVALID_LEVEL,
+ BTRFS_TREE_BLOCK_INVALID_FREE_SPACE,
+ BTRFS_TREE_BLOCK_INVALID_OFFSETS,
+};
+
+struct btrfs_root {
+ struct extent_buffer *node;
+ struct btrfs_root_item root_item;
+ struct btrfs_key root_key;
+ struct btrfs_fs_info *fs_info;
+ u64 objectid;
+ u64 last_trans;
+
+ int ref_cows;
+ int track_dirty;
+
+ u32 type;
+ u64 last_inode_alloc;
+
+ struct rb_node rb_node;
+};
+
+struct btrfs_trans_handle;
+struct btrfs_device;
+struct btrfs_fs_devices;
+struct btrfs_fs_info {
+ u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
+ u8 *new_chunk_tree_uuid;
+ struct btrfs_root *fs_root;
+ struct btrfs_root *tree_root;
+ struct btrfs_root *chunk_root;
+ struct btrfs_root *csum_root;
+
+ struct rb_root fs_root_tree;
+
+ struct extent_io_tree extent_cache;
+
+ /* logical->physical extent mapping */
+ struct btrfs_mapping_tree mapping_tree;
+
+ u64 last_trans_committed;
+
+ struct btrfs_super_block *super_copy;
+
+ struct btrfs_fs_devices *fs_devices;
+
+ /* Cached block sizes */
+ u32 nodesize;
+ u32 sectorsize;
+ u32 stripesize;
+};
+
+static inline u32 BTRFS_MAX_ITEM_SIZE(const struct btrfs_fs_info *info)
+{
+ return BTRFS_LEAF_DATA_SIZE(info) - sizeof(struct btrfs_item);
+}
+
+static inline u32 BTRFS_NODEPTRS_PER_BLOCK(const struct btrfs_fs_info *info)
+{
+ return BTRFS_LEAF_DATA_SIZE(info) / sizeof(struct btrfs_key_ptr);
+}
+
+static inline u32 BTRFS_NODEPTRS_PER_EXTENT_BUFFER(const struct extent_buffer *eb)
+{
+ BUG_ON(eb->fs_info && eb->fs_info->nodesize != eb->len);
+ return __BTRFS_LEAF_DATA_SIZE(eb->len) / sizeof(struct btrfs_key_ptr);
+}
+
+#define BTRFS_FILE_EXTENT_INLINE_DATA_START \
+ (offsetof(struct btrfs_file_extent_item, disk_bytenr))
+static inline u32 BTRFS_MAX_INLINE_DATA_SIZE(const struct btrfs_fs_info *info)
+{
+ return BTRFS_MAX_ITEM_SIZE(info) -
+ BTRFS_FILE_EXTENT_INLINE_DATA_START;
+}
+
+static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
+{
+ return BTRFS_MAX_ITEM_SIZE(info) - sizeof(struct btrfs_dir_item);
+}
+
/*
* File system states
*/
@@ -61,273 +163,1136 @@ static const int btrfs_csum_sizes[] = { 4 };
#define BTRFS_FS_STATE_DEV_REPLACING 3
#define BTRFS_FS_STATE_DUMMY_FS_INFO 4
-#define BTRFS_BACKREF_REV_MAX 256
-#define BTRFS_BACKREF_REV_SHIFT 56
-#define BTRFS_BACKREF_REV_MASK (((u64)BTRFS_BACKREF_REV_MAX - 1) << \
- BTRFS_BACKREF_REV_SHIFT)
+#define read_eb_member(eb, ptr, type, member, result) ( \
+ read_extent_buffer(eb, (char *)(result), \
+ ((unsigned long)(ptr)) + \
+ offsetof(type, member), \
+ sizeof(((type *)0)->member)))
-#define BTRFS_OLD_BACKREF_REV 0
-#define BTRFS_MIXED_BACKREF_REV 1
+#define write_eb_member(eb, ptr, type, member, result) ( \
+ write_extent_buffer(eb, (char *)(result), \
+ ((unsigned long)(ptr)) + \
+ offsetof(type, member), \
+ sizeof(((type *)0)->member)))
-/*
- * every tree block (leaf or node) starts with this header.
- */
-struct btrfs_header {
- /* these first four must match the super block */
- __u8 csum[BTRFS_CSUM_SIZE];
- __u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
- __u64 bytenr; /* which block this node is supposed to live in */
- __u64 flags;
-
- /* allowed to be different from the super from here on down */
- __u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
- __u64 generation;
- __u64 owner;
- __u32 nritems;
- __u8 level;
-} __attribute__ ((__packed__));
+#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \
+static inline u##bits btrfs_##name(const struct extent_buffer *eb) \
+{ \
+ const struct btrfs_header *h = (struct btrfs_header *)eb->data; \
+ return le##bits##_to_cpu(h->member); \
+} \
+static inline void btrfs_set_##name(struct extent_buffer *eb, \
+ u##bits val) \
+{ \
+ struct btrfs_header *h = (struct btrfs_header *)eb->data; \
+ h->member = cpu_to_le##bits(val); \
+}
-/*
- * this is a very generous portion of the super block, giving us
- * room to translate 14 chunks with 3 stripes each.
- */
-#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048
+#define BTRFS_SETGET_FUNCS(name, type, member, bits) \
+static inline u##bits btrfs_##name(const struct extent_buffer *eb, \
+ const type *s) \
+{ \
+ unsigned long offset = (unsigned long)s; \
+ const type *p = (type *) (eb->data + offset); \
+ return get_unaligned_le##bits(&p->member); \
+} \
+static inline void btrfs_set_##name(struct extent_buffer *eb, \
+ type *s, u##bits val) \
+{ \
+ unsigned long offset = (unsigned long)s; \
+ type *p = (type *) (eb->data + offset); \
+ put_unaligned_le##bits(val, &p->member); \
+}
-/*
- * just in case we somehow lose the roots and are not able to mount,
- * we store an array of the roots from previous transactions
- * in the super.
- */
-#define BTRFS_NUM_BACKUP_ROOTS 4
-struct btrfs_root_backup {
- __u64 tree_root;
- __u64 tree_root_gen;
-
- __u64 chunk_root;
- __u64 chunk_root_gen;
-
- __u64 extent_root;
- __u64 extent_root_gen;
-
- __u64 fs_root;
- __u64 fs_root_gen;
-
- __u64 dev_root;
- __u64 dev_root_gen;
-
- __u64 csum_root;
- __u64 csum_root_gen;
-
- __u64 total_bytes;
- __u64 bytes_used;
- __u64 num_devices;
- /* future */
- __u64 unused_64[4];
-
- __u8 tree_root_level;
- __u8 chunk_root_level;
- __u8 extent_root_level;
- __u8 fs_root_level;
- __u8 dev_root_level;
- __u8 csum_root_level;
- /* future and to align */
- __u8 unused_8[10];
-} __attribute__ ((__packed__));
+#define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \
+static inline u##bits btrfs_##name(const type *s) \
+{ \
+ return le##bits##_to_cpu(s->member); \
+} \
+static inline void btrfs_set_##name(type *s, u##bits val) \
+{ \
+ s->member = cpu_to_le##bits(val); \
+}
-/*
- * the super block basically lists the main trees of the FS
- * it currently lacks any block count etc etc
- */
-struct btrfs_super_block {
- __u8 csum[BTRFS_CSUM_SIZE];
- /* the first 4 fields must match struct btrfs_header */
- __u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
- __u64 bytenr; /* this block number */
- __u64 flags;
-
- /* allowed to be different from the btrfs_header from here own down */
- __u64 magic;
- __u64 generation;
- __u64 root;
- __u64 chunk_root;
- __u64 log_root;
-
- /* this will help find the new super based on the log root */
- __u64 log_root_transid;
- __u64 total_bytes;
- __u64 bytes_used;
- __u64 root_dir_objectid;
- __u64 num_devices;
- __u32 sectorsize;
- __u32 nodesize;
- __u32 __unused_leafsize;
- __u32 stripesize;
- __u32 sys_chunk_array_size;
- __u64 chunk_root_generation;
- __u64 compat_flags;
- __u64 compat_ro_flags;
- __u64 incompat_flags;
- __u16 csum_type;
- __u8 root_level;
- __u8 chunk_root_level;
- __u8 log_root_level;
- struct btrfs_dev_item dev_item;
-
- char label[BTRFS_LABEL_SIZE];
-
- __u64 cache_generation;
- __u64 uuid_tree_generation;
-
- /* future expansion */
- __u64 reserved[30];
- __u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
- struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
-} __attribute__ ((__packed__));
+BTRFS_SETGET_FUNCS(device_type, struct btrfs_dev_item, type, 64);
+BTRFS_SETGET_FUNCS(device_total_bytes, struct btrfs_dev_item, total_bytes, 64);
+BTRFS_SETGET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64);
+BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32);
+BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32);
+BTRFS_SETGET_FUNCS(device_start_offset, struct btrfs_dev_item,
+ start_offset, 64);
+BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32);
+BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64);
+BTRFS_SETGET_FUNCS(device_group, struct btrfs_dev_item, dev_group, 32);
+BTRFS_SETGET_FUNCS(device_seek_speed, struct btrfs_dev_item, seek_speed, 8);
+BTRFS_SETGET_FUNCS(device_bandwidth, struct btrfs_dev_item, bandwidth, 8);
+BTRFS_SETGET_FUNCS(device_generation, struct btrfs_dev_item, generation, 64);
-/*
- * Compat flags that we support. If any incompat flags are set other than the
- * ones specified below then we will fail to mount
- */
-#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
-#define BTRFS_FEATURE_COMPAT_SAFE_SET 0ULL
-#define BTRFS_FEATURE_COMPAT_SAFE_CLEAR 0ULL
-
-#define BTRFS_FEATURE_COMPAT_RO_SUPP \
- (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE | \
- BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID)
-
-#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL
-#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL
-
-#define BTRFS_FEATURE_INCOMPAT_SUPP \
- (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
- BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
- BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
- BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \
- BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \
- BTRFS_FEATURE_INCOMPAT_RAID56 | \
- BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \
- BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \
- BTRFS_FEATURE_INCOMPAT_NO_HOLES)
-
-#define BTRFS_FEATURE_INCOMPAT_SAFE_SET \
- (BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
-#define BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR 0ULL
+BTRFS_SETGET_STACK_FUNCS(stack_device_type, struct btrfs_dev_item, type, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_total_bytes, struct btrfs_dev_item,
+ total_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_bytes_used, struct btrfs_dev_item,
+ bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_io_align, struct btrfs_dev_item,
+ io_align, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_io_width, struct btrfs_dev_item,
+ io_width, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_sector_size, struct btrfs_dev_item,
+ sector_size, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_id, struct btrfs_dev_item, devid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_device_group, struct btrfs_dev_item,
+ dev_group, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_device_seek_speed, struct btrfs_dev_item,
+ seek_speed, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item,
+ bandwidth, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_device_generation, struct btrfs_dev_item,
+ generation, 64);
-/*
- * A leaf is full of items. offset and size tell us where to find
- * the item in the leaf (relative to the start of the data area)
- */
-struct btrfs_item {
- struct btrfs_key key;
- __u32 offset;
- __u32 size;
-} __attribute__ ((__packed__));
+static inline char *btrfs_device_uuid(struct btrfs_dev_item *d)
+{
+ return (char *)d + offsetof(struct btrfs_dev_item, uuid);
+}
-/*
- * leaves have an item area and a data area:
- * [item0, item1....itemN] [free space] [dataN...data1, data0]
- *
- * The data is separate from the items to get the keys closer together
- * during searches.
- */
-struct btrfs_leaf {
- struct btrfs_header header;
- struct btrfs_item items[];
-} __attribute__ ((__packed__));
+static inline char *btrfs_device_fsid(struct btrfs_dev_item *d)
+{
+ return (char *)d + offsetof(struct btrfs_dev_item, fsid);
+}
+
+BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64);
+BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64);
+BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64);
+BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32);
+BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32);
+BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32);
+BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64);
+BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16);
+BTRFS_SETGET_FUNCS(chunk_sub_stripes, struct btrfs_chunk, sub_stripes, 16);
+BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64);
+BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64);
+
+static inline char *btrfs_stripe_dev_uuid(struct btrfs_stripe *s)
+{
+ return (char *)s + offsetof(struct btrfs_stripe, dev_uuid);
+}
+
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_length, struct btrfs_chunk, length, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_owner, struct btrfs_chunk, owner, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk,
+ stripe_len, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_align, struct btrfs_chunk,
+ io_align, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_width, struct btrfs_chunk,
+ io_width, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk,
+ sector_size, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk,
+ num_stripes, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_chunk_sub_stripes, struct btrfs_chunk,
+ sub_stripes, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64);
+
+static inline struct btrfs_stripe *btrfs_stripe_nr(struct btrfs_chunk *c,
+ int nr)
+{
+ unsigned long offset = (unsigned long)c;
+ offset += offsetof(struct btrfs_chunk, stripe);
+ offset += nr * sizeof(struct btrfs_stripe);
+ return (struct btrfs_stripe *)offset;
+}
+
+static inline char *btrfs_stripe_dev_uuid_nr(struct btrfs_chunk *c, int nr)
+{
+ return btrfs_stripe_dev_uuid(btrfs_stripe_nr(c, nr));
+}
+
+static inline u64 btrfs_stripe_offset_nr(struct extent_buffer *eb,
+ struct btrfs_chunk *c, int nr)
+{
+ return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr));
+}
+
+static inline void btrfs_set_stripe_offset_nr(struct extent_buffer *eb,
+ struct btrfs_chunk *c, int nr,
+ u64 val)
+{
+ btrfs_set_stripe_offset(eb, btrfs_stripe_nr(c, nr), val);
+}
+
+static inline u64 btrfs_stripe_devid_nr(struct extent_buffer *eb,
+ struct btrfs_chunk *c, int nr)
+{
+ return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr));
+}
+
+static inline void btrfs_set_stripe_devid_nr(struct extent_buffer *eb,
+ struct btrfs_chunk *c, int nr,
+ u64 val)
+{
+ btrfs_set_stripe_devid(eb, btrfs_stripe_nr(c, nr), val);
+}
+
+/* struct btrfs_block_group_item */
+BTRFS_SETGET_STACK_FUNCS(block_group_used, struct btrfs_block_group_item,
+ used, 64);
+BTRFS_SETGET_FUNCS(disk_block_group_used, struct btrfs_block_group_item,
+ used, 64);
+BTRFS_SETGET_STACK_FUNCS(block_group_chunk_objectid,
+ struct btrfs_block_group_item, chunk_objectid, 64);
+
+BTRFS_SETGET_FUNCS(disk_block_group_chunk_objectid,
+ struct btrfs_block_group_item, chunk_objectid, 64);
+BTRFS_SETGET_FUNCS(disk_block_group_flags,
+ struct btrfs_block_group_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(block_group_flags,
+ struct btrfs_block_group_item, flags, 64);
+
+/* struct btrfs_free_space_info */
+BTRFS_SETGET_FUNCS(free_space_extent_count, struct btrfs_free_space_info,
+ extent_count, 32);
+BTRFS_SETGET_FUNCS(free_space_flags, struct btrfs_free_space_info, flags, 32);
+
+/* struct btrfs_inode_ref */
+BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_ref_name_len, struct btrfs_inode_ref, name_len, 16);
+BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64);
+
+/* struct btrfs_inode_extref */
+BTRFS_SETGET_FUNCS(inode_extref_parent, struct btrfs_inode_extref,
+ parent_objectid, 64);
+BTRFS_SETGET_FUNCS(inode_extref_name_len, struct btrfs_inode_extref,
+ name_len, 16);
+BTRFS_SETGET_FUNCS(inode_extref_index, struct btrfs_inode_extref, index, 64);
+
+/* struct btrfs_inode_item */
+BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64);
+BTRFS_SETGET_FUNCS(inode_sequence, struct btrfs_inode_item, sequence, 64);
+BTRFS_SETGET_FUNCS(inode_transid, struct btrfs_inode_item, transid, 64);
+BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64);
+BTRFS_SETGET_FUNCS(inode_nbytes, struct btrfs_inode_item, nbytes, 64);
+BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64);
+BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32);
+BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32);
+BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32);
+BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32);
+BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64);
+BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64);
+
+BTRFS_SETGET_STACK_FUNCS(stack_inode_generation,
+ struct btrfs_inode_item, generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence,
+ struct btrfs_inode_item, sequence, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_transid,
+ struct btrfs_inode_item, transid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_size,
+ struct btrfs_inode_item, size, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes,
+ struct btrfs_inode_item, nbytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group,
+ struct btrfs_inode_item, block_group, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink,
+ struct btrfs_inode_item, nlink, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_uid,
+ struct btrfs_inode_item, uid, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_gid,
+ struct btrfs_inode_item, gid, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_mode,
+ struct btrfs_inode_item, mode, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev,
+ struct btrfs_inode_item, rdev, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_flags,
+ struct btrfs_inode_item, flags, 64);
+
+static inline struct btrfs_timespec *
+btrfs_inode_atime(struct btrfs_inode_item *inode_item)
+{
+ unsigned long ptr = (unsigned long)inode_item;
+ ptr += offsetof(struct btrfs_inode_item, atime);
+ return (struct btrfs_timespec *)ptr;
+}
+
+static inline struct btrfs_timespec *
+btrfs_inode_mtime(struct btrfs_inode_item *inode_item)
+{
+ unsigned long ptr = (unsigned long)inode_item;
+ ptr += offsetof(struct btrfs_inode_item, mtime);
+ return (struct btrfs_timespec *)ptr;
+}
+
+static inline struct btrfs_timespec *
+btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
+{
+ unsigned long ptr = (unsigned long)inode_item;
+ ptr += offsetof(struct btrfs_inode_item, ctime);
+ return (struct btrfs_timespec *)ptr;
+}
+
+static inline struct btrfs_timespec *
+btrfs_inode_otime(struct btrfs_inode_item *inode_item)
+{
+ unsigned long ptr = (unsigned long)inode_item;
+ ptr += offsetof(struct btrfs_inode_item, otime);
+ return (struct btrfs_timespec *)ptr;
+}
+
+BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
+BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec,
+ sec, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec,
+ nsec, 32);
+
+/* struct btrfs_dev_extent */
+BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent,
+ chunk_tree, 64);
+BTRFS_SETGET_FUNCS(dev_extent_chunk_objectid, struct btrfs_dev_extent,
+ chunk_objectid, 64);
+BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent,
+ chunk_offset, 64);
+BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64);
+
+BTRFS_SETGET_STACK_FUNCS(stack_dev_extent_length, struct btrfs_dev_extent,
+ length, 64);
+
+static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev)
+{
+ unsigned long ptr = offsetof(struct btrfs_dev_extent, chunk_tree_uuid);
+ return (u8 *)((unsigned long)dev + ptr);
+}
+
+
+/* struct btrfs_extent_item */
+BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_extent_refs, struct btrfs_extent_item, refs, 64);
+BTRFS_SETGET_FUNCS(extent_generation, struct btrfs_extent_item,
+ generation, 64);
+BTRFS_SETGET_FUNCS(extent_flags, struct btrfs_extent_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_extent_flags, struct btrfs_extent_item, flags, 64);
+
+BTRFS_SETGET_FUNCS(extent_refs_v0, struct btrfs_extent_item_v0, refs, 32);
+
+BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8);
+
+static inline void btrfs_tree_block_key(struct extent_buffer *eb,
+ struct btrfs_tree_block_info *item,
+ struct btrfs_disk_key *key)
+{
+ read_eb_member(eb, item, struct btrfs_tree_block_info, key, key);
+}
+
+static inline void btrfs_set_tree_block_key(struct extent_buffer *eb,
+ struct btrfs_tree_block_info *item,
+ struct btrfs_disk_key *key)
+{
+ write_eb_member(eb, item, struct btrfs_tree_block_info, key, key);
+}
+
+BTRFS_SETGET_FUNCS(extent_data_ref_root, struct btrfs_extent_data_ref,
+ root, 64);
+BTRFS_SETGET_FUNCS(extent_data_ref_objectid, struct btrfs_extent_data_ref,
+ objectid, 64);
+BTRFS_SETGET_FUNCS(extent_data_ref_offset, struct btrfs_extent_data_ref,
+ offset, 64);
+BTRFS_SETGET_FUNCS(extent_data_ref_count, struct btrfs_extent_data_ref,
+ count, 32);
+
+BTRFS_SETGET_FUNCS(shared_data_ref_count, struct btrfs_shared_data_ref,
+ count, 32);
+
+BTRFS_SETGET_FUNCS(extent_inline_ref_type, struct btrfs_extent_inline_ref,
+ type, 8);
+BTRFS_SETGET_FUNCS(extent_inline_ref_offset, struct btrfs_extent_inline_ref,
+ offset, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_extent_inline_ref_type,
+ struct btrfs_extent_inline_ref, type, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_extent_inline_ref_offset,
+ struct btrfs_extent_inline_ref, offset, 64);
+
+static inline u32 btrfs_extent_inline_ref_size(int type)
+{
+ if (type == BTRFS_TREE_BLOCK_REF_KEY ||
+ type == BTRFS_SHARED_BLOCK_REF_KEY)
+ return sizeof(struct btrfs_extent_inline_ref);
+ if (type == BTRFS_SHARED_DATA_REF_KEY)
+ return sizeof(struct btrfs_shared_data_ref) +
+ sizeof(struct btrfs_extent_inline_ref);
+ if (type == BTRFS_EXTENT_DATA_REF_KEY)
+ return sizeof(struct btrfs_extent_data_ref) +
+ offsetof(struct btrfs_extent_inline_ref, offset);
+ BUG();
+ return 0;
+}
+
+BTRFS_SETGET_FUNCS(ref_root_v0, struct btrfs_extent_ref_v0, root, 64);
+BTRFS_SETGET_FUNCS(ref_generation_v0, struct btrfs_extent_ref_v0,
+ generation, 64);
+BTRFS_SETGET_FUNCS(ref_objectid_v0, struct btrfs_extent_ref_v0, objectid, 64);
+BTRFS_SETGET_FUNCS(ref_count_v0, struct btrfs_extent_ref_v0, count, 32);
+
+/* struct btrfs_node */
+BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64);
+BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64);
+
+static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr)
+{
+ unsigned long ptr;
+ ptr = offsetof(struct btrfs_node, ptrs) +
+ sizeof(struct btrfs_key_ptr) * nr;
+ return btrfs_key_blockptr(eb, (struct btrfs_key_ptr *)ptr);
+}
+
+static inline void btrfs_set_node_blockptr(struct extent_buffer *eb,
+ int nr, u64 val)
+{
+ unsigned long ptr;
+ ptr = offsetof(struct btrfs_node, ptrs) +
+ sizeof(struct btrfs_key_ptr) * nr;
+ btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val);
+}
+
+static inline u64 btrfs_node_ptr_generation(struct extent_buffer *eb, int nr)
+{
+ unsigned long ptr;
+ ptr = offsetof(struct btrfs_node, ptrs) +
+ sizeof(struct btrfs_key_ptr) * nr;
+ return btrfs_key_generation(eb, (struct btrfs_key_ptr *)ptr);
+}
+
+static inline void btrfs_set_node_ptr_generation(struct extent_buffer *eb,
+ int nr, u64 val)
+{
+ unsigned long ptr;
+ ptr = offsetof(struct btrfs_node, ptrs) +
+ sizeof(struct btrfs_key_ptr) * nr;
+ btrfs_set_key_generation(eb, (struct btrfs_key_ptr *)ptr, val);
+}
+
+static inline unsigned long btrfs_node_key_ptr_offset(int nr)
+{
+ return offsetof(struct btrfs_node, ptrs) +
+ sizeof(struct btrfs_key_ptr) * nr;
+}
+
+static inline void btrfs_node_key(struct extent_buffer *eb,
+ struct btrfs_disk_key *disk_key, int nr)
+{
+ unsigned long ptr;
+ ptr = btrfs_node_key_ptr_offset(nr);
+ read_eb_member(eb, (struct btrfs_key_ptr *)ptr,
+ struct btrfs_key_ptr, key, disk_key);
+}
+
+static inline void btrfs_set_node_key(struct extent_buffer *eb,
+ struct btrfs_disk_key *disk_key, int nr)
+{
+ unsigned long ptr;
+ ptr = btrfs_node_key_ptr_offset(nr);
+ write_eb_member(eb, (struct btrfs_key_ptr *)ptr,
+ struct btrfs_key_ptr, key, disk_key);
+}
+
+/* struct btrfs_item */
+BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32);
+BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32);
+
+static inline unsigned long btrfs_item_nr_offset(int nr)
+{
+ return offsetof(struct btrfs_leaf, items) +
+ sizeof(struct btrfs_item) * nr;
+}
+
+static inline struct btrfs_item *btrfs_item_nr(int nr)
+{
+ return (struct btrfs_item *)btrfs_item_nr_offset(nr);
+}
+
+static inline u32 btrfs_item_end(struct extent_buffer *eb,
+ struct btrfs_item *item)
+{
+ return btrfs_item_offset(eb, item) + btrfs_item_size(eb, item);
+}
+
+static inline u32 btrfs_item_end_nr(struct extent_buffer *eb, int nr)
+{
+ return btrfs_item_end(eb, btrfs_item_nr(nr));
+}
+
+static inline u32 btrfs_item_offset_nr(const struct extent_buffer *eb, int nr)
+{
+ return btrfs_item_offset(eb, btrfs_item_nr(nr));
+}
+
+static inline u32 btrfs_item_size_nr(struct extent_buffer *eb, int nr)
+{
+ return btrfs_item_size(eb, btrfs_item_nr(nr));
+}
+
+static inline void btrfs_item_key(struct extent_buffer *eb,
+ struct btrfs_disk_key *disk_key, int nr)
+{
+ struct btrfs_item *item = btrfs_item_nr(nr);
+ read_eb_member(eb, item, struct btrfs_item, key, disk_key);
+}
+
+static inline void btrfs_set_item_key(struct extent_buffer *eb,
+ struct btrfs_disk_key *disk_key, int nr)
+{
+ struct btrfs_item *item = btrfs_item_nr(nr);
+ write_eb_member(eb, item, struct btrfs_item, key, disk_key);
+}
+
+BTRFS_SETGET_FUNCS(dir_log_end, struct btrfs_dir_log_item, end, 64);
/*
- * all non-leaf blocks are nodes, they hold only keys and pointers to
- * other blocks
+ * struct btrfs_root_ref
*/
-struct btrfs_key_ptr {
- struct btrfs_key key;
- __u64 blockptr;
- __u64 generation;
-} __attribute__ ((__packed__));
+BTRFS_SETGET_FUNCS(root_ref_dirid, struct btrfs_root_ref, dirid, 64);
+BTRFS_SETGET_FUNCS(root_ref_sequence, struct btrfs_root_ref, sequence, 64);
+BTRFS_SETGET_FUNCS(root_ref_name_len, struct btrfs_root_ref, name_len, 16);
-struct btrfs_node {
- struct btrfs_header header;
- struct btrfs_key_ptr ptrs[];
-} __attribute__ ((__packed__));
+BTRFS_SETGET_STACK_FUNCS(stack_root_ref_dirid, struct btrfs_root_ref, dirid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_root_ref_sequence, struct btrfs_root_ref, sequence, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_root_ref_name_len, struct btrfs_root_ref, name_len, 16);
-union btrfs_tree_node {
- struct btrfs_header header;
- struct btrfs_leaf leaf;
- struct btrfs_node node;
-};
+/* struct btrfs_dir_item */
+BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16);
+BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8);
+BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16);
+BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64);
-typedef __u8 u8;
-typedef __u16 u16;
-typedef __u32 u32;
-typedef __u64 u64;
+BTRFS_SETGET_STACK_FUNCS(stack_dir_data_len, struct btrfs_dir_item, data_len, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_dir_type, struct btrfs_dir_item, type, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_dir_name_len, struct btrfs_dir_item, name_len, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_dir_transid, struct btrfs_dir_item, transid, 64);
-struct btrfs_path {
- union btrfs_tree_node *nodes[BTRFS_MAX_LEVEL];
- u32 slots[BTRFS_MAX_LEVEL];
-};
+static inline void btrfs_dir_item_key(struct extent_buffer *eb,
+ struct btrfs_dir_item *item,
+ struct btrfs_disk_key *key)
+{
+ read_eb_member(eb, item, struct btrfs_dir_item, location, key);
+}
-struct btrfs_root {
- u64 objectid;
- u64 bytenr;
- u64 root_dirid;
-};
+static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
+ struct btrfs_dir_item *item,
+ struct btrfs_disk_key *key)
+{
+ write_eb_member(eb, item, struct btrfs_dir_item, location, key);
+}
+
+/* struct btrfs_free_space_header */
+BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header,
+ num_entries, 64);
+BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header,
+ num_bitmaps, 64);
+BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header,
+ generation, 64);
+
+static inline void btrfs_free_space_key(struct extent_buffer *eb,
+ struct btrfs_free_space_header *h,
+ struct btrfs_disk_key *key)
+{
+ read_eb_member(eb, h, struct btrfs_free_space_header, location, key);
+}
+
+static inline void btrfs_set_free_space_key(struct extent_buffer *eb,
+ struct btrfs_free_space_header *h,
+ struct btrfs_disk_key *key)
+{
+ write_eb_member(eb, h, struct btrfs_free_space_header, location, key);
+}
+
+/* struct btrfs_disk_key */
+BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
+ objectid, 64);
+BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64);
+BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8);
+
+static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu,
+ struct btrfs_disk_key *disk)
+{
+ cpu->offset = le64_to_cpu(disk->offset);
+ cpu->type = disk->type;
+ cpu->objectid = le64_to_cpu(disk->objectid);
+}
+
+static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk,
+ const struct btrfs_key *cpu)
+{
+ disk->offset = cpu_to_le64(cpu->offset);
+ disk->type = cpu->type;
+ disk->objectid = cpu_to_le64(cpu->objectid);
+}
+
+static inline void btrfs_node_key_to_cpu(struct extent_buffer *eb,
+ struct btrfs_key *key, int nr)
+{
+ struct btrfs_disk_key disk_key;
+ btrfs_node_key(eb, &disk_key, nr);
+ btrfs_disk_key_to_cpu(key, &disk_key);
+}
+
+static inline void btrfs_item_key_to_cpu(struct extent_buffer *eb,
+ struct btrfs_key *key, int nr)
+{
+ struct btrfs_disk_key disk_key;
+ btrfs_item_key(eb, &disk_key, nr);
+ btrfs_disk_key_to_cpu(key, &disk_key);
+}
+
+static inline void btrfs_dir_item_key_to_cpu(struct extent_buffer *eb,
+ struct btrfs_dir_item *item,
+ struct btrfs_key *key)
+{
+ struct btrfs_disk_key disk_key;
+ btrfs_dir_item_key(eb, item, &disk_key);
+ btrfs_disk_key_to_cpu(key, &disk_key);
+}
+
+/* struct btrfs_header */
+BTRFS_SETGET_HEADER_FUNCS(header_bytenr, struct btrfs_header, bytenr, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header,
+ generation, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32);
+BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64);
+BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_header_bytenr, struct btrfs_header, bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_header_nritems, struct btrfs_header, nritems,
+ 32);
+BTRFS_SETGET_STACK_FUNCS(stack_header_owner, struct btrfs_header, owner, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_header_generation, struct btrfs_header,
+ generation, 64);
+
+static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag)
+{
+ return (btrfs_header_flags(eb) & flag) == flag;
+}
+
+static inline int btrfs_set_header_flag(struct extent_buffer *eb, u64 flag)
+{
+ u64 flags = btrfs_header_flags(eb);
+ btrfs_set_header_flags(eb, flags | flag);
+ return (flags & flag) == flag;
+}
+
+static inline int btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag)
+{
+ u64 flags = btrfs_header_flags(eb);
+ btrfs_set_header_flags(eb, flags & ~flag);
+ return (flags & flag) == flag;
+}
+
+static inline int btrfs_header_backref_rev(struct extent_buffer *eb)
+{
+ u64 flags = btrfs_header_flags(eb);
+ return flags >> BTRFS_BACKREF_REV_SHIFT;
+}
+
+static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb,
+ int rev)
+{
+ u64 flags = btrfs_header_flags(eb);
+ flags &= ~BTRFS_BACKREF_REV_MASK;
+ flags |= (u64)rev << BTRFS_BACKREF_REV_SHIFT;
+ btrfs_set_header_flags(eb, flags);
+}
+
+static inline unsigned long btrfs_header_fsid(void)
+{
+ return offsetof(struct btrfs_header, fsid);
+}
+
+static inline unsigned long btrfs_header_chunk_tree_uuid(struct extent_buffer *eb)
+{
+ return offsetof(struct btrfs_header, chunk_tree_uuid);
+}
-int btrfs_comp_keys(struct btrfs_key *, struct btrfs_key *);
-int btrfs_comp_keys_type(struct btrfs_key *, struct btrfs_key *);
-int btrfs_bin_search(union btrfs_tree_node *, struct btrfs_key *, int *);
-void btrfs_free_path(struct btrfs_path *);
-int btrfs_search_tree(const struct btrfs_root *, struct btrfs_key *,
- struct btrfs_path *);
-int btrfs_prev_slot(struct btrfs_path *);
-int btrfs_next_slot(struct btrfs_path *);
+static inline u8 *btrfs_header_csum(struct extent_buffer *eb)
+{
+ unsigned long ptr = offsetof(struct btrfs_header, csum);
+ return (u8 *)ptr;
+}
+
+static inline int btrfs_is_leaf(struct extent_buffer *eb)
+{
+ return (btrfs_header_level(eb) == 0);
+}
+
+/* struct btrfs_root_item */
+BTRFS_SETGET_FUNCS(disk_root_generation, struct btrfs_root_item,
+ generation, 64);
+BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32);
+BTRFS_SETGET_FUNCS(disk_root_bytenr, struct btrfs_root_item, bytenr, 64);
+BTRFS_SETGET_FUNCS(disk_root_level, struct btrfs_root_item, level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(root_generation, struct btrfs_root_item,
+ generation, 64);
+BTRFS_SETGET_STACK_FUNCS(root_bytenr, struct btrfs_root_item, bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(root_level, struct btrfs_root_item, level, 8);
+BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64);
+BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32);
+BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64);
+BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item,
+ last_snapshot, 64);
+BTRFS_SETGET_STACK_FUNCS(root_generation_v2, struct btrfs_root_item,
+ generation_v2, 64);
+BTRFS_SETGET_STACK_FUNCS(root_ctransid, struct btrfs_root_item,
+ ctransid, 64);
+BTRFS_SETGET_STACK_FUNCS(root_otransid, struct btrfs_root_item,
+ otransid, 64);
+BTRFS_SETGET_STACK_FUNCS(root_stransid, struct btrfs_root_item,
+ stransid, 64);
+BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item,
+ rtransid, 64);
+
+static inline struct btrfs_timespec* btrfs_root_ctime(
+ struct btrfs_root_item *root_item)
+{
+ unsigned long ptr = (unsigned long)root_item;
+ ptr += offsetof(struct btrfs_root_item, ctime);
+ return (struct btrfs_timespec *)ptr;
+}
+
+static inline struct btrfs_timespec* btrfs_root_otime(
+ struct btrfs_root_item *root_item)
+{
+ unsigned long ptr = (unsigned long)root_item;
+ ptr += offsetof(struct btrfs_root_item, otime);
+ return (struct btrfs_timespec *)ptr;
+}
+
+static inline struct btrfs_timespec* btrfs_root_stime(
+ struct btrfs_root_item *root_item)
+{
+ unsigned long ptr = (unsigned long)root_item;
+ ptr += offsetof(struct btrfs_root_item, stime);
+ return (struct btrfs_timespec *)ptr;
+}
+
+static inline struct btrfs_timespec* btrfs_root_rtime(
+ struct btrfs_root_item *root_item)
+{
+ unsigned long ptr = (unsigned long)root_item;
+ ptr += offsetof(struct btrfs_root_item, rtime);
+ return (struct btrfs_timespec *)ptr;
+}
+
+/* struct btrfs_root_backup */
+BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup,
+ tree_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_tree_root_gen, struct btrfs_root_backup,
+ tree_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_tree_root_level, struct btrfs_root_backup,
+ tree_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_chunk_root, struct btrfs_root_backup,
+ chunk_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_gen, struct btrfs_root_backup,
+ chunk_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_level, struct btrfs_root_backup,
+ chunk_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_extent_root, struct btrfs_root_backup,
+ extent_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_extent_root_gen, struct btrfs_root_backup,
+ extent_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_extent_root_level, struct btrfs_root_backup,
+ extent_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_fs_root, struct btrfs_root_backup,
+ fs_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_fs_root_gen, struct btrfs_root_backup,
+ fs_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_fs_root_level, struct btrfs_root_backup,
+ fs_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_dev_root, struct btrfs_root_backup,
+ dev_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_dev_root_gen, struct btrfs_root_backup,
+ dev_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_dev_root_level, struct btrfs_root_backup,
+ dev_root_level, 8);
+
+BTRFS_SETGET_STACK_FUNCS(backup_csum_root, struct btrfs_root_backup,
+ csum_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_csum_root_gen, struct btrfs_root_backup,
+ csum_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_csum_root_level, struct btrfs_root_backup,
+ csum_root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(backup_total_bytes, struct btrfs_root_backup,
+ total_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup,
+ bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
+ num_devices, 64);
+
+/* struct btrfs_super_block */
+
+BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block,
+ generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_sys_array_size,
+ struct btrfs_super_block, sys_chunk_array_size, 32);
+BTRFS_SETGET_STACK_FUNCS(super_chunk_root_generation,
+ struct btrfs_super_block, chunk_root_generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block,
+ root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block,
+ chunk_root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block,
+ chunk_root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(super_log_root, struct btrfs_super_block,
+ log_root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_log_root_transid, struct btrfs_super_block,
+ log_root_transid, 64);
+BTRFS_SETGET_STACK_FUNCS(super_log_root_level, struct btrfs_super_block,
+ log_root_level, 8);
+BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block,
+ total_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block,
+ bytes_used, 64);
+BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block,
+ sectorsize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block,
+ nodesize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block,
+ stripesize, 32);
+BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block,
+ root_dir_objectid, 64);
+BTRFS_SETGET_STACK_FUNCS(super_num_devices, struct btrfs_super_block,
+ num_devices, 64);
+BTRFS_SETGET_STACK_FUNCS(super_compat_flags, struct btrfs_super_block,
+ compat_flags, 64);
+BTRFS_SETGET_STACK_FUNCS(super_compat_ro_flags, struct btrfs_super_block,
+ compat_ro_flags, 64);
+BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block,
+ incompat_flags, 64);
+BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
+ csum_type, 16);
+BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
+ cache_generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
+ uuid_tree_generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64);
+
+static inline unsigned long btrfs_leaf_data(struct extent_buffer *l)
+{
+ return offsetof(struct btrfs_leaf, items);
+}
+
+/* struct btrfs_file_extent_item */
+BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_type, struct btrfs_file_extent_item, type, 8);
+
+static inline unsigned long btrfs_file_extent_inline_start(struct
+ btrfs_file_extent_item *e)
+{
+ unsigned long offset = (unsigned long)e;
+ offset += offsetof(struct btrfs_file_extent_item, disk_bytenr);
+ return offset;
+}
+
+static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize)
+{
+ return offsetof(struct btrfs_file_extent_item, disk_bytenr) + datasize;
+}
+
+BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item,
+ disk_bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr, struct btrfs_file_extent_item,
+ disk_bytenr, 64);
+BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item,
+ generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_generation, struct btrfs_file_extent_item,
+ generation, 64);
+BTRFS_SETGET_FUNCS(file_extent_disk_num_bytes, struct btrfs_file_extent_item,
+ disk_num_bytes, 64);
+BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item,
+ offset, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_offset, struct btrfs_file_extent_item,
+ offset, 64);
+BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item,
+ num_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes, struct btrfs_file_extent_item,
+ num_bytes, 64);
+BTRFS_SETGET_FUNCS(file_extent_ram_bytes, struct btrfs_file_extent_item,
+ ram_bytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_ram_bytes, struct btrfs_file_extent_item,
+ ram_bytes, 64);
+BTRFS_SETGET_FUNCS(file_extent_compression, struct btrfs_file_extent_item,
+ compression, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_compression, struct btrfs_file_extent_item,
+ compression, 8);
+BTRFS_SETGET_FUNCS(file_extent_encryption, struct btrfs_file_extent_item,
+ encryption, 8);
+BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item,
+ other_encoding, 16);
+
+/* btrfs_qgroup_status_item */
+BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item,
+ version, 64);
+BTRFS_SETGET_FUNCS(qgroup_status_generation, struct btrfs_qgroup_status_item,
+ generation, 64);
+BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item,
+ flags, 64);
+BTRFS_SETGET_FUNCS(qgroup_status_rescan, struct btrfs_qgroup_status_item,
+ rescan, 64);
+
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_version,
+ struct btrfs_qgroup_status_item, version, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_generation,
+ struct btrfs_qgroup_status_item, generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_flags,
+ struct btrfs_qgroup_status_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_status_rescan,
+ struct btrfs_qgroup_status_item, rescan, 64);
+
+/* btrfs_qgroup_info_item */
+BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item,
+ generation, 64);
+BTRFS_SETGET_FUNCS(qgroup_info_referenced, struct btrfs_qgroup_info_item,
+ rfer, 64);
+BTRFS_SETGET_FUNCS(qgroup_info_referenced_compressed,
+ struct btrfs_qgroup_info_item, rfer_cmpr, 64);
+BTRFS_SETGET_FUNCS(qgroup_info_exclusive, struct btrfs_qgroup_info_item,
+ excl, 64);
+BTRFS_SETGET_FUNCS(qgroup_info_exclusive_compressed,
+ struct btrfs_qgroup_info_item, excl_cmpr, 64);
-static inline struct btrfs_key *btrfs_path_leaf_key(struct btrfs_path *p) {
- return &p->nodes[0]->leaf.items[p->slots[0]].key;
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_generation,
+ struct btrfs_qgroup_info_item, generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_referenced,
+ struct btrfs_qgroup_info_item, rfer, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_referenced_compressed,
+ struct btrfs_qgroup_info_item, rfer_cmpr, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_exclusive,
+ struct btrfs_qgroup_info_item, excl, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_exclusive_compressed,
+ struct btrfs_qgroup_info_item, excl_cmpr, 64);
+
+/* btrfs_qgroup_limit_item */
+BTRFS_SETGET_FUNCS(qgroup_limit_flags, struct btrfs_qgroup_limit_item,
+ flags, 64);
+BTRFS_SETGET_FUNCS(qgroup_limit_max_referenced, struct btrfs_qgroup_limit_item,
+ max_rfer, 64);
+BTRFS_SETGET_FUNCS(qgroup_limit_max_exclusive, struct btrfs_qgroup_limit_item,
+ max_excl, 64);
+BTRFS_SETGET_FUNCS(qgroup_limit_rsv_referenced, struct btrfs_qgroup_limit_item,
+ rsv_rfer, 64);
+BTRFS_SETGET_FUNCS(qgroup_limit_rsv_exclusive, struct btrfs_qgroup_limit_item,
+ rsv_excl, 64);
+
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_flags,
+ struct btrfs_qgroup_limit_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_max_referenced,
+ struct btrfs_qgroup_limit_item, max_rfer, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_max_exclusive,
+ struct btrfs_qgroup_limit_item, max_excl, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_rsv_referenced,
+ struct btrfs_qgroup_limit_item, rsv_rfer, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_qgroup_limit_rsv_exclusive,
+ struct btrfs_qgroup_limit_item, rsv_excl, 64);
+
+/* btrfs_balance_item */
+BTRFS_SETGET_FUNCS(balance_item_flags, struct btrfs_balance_item, flags, 64);
+
+static inline struct btrfs_disk_balance_args* btrfs_balance_item_data(
+ struct extent_buffer *eb, struct btrfs_balance_item *bi)
+{
+ unsigned long offset = (unsigned long)bi;
+ struct btrfs_balance_item *p;
+ p = (struct btrfs_balance_item *)(eb->data + offset);
+ return &p->data;
}
-static inline struct btrfs_key *
-btrfs_search_tree_key_type(const struct btrfs_root *root, u64 objectid,
- u8 type, struct btrfs_path *path)
+static inline struct btrfs_disk_balance_args* btrfs_balance_item_meta(
+ struct extent_buffer *eb, struct btrfs_balance_item *bi)
{
- struct btrfs_key key, *res;
+ unsigned long offset = (unsigned long)bi;
+ struct btrfs_balance_item *p;
+ p = (struct btrfs_balance_item *)(eb->data + offset);
+ return &p->meta;
+}
- key.objectid = objectid;
- key.type = type;
- key.offset = 0;
+static inline struct btrfs_disk_balance_args* btrfs_balance_item_sys(
+ struct extent_buffer *eb, struct btrfs_balance_item *bi)
+{
+ unsigned long offset = (unsigned long)bi;
+ struct btrfs_balance_item *p;
+ p = (struct btrfs_balance_item *)(eb->data + offset);
+ return &p->sys;
+}
- if (btrfs_search_tree(root, &key, path))
- return NULL;
+static inline u64 btrfs_dev_stats_value(const struct extent_buffer *eb,
+ const struct btrfs_dev_stats_item *ptr,
+ int index)
+{
+ u64 val;
- res = btrfs_path_leaf_key(path);
- if (btrfs_comp_keys_type(&key, res)) {
- btrfs_free_path(path);
- return NULL;
- }
+ read_extent_buffer(eb, &val,
+ offsetof(struct btrfs_dev_stats_item, values) +
+ ((unsigned long)ptr) + (index * sizeof(u64)),
+ sizeof(val));
+ return val;
+}
- return res;
+/*
+ * this returns the number of bytes used by the item on disk, minus the
+ * size of any extent headers. If a file is compressed on disk, this is
+ * the compressed size
+ */
+static inline u32 btrfs_file_extent_inline_item_len(struct extent_buffer *eb,
+ struct btrfs_item *e)
+{
+ unsigned long offset;
+ offset = offsetof(struct btrfs_file_extent_item, disk_bytenr);
+ return btrfs_item_size(eb, e) - offset;
}
-static inline u32 btrfs_path_item_size(struct btrfs_path *p)
+#define btrfs_fs_incompat(fs_info, opt) \
+ __btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt)
+
+static inline bool __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag)
{
- return p->nodes[0]->leaf.items[p->slots[0]].size;
+ struct btrfs_super_block *disk_super;
+ disk_super = fs_info->super_copy;
+ return !!(btrfs_super_incompat_flags(disk_super) & flag);
}
-static inline void *btrfs_leaf_data(struct btrfs_leaf *leaf, u32 slot)
+#define btrfs_fs_compat_ro(fs_info, opt) \
+ __btrfs_fs_compat_ro((fs_info), BTRFS_FEATURE_COMPAT_RO_##opt)
+
+static inline int __btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag)
{
- return ((u8 *) leaf) + sizeof(struct btrfs_header)
- + leaf->items[slot].offset;
+ struct btrfs_super_block *disk_super;
+ disk_super = fs_info->super_copy;
+ return !!(btrfs_super_compat_ro_flags(disk_super) & flag);
}
-static inline void *btrfs_path_leaf_data(struct btrfs_path *p)
+/* helper function to cast into the data area of the leaf. */
+#define btrfs_item_ptr(leaf, slot, type) \
+ ((type *)(btrfs_leaf_data(leaf) + \
+ btrfs_item_offset_nr(leaf, slot)))
+
+#define btrfs_item_ptr_offset(leaf, slot) \
+ ((unsigned long)(btrfs_leaf_data(leaf) + \
+ btrfs_item_offset_nr(leaf, slot)))
+
+static inline u64 btrfs_name_hash(const char *name, int len)
{
- return btrfs_leaf_data(&p->nodes[0]->leaf, p->slots[0]);
+ return (u64)crc32c((u32)~1, (u8 *)name, len);
}
-#define btrfs_item_ptr(l,s,t) \
- ((t *) btrfs_leaf_data((l),(s)))
+/*
+ * Figure the key offset of an extended inode ref
+ */
+static inline u64 btrfs_extref_hash(u64 parent_objectid, const char *name,
+ int len)
+{
+ return crc32(parent_objectid, (u8 *)name, len);
+}
+
+union btrfs_tree_node {
+ struct btrfs_header header;
+ struct btrfs_leaf leaf;
+ struct btrfs_node node;
+};
#define btrfs_path_item_ptr(p,t) \
((t *) btrfs_path_leaf_data((p)))
+u16 btrfs_super_csum_size(const struct btrfs_super_block *s);
+const char *btrfs_super_csum_name(u16 csum_type);
+u16 btrfs_csum_type_size(u16 csum_type);
+size_t btrfs_super_num_csums(void);
+
+/* root-tree.c */
+int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
+ struct btrfs_root_item *item, struct btrfs_key *key);
+
+/* dir-item.c */
+struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, u64 dir,
+ const char *name, int name_len,
+ int mod);
+typedef int (*btrfs_iter_dir_callback_t)(struct btrfs_root *root,
+ struct extent_buffer *eb,
+ struct btrfs_dir_item *di);
+int btrfs_iter_dir(struct btrfs_root *root, u64 ino,
+ btrfs_iter_dir_callback_t callback);
+/* inode.c */
+int btrfs_lookup_path(struct btrfs_root *root, u64 ino, const char *filename,
+ struct btrfs_root **root_ret, u64 *ino_ret,
+ u8 *type_ret, int symlink_limit);
+int btrfs_read_extent_inline(struct btrfs_path *path,
+ struct btrfs_file_extent_item *fi, char *dest);
+int btrfs_read_extent_reg(struct btrfs_path *path,
+ struct btrfs_file_extent_item *fi, u64 offset,
+ int len, char *dest);
+
+/* ctree.c */
+int btrfs_comp_cpu_keys(const struct btrfs_key *k1, const struct btrfs_key *k2);
+enum btrfs_tree_block_status
+btrfs_check_node(struct btrfs_fs_info *fs_info,
+ struct btrfs_disk_key *parent_key, struct extent_buffer *buf);
+enum btrfs_tree_block_status
+btrfs_check_leaf(struct btrfs_fs_info *fs_info,
+ struct btrfs_disk_key *parent_key, struct extent_buffer *buf);
+struct extent_buffer *read_node_slot(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *parent, int slot);
+int btrfs_previous_item(struct btrfs_root *root,
+ struct btrfs_path *path, u64 min_objectid,
+ int type);
+int btrfs_next_sibling_tree_block(struct btrfs_fs_info *fs_info,
+ struct btrfs_path *path);
+/*
+ * Walk up the tree as far as necessary to find the next leaf.
+ *
+ * returns 0 if it found something or 1 if there are no greater leaves.
+ * returns < 0 on io errors.
+ */
+static inline int btrfs_next_leaf(struct btrfs_root *root,
+ struct btrfs_path *path)
+{
+ path->lowest_level = 0;
+ return btrfs_next_sibling_tree_block(root->fs_info, path);
+}
+
+static inline int btrfs_next_item(struct btrfs_root *root,
+ struct btrfs_path *p)
+{
+ ++p->slots[0];
+ if (p->slots[0] >= btrfs_header_nritems(p->nodes[0]))
+ return btrfs_next_leaf(root, p);
+ return 0;
+}
+
+int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
+int btrfs_leaf_free_space(struct extent_buffer *leaf);
+int btrfs_search_slot(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, const struct btrfs_key *key,
+ struct btrfs_path *p, int ins_len, int cow);
+int btrfs_search_slot_for_read(struct btrfs_root *root,
+ const struct btrfs_key *key,
+ struct btrfs_path *p, int find_higher,
+ int return_any);
+void btrfs_release_path(struct btrfs_path *p);
+struct btrfs_path *btrfs_alloc_path(void);
+void btrfs_free_path(struct btrfs_path *p);
+static inline void btrfs_init_path(struct btrfs_path *p)
+{
+ memset(p, 0, sizeof(*p));
+}
+int btrfs_bin_search(struct extent_buffer *eb, const struct btrfs_key *key,
+ int *slot);
+int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path,
+ u64 iobjectid, u64 ioff, u8 key_type,
+ struct btrfs_key *found_key);
#endif /* __BTRFS_CTREE_H__ */
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 63b5bf0a86..aab197a6d5 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -6,119 +6,163 @@
*/
#include "btrfs.h"
+#include "disk-io.h"
-static int verify_dir_item(struct btrfs_dir_item *item, u32 start, u32 total)
+static int verify_dir_item(struct btrfs_root *root,
+ struct extent_buffer *leaf,
+ struct btrfs_dir_item *dir_item)
{
- u16 max_len = BTRFS_NAME_LEN;
- u32 end;
+ u16 namelen = BTRFS_NAME_LEN;
+ u8 type = btrfs_dir_type(leaf, dir_item);
- if (item->type >= BTRFS_FT_MAX) {
- printf("%s: invalid dir item type: %i\n", __func__, item->type);
+ if (type == BTRFS_FT_XATTR)
+ namelen = XATTR_NAME_MAX;
+
+ if (btrfs_dir_name_len(leaf, dir_item) > namelen) {
+ fprintf(stderr, "invalid dir item name len: %u\n",
+ (unsigned)btrfs_dir_data_len(leaf, dir_item));
return 1;
}
- if (item->type == BTRFS_FT_XATTR)
- max_len = 255; /* XATTR_NAME_MAX */
-
- end = start + sizeof(*item) + item->name_len;
- if (item->name_len > max_len || end > total) {
- printf("%s: invalid dir item name len: %u\n", __func__,
- item->name_len);
+ /* BTRFS_MAX_XATTR_SIZE is the same for all dir items */
+ if ((btrfs_dir_data_len(leaf, dir_item) +
+ btrfs_dir_name_len(leaf, dir_item)) >
+ BTRFS_MAX_XATTR_SIZE(root->fs_info)) {
+ fprintf(stderr, "invalid dir item name + data len: %u + %u\n",
+ (unsigned)btrfs_dir_name_len(leaf, dir_item),
+ (unsigned)btrfs_dir_data_len(leaf, dir_item));
return 1;
}
return 0;
}
-static struct btrfs_dir_item *
-btrfs_match_dir_item_name(struct btrfs_path *path, const char *name,
- int name_len)
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+ struct btrfs_path *path,
+ const char *name, int name_len)
{
- struct btrfs_dir_item *item;
- u32 total_len, cur = 0, this_len;
- const char *name_ptr;
-
- item = btrfs_path_item_ptr(path, struct btrfs_dir_item);
-
- total_len = btrfs_path_item_size(path);
+ struct btrfs_dir_item *dir_item;
+ unsigned long name_ptr;
+ u32 total_len;
+ u32 cur = 0;
+ u32 this_len;
+ struct extent_buffer *leaf;
+
+ leaf = path->nodes[0];
+ dir_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
+ total_len = btrfs_item_size_nr(leaf, path->slots[0]);
+ if (verify_dir_item(root, leaf, dir_item))
+ return NULL;
+
+ while(cur < total_len) {
+ this_len = sizeof(*dir_item) +
+ btrfs_dir_name_len(leaf, dir_item) +
+ btrfs_dir_data_len(leaf, dir_item);
+ if (this_len > (total_len - cur)) {
+ fprintf(stderr, "invalid dir item size\n");
+ return NULL;
+ }
- while (cur < total_len) {
- btrfs_dir_item_to_cpu(item);
- this_len = sizeof(*item) + item->name_len + item->data_len;
- name_ptr = (const char *) (item + 1);
+ name_ptr = (unsigned long)(dir_item + 1);
- if (verify_dir_item(item, cur, total_len))
- return NULL;
- if (item->name_len == name_len && !memcmp(name_ptr, name,
- name_len))
- return item;
+ if (btrfs_dir_name_len(leaf, dir_item) == name_len &&
+ memcmp_extent_buffer(leaf, name, name_ptr, name_len) == 0)
+ return dir_item;
cur += this_len;
- item = (struct btrfs_dir_item *) ((u8 *) item + this_len);
+ dir_item = (struct btrfs_dir_item *)((char *)dir_item +
+ this_len);
}
-
return NULL;
}
-int btrfs_lookup_dir_item(const struct btrfs_root *root, u64 dir,
- const char *name, int name_len,
- struct btrfs_dir_item *item)
+struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, u64 dir,
+ const char *name, int name_len,
+ int mod)
{
- struct btrfs_path path;
+ int ret;
struct btrfs_key key;
- struct btrfs_dir_item *res = NULL;
+ int ins_len = mod < 0 ? -1 : 0;
+ int cow = mod != 0;
+ struct btrfs_key found_key;
+ struct extent_buffer *leaf;
key.objectid = dir;
key.type = BTRFS_DIR_ITEM_KEY;
+
key.offset = btrfs_name_hash(name, name_len);
- if (btrfs_search_tree(root, &key, &path))
- return -1;
+ ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow);
+ if (ret < 0)
+ return ERR_PTR(ret);
+ if (ret > 0) {
+ if (path->slots[0] == 0)
+ return NULL;
+ path->slots[0]--;
+ }
- if (btrfs_comp_keys_type(&key, btrfs_path_leaf_key(&path)))
- goto out;
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
- res = btrfs_match_dir_item_name(&path, name, name_len);
- if (res)
- *item = *res;
-out:
- btrfs_free_path(&path);
- return res ? 0 : -1;
+ if (found_key.objectid != dir ||
+ found_key.type != BTRFS_DIR_ITEM_KEY ||
+ found_key.offset != key.offset)
+ return NULL;
+
+ return btrfs_match_dir_item_name(root, path, name, name_len);
}
-int btrfs_readdir(const struct btrfs_root *root, u64 dir,
- btrfs_readdir_callback_t callback)
+int btrfs_iter_dir(struct btrfs_root *root, u64 ino,
+ btrfs_iter_dir_callback_t callback)
{
struct btrfs_path path;
- struct btrfs_key key, *found_key;
- struct btrfs_dir_item *item;
- int res = 0;
+ struct btrfs_key key;
+ int ret;
- key.objectid = dir;
+ btrfs_init_path(&path);
+ key.objectid = ino;
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = 0;
- if (btrfs_search_tree(root, &key, &path))
- return -1;
-
+ ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+ if (ret < 0)
+ return ret;
+ /* Should not happen */
+ if (ret == 0) {
+ ret = -EUCLEAN;
+ goto out;
+ }
+ if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
+ ret = btrfs_next_leaf(root, &path);
+ if (ret < 0)
+ goto out;
+ if (ret > 0) {
+ ret = 0;
+ goto out;
+ }
+ }
do {
- found_key = btrfs_path_leaf_key(&path);
- if (btrfs_comp_keys_type(&key, found_key))
- break;
-
- item = btrfs_path_item_ptr(&path, struct btrfs_dir_item);
- btrfs_dir_item_to_cpu(item);
-
- if (verify_dir_item(item, 0, sizeof(*item) + item->name_len))
- continue;
- if (item->type == BTRFS_FT_XATTR)
- continue;
+ struct btrfs_dir_item *di;
- if (callback(root, item))
+ btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+ if (key.objectid != ino || key.type != BTRFS_DIR_INDEX_KEY)
break;
- } while (!(res = btrfs_next_slot(&path)));
-
- btrfs_free_path(&path);
-
- return res < 0 ? -1 : 0;
+ di = btrfs_item_ptr(path.nodes[0], path.slots[0],
+ struct btrfs_dir_item);
+ if (verify_dir_item(root, path.nodes[0], di)) {
+ ret = -EUCLEAN;
+ goto out;
+ }
+ ret = callback(root, path.nodes[0], di);
+ if (ret < 0)
+ goto out;
+ } while (!(ret = btrfs_next_item(root, &path)));
+
+ if (ret > 0)
+ ret = 0;
+out:
+ btrfs_release_path(&path);
+ return ret;
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
new file mode 100644
index 0000000000..01e7cee520
--- /dev/null
+++ b/fs/btrfs/disk-io.c
@@ -0,0 +1,1062 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <common.h>
+#include <fs_internal.h>
+#include <uuid.h>
+#include <memalign.h>
+#include "kernel-shared/btrfs_tree.h"
+#include "common/rbtree-utils.h"
+#include "disk-io.h"
+#include "ctree.h"
+#include "btrfs.h"
+#include "volumes.h"
+#include "extent-io.h"
+#include "crypto/hash.h"
+
+/* specified errno for check_tree_block */
+#define BTRFS_BAD_BYTENR (-1)
+#define BTRFS_BAD_FSID (-2)
+#define BTRFS_BAD_LEVEL (-3)
+#define BTRFS_BAD_NRITEMS (-4)
+
+/* Calculate max possible nritems for a leaf/node */
+static u32 max_nritems(u8 level, u32 nodesize)
+{
+
+ if (level == 0)
+ return ((nodesize - sizeof(struct btrfs_header)) /
+ sizeof(struct btrfs_item));
+ return ((nodesize - sizeof(struct btrfs_header)) /
+ sizeof(struct btrfs_key_ptr));
+}
+
+static int check_tree_block(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *buf)
+{
+
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ u32 nodesize = fs_info->nodesize;
+ bool fsid_match = false;
+ int ret = BTRFS_BAD_FSID;
+
+ if (buf->start != btrfs_header_bytenr(buf))
+ return BTRFS_BAD_BYTENR;
+ if (btrfs_header_level(buf) >= BTRFS_MAX_LEVEL)
+ return BTRFS_BAD_LEVEL;
+ if (btrfs_header_nritems(buf) > max_nritems(btrfs_header_level(buf),
+ nodesize))
+ return BTRFS_BAD_NRITEMS;
+
+ /* Only leaf can be empty */
+ if (btrfs_header_nritems(buf) == 0 &&
+ btrfs_header_level(buf) != 0)
+ return BTRFS_BAD_NRITEMS;
+
+ while (fs_devices) {
+ /*
+ * Checking the incompat flag is only valid for the current
+ * fs. For seed devices it's forbidden to have their uuid
+ * changed so reading ->fsid in this case is fine
+ */
+ if (fs_devices == fs_info->fs_devices &&
+ btrfs_fs_incompat(fs_info, METADATA_UUID))
+ fsid_match = !memcmp_extent_buffer(buf,
+ fs_devices->metadata_uuid,
+ btrfs_header_fsid(),
+ BTRFS_FSID_SIZE);
+ else
+ fsid_match = !memcmp_extent_buffer(buf,
+ fs_devices->fsid,
+ btrfs_header_fsid(),
+ BTRFS_FSID_SIZE);
+
+
+ if (fsid_match) {
+ ret = 0;
+ break;
+ }
+ fs_devices = fs_devices->seed;
+ }
+ return ret;
+}
+
+static void print_tree_block_error(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *eb,
+ int err)
+{
+ char fs_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
+ char found_uuid[BTRFS_UUID_UNPARSED_SIZE] = {'\0'};
+ u8 buf[BTRFS_UUID_SIZE];
+
+ if (!err)
+ return;
+
+ fprintf(stderr, "bad tree block %llu, ", eb->start);
+ switch (err) {
+ case BTRFS_BAD_FSID:
+ read_extent_buffer(eb, buf, btrfs_header_fsid(),
+ BTRFS_UUID_SIZE);
+ uuid_unparse(buf, found_uuid);
+ uuid_unparse(fs_info->fs_devices->metadata_uuid, fs_uuid);
+ fprintf(stderr, "fsid mismatch, want=%s, have=%s\n",
+ fs_uuid, found_uuid);
+ break;
+ case BTRFS_BAD_BYTENR:
+ fprintf(stderr, "bytenr mismatch, want=%llu, have=%llu\n",
+ eb->start, btrfs_header_bytenr(eb));
+ break;
+ case BTRFS_BAD_LEVEL:
+ fprintf(stderr, "bad level, %u > %d\n",
+ btrfs_header_level(eb), BTRFS_MAX_LEVEL);
+ break;
+ case BTRFS_BAD_NRITEMS:
+ fprintf(stderr, "invalid nr_items: %u\n",
+ btrfs_header_nritems(eb));
+ break;
+ }
+}
+
+int btrfs_csum_data(u16 csum_type, const u8 *data, u8 *out, size_t len)
+{
+ memset(out, 0, BTRFS_CSUM_SIZE);
+
+ switch (csum_type) {
+ case BTRFS_CSUM_TYPE_CRC32:
+ return hash_crc32c(data, len, out);
+ case BTRFS_CSUM_TYPE_XXHASH:
+ return hash_xxhash(data, len, out);
+ case BTRFS_CSUM_TYPE_SHA256:
+ return hash_sha256(data, len, out);
+ default:
+ printf("Unknown csum type %d\n", csum_type);
+ return -EINVAL;
+ }
+}
+
+/*
+ * Check if the super is valid:
+ * - nodesize/sectorsize - minimum, maximum, alignment
+ * - tree block starts - alignment
+ * - number of devices - something sane
+ * - sys array size - maximum
+ */
+static int btrfs_check_super(struct btrfs_super_block *sb)
+{
+ u8 result[BTRFS_CSUM_SIZE];
+ u16 csum_type;
+ int csum_size;
+ u8 *metadata_uuid;
+
+ if (btrfs_super_magic(sb) != BTRFS_MAGIC)
+ return -EIO;
+
+ csum_type = btrfs_super_csum_type(sb);
+ if (csum_type >= btrfs_super_num_csums()) {
+ error("unsupported checksum algorithm %u", csum_type);
+ return -EIO;
+ }
+ csum_size = btrfs_super_csum_size(sb);
+
+ btrfs_csum_data(csum_type, (u8 *)sb + BTRFS_CSUM_SIZE,
+ result, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
+
+ if (memcmp(result, sb->csum, csum_size)) {
+ error("superblock checksum mismatch");
+ return -EIO;
+ }
+ if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
+ error("tree_root level too big: %d >= %d",
+ btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
+ goto error_out;
+ }
+ if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) {
+ error("chunk_root level too big: %d >= %d",
+ btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL);
+ goto error_out;
+ }
+ if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) {
+ error("log_root level too big: %d >= %d",
+ btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL);
+ goto error_out;
+ }
+
+ if (!IS_ALIGNED(btrfs_super_root(sb), 4096)) {
+ error("tree_root block unaligned: %llu", btrfs_super_root(sb));
+ goto error_out;
+ }
+ if (!IS_ALIGNED(btrfs_super_chunk_root(sb), 4096)) {
+ error("chunk_root block unaligned: %llu",
+ btrfs_super_chunk_root(sb));
+ goto error_out;
+ }
+ if (!IS_ALIGNED(btrfs_super_log_root(sb), 4096)) {
+ error("log_root block unaligned: %llu",
+ btrfs_super_log_root(sb));
+ goto error_out;
+ }
+ if (btrfs_super_nodesize(sb) < 4096) {
+ error("nodesize too small: %u < 4096",
+ btrfs_super_nodesize(sb));
+ goto error_out;
+ }
+ if (!IS_ALIGNED(btrfs_super_nodesize(sb), 4096)) {
+ error("nodesize unaligned: %u", btrfs_super_nodesize(sb));
+ goto error_out;
+ }
+ if (btrfs_super_sectorsize(sb) < 4096) {
+ error("sectorsize too small: %u < 4096",
+ btrfs_super_sectorsize(sb));
+ goto error_out;
+ }
+ if (!IS_ALIGNED(btrfs_super_sectorsize(sb), 4096)) {
+ error("sectorsize unaligned: %u", btrfs_super_sectorsize(sb));
+ goto error_out;
+ }
+ if (btrfs_super_total_bytes(sb) == 0) {
+ error("invalid total_bytes 0");
+ goto error_out;
+ }
+ if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
+ error("invalid bytes_used %llu", btrfs_super_bytes_used(sb));
+ goto error_out;
+ }
+ if ((btrfs_super_stripesize(sb) != 4096)
+ && (btrfs_super_stripesize(sb) != btrfs_super_sectorsize(sb))) {
+ error("invalid stripesize %u", btrfs_super_stripesize(sb));
+ goto error_out;
+ }
+
+ if (btrfs_super_incompat_flags(sb) & BTRFS_FEATURE_INCOMPAT_METADATA_UUID)
+ metadata_uuid = sb->metadata_uuid;
+ else
+ metadata_uuid = sb->fsid;
+
+ if (memcmp(metadata_uuid, sb->dev_item.fsid, BTRFS_FSID_SIZE) != 0) {
+ char fsid[BTRFS_UUID_UNPARSED_SIZE];
+ char dev_fsid[BTRFS_UUID_UNPARSED_SIZE];
+
+ uuid_unparse(sb->metadata_uuid, fsid);
+ uuid_unparse(sb->dev_item.fsid, dev_fsid);
+ error("dev_item UUID does not match fsid: %s != %s",
+ dev_fsid, fsid);
+ goto error_out;
+ }
+
+ /*
+ * Hint to catch really bogus numbers, bitflips or so
+ */
+ if (btrfs_super_num_devices(sb) > (1UL << 31)) {
+ error("suspicious number of devices: %llu",
+ btrfs_super_num_devices(sb));
+ }
+
+ if (btrfs_super_num_devices(sb) == 0) {
+ error("number of devices is 0");
+ goto error_out;
+ }
+
+ /*
+ * Obvious sys_chunk_array corruptions, it must hold at least one key
+ * and one chunk
+ */
+ if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
+ error("system chunk array too big %u > %u",
+ btrfs_super_sys_array_size(sb),
+ BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
+ goto error_out;
+ }
+ if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
+ + sizeof(struct btrfs_chunk)) {
+ error("system chunk array too small %u < %zu",
+ btrfs_super_sys_array_size(sb),
+ sizeof(struct btrfs_disk_key) +
+ sizeof(struct btrfs_chunk));
+ goto error_out;
+ }
+
+ return 0;
+
+error_out:
+ error("superblock checksum matches but it has invalid members");
+ return -EIO;
+}
+
+/*
+ * btrfs_read_dev_super - read a valid primary superblock from a block device
+ * @desc,@part: file descriptor of the device
+ * @sb: buffer where the superblock is going to be read in
+ *
+ * Unlike the btrfs-progs/kernel version, here we ony care about the first
+ * super block, thus it's much simpler.
+ */
+int btrfs_read_dev_super(struct blk_desc *desc, struct disk_partition *part,
+ struct btrfs_super_block *sb)
+{
+ char tmp[BTRFS_SUPER_INFO_SIZE];
+ struct btrfs_super_block *buf = (struct btrfs_super_block *)tmp;
+ int ret;
+
+ ret = __btrfs_devread(desc, part, tmp, BTRFS_SUPER_INFO_SIZE,
+ BTRFS_SUPER_INFO_OFFSET);
+ if (ret < BTRFS_SUPER_INFO_SIZE)
+ return -EIO;
+
+ if (btrfs_super_bytenr(buf) != BTRFS_SUPER_INFO_OFFSET)
+ return -EIO;
+
+ if (btrfs_check_super(buf))
+ return -EIO;
+
+ memcpy(sb, buf, BTRFS_SUPER_INFO_SIZE);
+ return 0;
+}
+
+static int __csum_tree_block_size(struct extent_buffer *buf, u16 csum_size,
+ int verify, int silent, u16 csum_type)
+{
+ u8 result[BTRFS_CSUM_SIZE];
+ u32 len;
+
+ len = buf->len - BTRFS_CSUM_SIZE;
+ btrfs_csum_data(csum_type, (u8 *)buf->data + BTRFS_CSUM_SIZE,
+ result, len);
+
+ if (verify) {
+ if (memcmp_extent_buffer(buf, result, 0, csum_size)) {
+ /* FIXME: format */
+ if (!silent)
+ printk("checksum verify failed on %llu found %08X wanted %08X\n",
+ (unsigned long long)buf->start,
+ result[0],
+ buf->data[0]);
+ return 1;
+ }
+ } else {
+ write_extent_buffer(buf, result, 0, csum_size);
+ }
+ return 0;
+}
+
+int csum_tree_block_size(struct extent_buffer *buf, u16 csum_size, int verify,
+ u16 csum_type)
+{
+ return __csum_tree_block_size(buf, csum_size, verify, 0, csum_type);
+}
+
+static int csum_tree_block(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *buf, int verify)
+{
+ u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+ u16 csum_type = btrfs_super_csum_type(fs_info->super_copy);
+
+ return csum_tree_block_size(buf, csum_size, verify, csum_type);
+}
+
+struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
+ u64 bytenr, u32 blocksize)
+{
+ return find_extent_buffer(&fs_info->extent_cache,
+ bytenr, blocksize);
+}
+
+struct extent_buffer* btrfs_find_create_tree_block(
+ struct btrfs_fs_info *fs_info, u64 bytenr)
+{
+ return alloc_extent_buffer(fs_info, bytenr, fs_info->nodesize);
+}
+
+static int verify_parent_transid(struct extent_io_tree *io_tree,
+ struct extent_buffer *eb, u64 parent_transid,
+ int ignore)
+{
+ int ret;
+
+ if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
+ return 0;
+
+ if (extent_buffer_uptodate(eb) &&
+ btrfs_header_generation(eb) == parent_transid) {
+ ret = 0;
+ goto out;
+ }
+ printk("parent transid verify failed on %llu wanted %llu found %llu\n",
+ (unsigned long long)eb->start,
+ (unsigned long long)parent_transid,
+ (unsigned long long)btrfs_header_generation(eb));
+ if (ignore) {
+ eb->flags |= EXTENT_BAD_TRANSID;
+ printk("Ignoring transid failure\n");
+ return 0;
+ }
+
+ ret = 1;
+out:
+ clear_extent_buffer_uptodate(eb);
+ return ret;
+
+}
+
+int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror)
+{
+ unsigned long offset = 0;
+ struct btrfs_multi_bio *multi = NULL;
+ struct btrfs_device *device;
+ int ret = 0;
+ u64 read_len;
+ unsigned long bytes_left = eb->len;
+
+ while (bytes_left) {
+ read_len = bytes_left;
+ device = NULL;
+
+ ret = btrfs_map_block(info, READ, eb->start + offset,
+ &read_len, &multi, mirror, NULL);
+ if (ret) {
+ printk("Couldn't map the block %Lu\n", eb->start + offset);
+ kfree(multi);
+ return -EIO;
+ }
+ device = multi->stripes[0].dev;
+
+ if (!device->desc || !device->part) {
+ kfree(multi);
+ return -EIO;
+ }
+
+ if (read_len > bytes_left)
+ read_len = bytes_left;
+
+ ret = read_extent_from_disk(device->desc, device->part,
+ multi->stripes[0].physical, eb,
+ offset, read_len);
+ kfree(multi);
+ multi = NULL;
+
+ if (ret)
+ return -EIO;
+ offset += read_len;
+ bytes_left -= read_len;
+ }
+ return 0;
+}
+
+struct extent_buffer* read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
+ u64 parent_transid)
+{
+ int ret;
+ struct extent_buffer *eb;
+ u64 best_transid = 0;
+ u32 sectorsize = fs_info->sectorsize;
+ int mirror_num = 1;
+ int good_mirror = 0;
+ int candidate_mirror = 0;
+ int num_copies;
+ int ignore = 0;
+
+ /*
+ * Don't even try to create tree block for unaligned tree block
+ * bytenr.
+ * Such unaligned tree block will free overlapping extent buffer,
+ * causing use-after-free bugs for fuzzed images.
+ */
+ if (bytenr < sectorsize || !IS_ALIGNED(bytenr, sectorsize)) {
+ error("tree block bytenr %llu is not aligned to sectorsize %u",
+ bytenr, sectorsize);
+ return ERR_PTR(-EIO);
+ }
+
+ eb = btrfs_find_create_tree_block(fs_info, bytenr);
+ if (!eb)
+ return ERR_PTR(-ENOMEM);
+
+ if (btrfs_buffer_uptodate(eb, parent_transid))
+ return eb;
+
+ num_copies = btrfs_num_copies(fs_info, eb->start, eb->len);
+ while (1) {
+ ret = read_whole_eb(fs_info, eb, mirror_num);
+ if (ret == 0 && csum_tree_block(fs_info, eb, 1) == 0 &&
+ check_tree_block(fs_info, eb) == 0 &&
+ verify_parent_transid(&fs_info->extent_cache, eb,
+ parent_transid, ignore) == 0) {
+ /*
+ * check_tree_block() is less strict to allow btrfs
+ * check to get raw eb with bad key order and fix it.
+ * But we still need to try to get a good copy if
+ * possible, or bad key order can go into tools like
+ * btrfs ins dump-tree.
+ */
+ if (btrfs_header_level(eb))
+ ret = btrfs_check_node(fs_info, NULL, eb);
+ else
+ ret = btrfs_check_leaf(fs_info, NULL, eb);
+ if (!ret || candidate_mirror == mirror_num) {
+ btrfs_set_buffer_uptodate(eb);
+ return eb;
+ }
+ if (candidate_mirror <= 0)
+ candidate_mirror = mirror_num;
+ }
+ if (ignore) {
+ if (candidate_mirror > 0) {
+ mirror_num = candidate_mirror;
+ continue;
+ }
+ if (check_tree_block(fs_info, eb))
+ print_tree_block_error(fs_info, eb,
+ check_tree_block(fs_info, eb));
+ else
+ fprintf(stderr, "Csum didn't match\n");
+ ret = -EIO;
+ break;
+ }
+ if (num_copies == 1) {
+ ignore = 1;
+ continue;
+ }
+ if (btrfs_header_generation(eb) > best_transid) {
+ best_transid = btrfs_header_generation(eb);
+ good_mirror = mirror_num;
+ }
+ mirror_num++;
+ if (mirror_num > num_copies) {
+ if (candidate_mirror > 0)
+ mirror_num = candidate_mirror;
+ else
+ mirror_num = good_mirror;
+ ignore = 1;
+ continue;
+ }
+ }
+ /*
+ * We failed to read this tree block, it be should deleted right now
+ * to avoid stale cache populate the cache.
+ */
+ free_extent_buffer(eb);
+ return ERR_PTR(ret);
+}
+
+int read_extent_data(struct btrfs_fs_info *fs_info, char *data, u64 logical,
+ u64 *len, int mirror)
+{
+ u64 offset = 0;
+ struct btrfs_multi_bio *multi = NULL;
+ struct btrfs_device *device;
+ int ret = 0;
+ u64 max_len = *len;
+
+ ret = btrfs_map_block(fs_info, READ, logical, len, &multi, mirror,
+ NULL);
+ if (ret) {
+ fprintf(stderr, "Couldn't map the block %llu\n",
+ logical + offset);
+ goto err;
+ }
+ device = multi->stripes[0].dev;
+
+ if (*len > max_len)
+ *len = max_len;
+ if (!device->desc || !device->part) {
+ ret = -EIO;
+ goto err;
+ }
+
+ ret = __btrfs_devread(device->desc, device->part, data, *len,
+ multi->stripes[0].physical);
+ if (ret != *len)
+ ret = -EIO;
+ else
+ ret = 0;
+err:
+ kfree(multi);
+ return ret;
+}
+
+void btrfs_setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
+ u64 objectid)
+{
+ root->node = NULL;
+ root->track_dirty = 0;
+
+ root->fs_info = fs_info;
+ root->objectid = objectid;
+ root->last_trans = 0;
+ root->last_inode_alloc = 0;
+
+ memset(&root->root_key, 0, sizeof(root->root_key));
+ memset(&root->root_item, 0, sizeof(root->root_item));
+ root->root_key.objectid = objectid;
+}
+
+static int find_and_setup_root(struct btrfs_root *tree_root,
+ struct btrfs_fs_info *fs_info,
+ u64 objectid, struct btrfs_root *root)
+{
+ int ret;
+ u64 generation;
+
+ btrfs_setup_root(root, fs_info, objectid);
+ ret = btrfs_find_last_root(tree_root, objectid,
+ &root->root_item, &root->root_key);
+ if (ret)
+ return ret;
+
+ generation = btrfs_root_generation(&root->root_item);
+ root->node = read_tree_block(fs_info,
+ btrfs_root_bytenr(&root->root_item), generation);
+ if (!extent_buffer_uptodate(root->node))
+ return -EIO;
+
+ return 0;
+}
+
+int btrfs_free_fs_root(struct btrfs_root *root)
+{
+ if (root->node)
+ free_extent_buffer(root->node);
+ kfree(root);
+ return 0;
+}
+
+static void __free_fs_root(struct rb_node *node)
+{
+ struct btrfs_root *root;
+
+ root = container_of(node, struct btrfs_root, rb_node);
+ btrfs_free_fs_root(root);
+}
+
+FREE_RB_BASED_TREE(fs_roots, __free_fs_root);
+
+struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info,
+ struct btrfs_key *location)
+{
+ struct btrfs_root *root;
+ struct btrfs_root *tree_root = fs_info->tree_root;
+ struct btrfs_path *path;
+ struct extent_buffer *l;
+ u64 generation;
+ int ret = 0;
+
+ root = calloc(1, sizeof(*root));
+ if (!root)
+ return ERR_PTR(-ENOMEM);
+ if (location->offset == (u64)-1) {
+ ret = find_and_setup_root(tree_root, fs_info,
+ location->objectid, root);
+ if (ret) {
+ free(root);
+ return ERR_PTR(ret);
+ }
+ goto insert;
+ }
+
+ btrfs_setup_root(root, fs_info,
+ location->objectid);
+
+ path = btrfs_alloc_path();
+ if (!path) {
+ free(root);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
+ if (ret != 0) {
+ if (ret > 0)
+ ret = -ENOENT;
+ goto out;
+ }
+ l = path->nodes[0];
+ read_extent_buffer(l, &root->root_item,
+ btrfs_item_ptr_offset(l, path->slots[0]),
+ sizeof(root->root_item));
+ memcpy(&root->root_key, location, sizeof(*location));
+
+ /* If this root is already an orphan, no need to read */
+ if (btrfs_root_refs(&root->root_item) == 0) {
+ ret = -ENOENT;
+ goto out;
+ }
+ ret = 0;
+out:
+ btrfs_free_path(path);
+ if (ret) {
+ free(root);
+ return ERR_PTR(ret);
+ }
+ generation = btrfs_root_generation(&root->root_item);
+ root->node = read_tree_block(fs_info,
+ btrfs_root_bytenr(&root->root_item), generation);
+ if (!extent_buffer_uptodate(root->node)) {
+ free(root);
+ return ERR_PTR(-EIO);
+ }
+insert:
+ root->ref_cows = 1;
+ return root;
+}
+
+static int btrfs_fs_roots_compare_objectids(struct rb_node *node,
+ void *data)
+{
+ u64 objectid = *((u64 *)data);
+ struct btrfs_root *root;
+
+ root = rb_entry(node, struct btrfs_root, rb_node);
+ if (objectid > root->objectid)
+ return 1;
+ else if (objectid < root->objectid)
+ return -1;
+ else
+ return 0;
+}
+
+int btrfs_fs_roots_compare_roots(struct rb_node *node1, struct rb_node *node2)
+{
+ struct btrfs_root *root;
+
+ root = rb_entry(node2, struct btrfs_root, rb_node);
+ return btrfs_fs_roots_compare_objectids(node1, (void *)&root->objectid);
+}
+
+struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
+ struct btrfs_key *location)
+{
+ struct btrfs_root *root;
+ struct rb_node *node;
+ int ret;
+ u64 objectid = location->objectid;
+
+ if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
+ return fs_info->tree_root;
+ if (location->objectid == BTRFS_CHUNK_TREE_OBJECTID)
+ return fs_info->chunk_root;
+ if (location->objectid == BTRFS_CSUM_TREE_OBJECTID)
+ return fs_info->csum_root;
+ BUG_ON(location->objectid == BTRFS_TREE_RELOC_OBJECTID ||
+ location->offset != (u64)-1);
+
+ node = rb_search(&fs_info->fs_root_tree, (void *)&objectid,
+ btrfs_fs_roots_compare_objectids, NULL);
+ if (node)
+ return container_of(node, struct btrfs_root, rb_node);
+
+ root = btrfs_read_fs_root_no_cache(fs_info, location);
+ if (IS_ERR(root))
+ return root;
+
+ ret = rb_insert(&fs_info->fs_root_tree, &root->rb_node,
+ btrfs_fs_roots_compare_roots);
+ BUG_ON(ret);
+ return root;
+}
+
+void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
+{
+ free(fs_info->tree_root);
+ free(fs_info->chunk_root);
+ free(fs_info->csum_root);
+ free(fs_info->super_copy);
+ free(fs_info);
+}
+
+struct btrfs_fs_info *btrfs_new_fs_info(void)
+{
+ struct btrfs_fs_info *fs_info;
+
+ fs_info = calloc(1, sizeof(struct btrfs_fs_info));
+ if (!fs_info)
+ return NULL;
+
+ fs_info->tree_root = calloc(1, sizeof(struct btrfs_root));
+ fs_info->chunk_root = calloc(1, sizeof(struct btrfs_root));
+ fs_info->csum_root = calloc(1, sizeof(struct btrfs_root));
+ fs_info->super_copy = calloc(1, BTRFS_SUPER_INFO_SIZE);
+
+ if (!fs_info->tree_root || !fs_info->chunk_root ||
+ !fs_info->csum_root || !fs_info->super_copy)
+ goto free_all;
+
+ extent_io_tree_init(&fs_info->extent_cache);
+
+ fs_info->fs_root_tree = RB_ROOT;
+ cache_tree_init(&fs_info->mapping_tree.cache_tree);
+
+ mutex_init(&fs_info->fs_mutex);
+
+ return fs_info;
+free_all:
+ btrfs_free_fs_info(fs_info);
+ return NULL;
+}
+
+static int setup_root_or_create_block(struct btrfs_fs_info *fs_info,
+ struct btrfs_root *info_root,
+ u64 objectid, char *str)
+{
+ struct btrfs_root *root = fs_info->tree_root;
+ int ret;
+
+ ret = find_and_setup_root(root, fs_info, objectid, info_root);
+ if (ret) {
+ error("could not setup %s tree", str);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_super_block *sb = fs_info->super_copy;
+ struct btrfs_root *root;
+ struct btrfs_key key;
+ u64 root_tree_bytenr;
+ u64 generation;
+ int ret;
+
+ root = fs_info->tree_root;
+ btrfs_setup_root(root, fs_info, BTRFS_ROOT_TREE_OBJECTID);
+ generation = btrfs_super_generation(sb);
+
+ root_tree_bytenr = btrfs_super_root(sb);
+
+ root->node = read_tree_block(fs_info, root_tree_bytenr, generation);
+ if (!extent_buffer_uptodate(root->node)) {
+ fprintf(stderr, "Couldn't read tree root\n");
+ return -EIO;
+ }
+
+ ret = setup_root_or_create_block(fs_info, fs_info->csum_root,
+ BTRFS_CSUM_TREE_OBJECTID, "csum");
+ if (ret)
+ return ret;
+ fs_info->csum_root->track_dirty = 1;
+
+ fs_info->last_trans_committed = generation;
+
+ key.objectid = BTRFS_FS_TREE_OBJECTID;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+ fs_info->fs_root = btrfs_read_fs_root(fs_info, &key);
+
+ if (IS_ERR(fs_info->fs_root))
+ return -EIO;
+ return 0;
+}
+
+void btrfs_release_all_roots(struct btrfs_fs_info *fs_info)
+{
+ if (fs_info->csum_root)
+ free_extent_buffer(fs_info->csum_root->node);
+ if (fs_info->tree_root)
+ free_extent_buffer(fs_info->tree_root->node);
+ if (fs_info->chunk_root)
+ free_extent_buffer(fs_info->chunk_root->node);
+}
+
+static void free_map_lookup(struct cache_extent *ce)
+{
+ struct map_lookup *map;
+
+ map = container_of(ce, struct map_lookup, ce);
+ kfree(map);
+}
+
+FREE_EXTENT_CACHE_BASED_TREE(mapping_cache, free_map_lookup);
+
+void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info)
+{
+ free_mapping_cache_tree(&fs_info->mapping_tree.cache_tree);
+ extent_io_tree_cleanup(&fs_info->extent_cache);
+}
+
+static int btrfs_scan_fs_devices(struct blk_desc *desc,
+ struct disk_partition *part,
+ struct btrfs_fs_devices **fs_devices)
+{
+ u64 total_devs;
+ int ret;
+
+ if (round_up(BTRFS_SUPER_INFO_SIZE + BTRFS_SUPER_INFO_OFFSET,
+ desc->blksz) > (part->size << desc->log2blksz)) {
+ error("superblock end %u is larger than device size " LBAFU,
+ BTRFS_SUPER_INFO_SIZE + BTRFS_SUPER_INFO_OFFSET,
+ part->size << desc->log2blksz);
+ return -EINVAL;
+ }
+
+ ret = btrfs_scan_one_device(desc, part, fs_devices, &total_devs);
+ if (ret) {
+ fprintf(stderr, "No valid Btrfs found\n");
+ return ret;
+ }
+ return 0;
+}
+
+int btrfs_check_fs_compatibility(struct btrfs_super_block *sb)
+{
+ u64 features;
+
+ features = btrfs_super_incompat_flags(sb) &
+ ~BTRFS_FEATURE_INCOMPAT_SUPP;
+ if (features) {
+ printk("couldn't open because of unsupported "
+ "option features (%llx).\n",
+ (unsigned long long)features);
+ return -ENOTSUPP;
+ }
+
+ features = btrfs_super_incompat_flags(sb);
+ if (!(features & BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF)) {
+ features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
+ btrfs_set_super_incompat_flags(sb, features);
+ }
+
+ return 0;
+}
+
+static int btrfs_setup_chunk_tree_and_device_map(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_super_block *sb = fs_info->super_copy;
+ u64 chunk_root_bytenr;
+ u64 generation;
+ int ret;
+
+ btrfs_setup_root(fs_info->chunk_root, fs_info,
+ BTRFS_CHUNK_TREE_OBJECTID);
+
+ ret = btrfs_read_sys_array(fs_info);
+ if (ret)
+ return ret;
+
+ generation = btrfs_super_chunk_root_generation(sb);
+ chunk_root_bytenr = btrfs_super_chunk_root(sb);
+
+ fs_info->chunk_root->node = read_tree_block(fs_info,
+ chunk_root_bytenr,
+ generation);
+ if (!extent_buffer_uptodate(fs_info->chunk_root->node)) {
+ error("cannot read chunk root");
+ return -EIO;
+ }
+
+ ret = btrfs_read_chunk_tree(fs_info);
+ if (ret) {
+ fprintf(stderr, "Couldn't read chunk tree\n");
+ return ret;
+ }
+ return 0;
+}
+
+struct btrfs_fs_info *open_ctree_fs_info(struct blk_desc *desc,
+ struct disk_partition *part)
+{
+ struct btrfs_fs_info *fs_info;
+ struct btrfs_super_block *disk_super;
+ struct btrfs_fs_devices *fs_devices = NULL;
+ struct extent_buffer *eb;
+ int ret;
+
+ fs_info = btrfs_new_fs_info();
+ if (!fs_info) {
+ fprintf(stderr, "Failed to allocate memory for fs_info\n");
+ return NULL;
+ }
+
+ ret = btrfs_scan_fs_devices(desc, part, &fs_devices);
+ if (ret)
+ goto out;
+
+ fs_info->fs_devices = fs_devices;
+
+ ret = btrfs_open_devices(fs_devices);
+ if (ret)
+ goto out;
+
+ disk_super = fs_info->super_copy;
+ ret = btrfs_read_dev_super(desc, part, disk_super);
+ if (ret) {
+ printk("No valid btrfs found\n");
+ goto out_devices;
+ }
+
+ if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_CHANGING_FSID) {
+ fprintf(stderr, "ERROR: Filesystem UUID change in progress\n");
+ goto out_devices;
+ }
+
+ ASSERT(!memcmp(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE));
+ if (btrfs_fs_incompat(fs_info, METADATA_UUID))
+ ASSERT(!memcmp(disk_super->metadata_uuid,
+ fs_devices->metadata_uuid, BTRFS_FSID_SIZE));
+
+ fs_info->sectorsize = btrfs_super_sectorsize(disk_super);
+ fs_info->nodesize = btrfs_super_nodesize(disk_super);
+ fs_info->stripesize = btrfs_super_stripesize(disk_super);
+
+ ret = btrfs_check_fs_compatibility(fs_info->super_copy);
+ if (ret)
+ goto out_devices;
+
+ ret = btrfs_setup_chunk_tree_and_device_map(fs_info);
+ if (ret)
+ goto out_chunk;
+
+ /* Chunk tree root is unable to read, return directly */
+ if (!fs_info->chunk_root)
+ return fs_info;
+
+ eb = fs_info->chunk_root->node;
+ read_extent_buffer(eb, fs_info->chunk_tree_uuid,
+ btrfs_header_chunk_tree_uuid(eb),
+ BTRFS_UUID_SIZE);
+
+ ret = btrfs_setup_all_roots(fs_info);
+ if (ret)
+ goto out_chunk;
+
+ return fs_info;
+
+out_chunk:
+ btrfs_release_all_roots(fs_info);
+ btrfs_cleanup_all_caches(fs_info);
+out_devices:
+ btrfs_close_devices(fs_devices);
+out:
+ btrfs_free_fs_info(fs_info);
+ return NULL;
+}
+
+int close_ctree_fs_info(struct btrfs_fs_info *fs_info)
+{
+ int ret;
+ int err = 0;
+
+ free_fs_roots_tree(&fs_info->fs_root_tree);
+
+ btrfs_release_all_roots(fs_info);
+ ret = btrfs_close_devices(fs_info->fs_devices);
+ btrfs_cleanup_all_caches(fs_info);
+ btrfs_free_fs_info(fs_info);
+ if (!err)
+ err = ret;
+ return err;
+}
+
+int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
+{
+ int ret;
+
+ ret = extent_buffer_uptodate(buf);
+ if (!ret)
+ return ret;
+
+ ret = verify_parent_transid(&buf->fs_info->extent_cache, buf,
+ parent_transid, 1);
+ return !ret;
+}
+
+int btrfs_set_buffer_uptodate(struct extent_buffer *eb)
+{
+ return set_extent_buffer_uptodate(eb);
+}
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
new file mode 100644
index 0000000000..a347912078
--- /dev/null
+++ b/fs/btrfs/disk-io.h
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0+
+#ifndef __BTRFS_DISK_IO_H__
+#define __BTRFS_DISK_IO_H__
+
+#include <linux/sizes.h>
+#include <fs_internal.h>
+#include "ctree.h"
+#include "disk-io.h"
+
+#define BTRFS_SUPER_INFO_OFFSET SZ_64K
+#define BTRFS_SUPER_INFO_SIZE SZ_4K
+
+/* From btrfs-progs */
+int read_whole_eb(struct btrfs_fs_info *info, struct extent_buffer *eb, int mirror);
+struct extent_buffer* read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
+ u64 parent_transid);
+
+int read_extent_data(struct btrfs_fs_info *fs_info, char *data, u64 logical,
+ u64 *len, int mirror);
+struct extent_buffer* btrfs_find_create_tree_block(
+ struct btrfs_fs_info *fs_info, u64 bytenr);
+struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
+ u64 bytenr, u32 blocksize);
+struct btrfs_root *btrfs_read_fs_root_no_cache(struct btrfs_fs_info *fs_info,
+ struct btrfs_key *location);
+struct btrfs_root *btrfs_read_fs_root(struct btrfs_fs_info *fs_info,
+ struct btrfs_key *location);
+
+void btrfs_setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
+ u64 objectid);
+
+void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
+struct btrfs_fs_info *btrfs_new_fs_info(void);
+int btrfs_check_fs_compatibility(struct btrfs_super_block *sb);
+int btrfs_setup_all_roots(struct btrfs_fs_info *fs_info);
+void btrfs_release_all_roots(struct btrfs_fs_info *fs_info);
+void btrfs_cleanup_all_caches(struct btrfs_fs_info *fs_info);
+
+struct btrfs_fs_info *open_ctree_fs_info(struct blk_desc *desc,
+ struct disk_partition *part);
+int close_ctree_fs_info(struct btrfs_fs_info *fs_info);
+
+int btrfs_read_dev_super(struct blk_desc *desc, struct disk_partition *part,
+ struct btrfs_super_block *sb);
+int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid);
+int btrfs_set_buffer_uptodate(struct extent_buffer *buf);
+int btrfs_csum_data(u16 csum_type, const u8 *data, u8 *out, size_t len);
+int csum_tree_block_size(struct extent_buffer *buf, u16 csum_sectorsize,
+ int verify, u16 csum_type);
+#endif
diff --git a/fs/btrfs/extent-cache.c b/fs/btrfs/extent-cache.c
new file mode 100644
index 0000000000..bc8cf3a522
--- /dev/null
+++ b/fs/btrfs/extent-cache.c
@@ -0,0 +1,318 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Crossported from the same named file of btrfs-progs.
+ *
+ * Minor modification to include headers.
+ */
+#include <linux/kernel.h>
+#include <linux/rbtree.h>
+#include <linux/errno.h>
+#include <linux/bug.h>
+#include <stdlib.h>
+#include "extent-cache.h"
+#include "common/rbtree-utils.h"
+
+struct cache_extent_search_range {
+ u64 objectid;
+ u64 start;
+ u64 size;
+};
+
+static int cache_tree_comp_range(struct rb_node *node, void *data)
+{
+ struct cache_extent *entry;
+ struct cache_extent_search_range *range;
+
+ range = (struct cache_extent_search_range *)data;
+ entry = rb_entry(node, struct cache_extent, rb_node);
+
+ if (entry->start + entry->size <= range->start)
+ return 1;
+ else if (range->start + range->size <= entry->start)
+ return -1;
+ else
+ return 0;
+}
+
+static int cache_tree_comp_nodes(struct rb_node *node1, struct rb_node *node2)
+{
+ struct cache_extent *entry;
+ struct cache_extent_search_range range;
+
+ entry = rb_entry(node2, struct cache_extent, rb_node);
+ range.start = entry->start;
+ range.size = entry->size;
+
+ return cache_tree_comp_range(node1, (void *)&range);
+}
+
+static int cache_tree_comp_range2(struct rb_node *node, void *data)
+{
+ struct cache_extent *entry;
+ struct cache_extent_search_range *range;
+
+ range = (struct cache_extent_search_range *)data;
+ entry = rb_entry(node, struct cache_extent, rb_node);
+
+ if (entry->objectid < range->objectid)
+ return 1;
+ else if (entry->objectid > range->objectid)
+ return -1;
+ else if (entry->start + entry->size <= range->start)
+ return 1;
+ else if (range->start + range->size <= entry->start)
+ return -1;
+ else
+ return 0;
+}
+
+static int cache_tree_comp_nodes2(struct rb_node *node1, struct rb_node *node2)
+{
+ struct cache_extent *entry;
+ struct cache_extent_search_range range;
+
+ entry = rb_entry(node2, struct cache_extent, rb_node);
+ range.objectid = entry->objectid;
+ range.start = entry->start;
+ range.size = entry->size;
+
+ return cache_tree_comp_range2(node1, (void *)&range);
+}
+
+void cache_tree_init(struct cache_tree *tree)
+{
+ tree->root = RB_ROOT;
+}
+
+static struct cache_extent *alloc_cache_extent(u64 start, u64 size)
+{
+ struct cache_extent *pe = malloc(sizeof(*pe));
+
+ if (!pe)
+ return pe;
+
+ pe->objectid = 0;
+ pe->start = start;
+ pe->size = size;
+ return pe;
+}
+
+int add_cache_extent(struct cache_tree *tree, u64 start, u64 size)
+{
+ struct cache_extent *pe = alloc_cache_extent(start, size);
+ int ret;
+
+ if (!pe)
+ return -ENOMEM;
+
+ ret = insert_cache_extent(tree, pe);
+ if (ret)
+ free(pe);
+
+ return ret;
+}
+
+int insert_cache_extent(struct cache_tree *tree, struct cache_extent *pe)
+{
+ return rb_insert(&tree->root, &pe->rb_node, cache_tree_comp_nodes);
+}
+
+int insert_cache_extent2(struct cache_tree *tree, struct cache_extent *pe)
+{
+ return rb_insert(&tree->root, &pe->rb_node, cache_tree_comp_nodes2);
+}
+
+struct cache_extent *lookup_cache_extent(struct cache_tree *tree,
+ u64 start, u64 size)
+{
+ struct rb_node *node;
+ struct cache_extent *entry;
+ struct cache_extent_search_range range;
+
+ range.start = start;
+ range.size = size;
+ node = rb_search(&tree->root, &range, cache_tree_comp_range, NULL);
+ if (!node)
+ return NULL;
+
+ entry = rb_entry(node, struct cache_extent, rb_node);
+ return entry;
+}
+
+struct cache_extent *lookup_cache_extent2(struct cache_tree *tree,
+ u64 objectid, u64 start, u64 size)
+{
+ struct rb_node *node;
+ struct cache_extent *entry;
+ struct cache_extent_search_range range;
+
+ range.objectid = objectid;
+ range.start = start;
+ range.size = size;
+ node = rb_search(&tree->root, &range, cache_tree_comp_range2, NULL);
+ if (!node)
+ return NULL;
+
+ entry = rb_entry(node, struct cache_extent, rb_node);
+ return entry;
+}
+
+struct cache_extent *search_cache_extent(struct cache_tree *tree, u64 start)
+{
+ struct rb_node *next;
+ struct rb_node *node;
+ struct cache_extent *entry;
+ struct cache_extent_search_range range;
+
+ range.start = start;
+ range.size = 1;
+ node = rb_search(&tree->root, &range, cache_tree_comp_range, &next);
+ if (!node)
+ node = next;
+ if (!node)
+ return NULL;
+
+ entry = rb_entry(node, struct cache_extent, rb_node);
+ return entry;
+}
+
+struct cache_extent *search_cache_extent2(struct cache_tree *tree,
+ u64 objectid, u64 start)
+{
+ struct rb_node *next;
+ struct rb_node *node;
+ struct cache_extent *entry;
+ struct cache_extent_search_range range;
+
+ range.objectid = objectid;
+ range.start = start;
+ range.size = 1;
+ node = rb_search(&tree->root, &range, cache_tree_comp_range2, &next);
+ if (!node)
+ node = next;
+ if (!node)
+ return NULL;
+
+ entry = rb_entry(node, struct cache_extent, rb_node);
+ return entry;
+}
+
+struct cache_extent *first_cache_extent(struct cache_tree *tree)
+{
+ struct rb_node *node = rb_first(&tree->root);
+
+ if (!node)
+ return NULL;
+ return rb_entry(node, struct cache_extent, rb_node);
+}
+
+struct cache_extent *last_cache_extent(struct cache_tree *tree)
+{
+ struct rb_node *node = rb_last(&tree->root);
+
+ if (!node)
+ return NULL;
+ return rb_entry(node, struct cache_extent, rb_node);
+}
+
+struct cache_extent *prev_cache_extent(struct cache_extent *pe)
+{
+ struct rb_node *node = rb_prev(&pe->rb_node);
+
+ if (!node)
+ return NULL;
+ return rb_entry(node, struct cache_extent, rb_node);
+}
+
+struct cache_extent *next_cache_extent(struct cache_extent *pe)
+{
+ struct rb_node *node = rb_next(&pe->rb_node);
+
+ if (!node)
+ return NULL;
+ return rb_entry(node, struct cache_extent, rb_node);
+}
+
+void remove_cache_extent(struct cache_tree *tree, struct cache_extent *pe)
+{
+ rb_erase(&pe->rb_node, &tree->root);
+}
+
+void cache_tree_free_extents(struct cache_tree *tree,
+ free_cache_extent free_func)
+{
+ struct cache_extent *ce;
+
+ while ((ce = first_cache_extent(tree))) {
+ remove_cache_extent(tree, ce);
+ free_func(ce);
+ }
+}
+
+static void free_extent_cache(struct cache_extent *pe)
+{
+ free(pe);
+}
+
+void free_extent_cache_tree(struct cache_tree *tree)
+{
+ cache_tree_free_extents(tree, free_extent_cache);
+}
+
+int add_merge_cache_extent(struct cache_tree *tree, u64 start, u64 size)
+{
+ struct cache_extent *cache;
+ struct cache_extent *next = NULL;
+ struct cache_extent *prev = NULL;
+ int next_merged = 0;
+ int prev_merged = 0;
+ int ret = 0;
+
+ if (cache_tree_empty(tree))
+ goto insert;
+
+ cache = search_cache_extent(tree, start);
+ if (!cache) {
+ /*
+ * Either the tree is completely empty, or the no range after
+ * start.
+ * Either way, the last cache_extent should be prev.
+ */
+ prev = last_cache_extent(tree);
+ } else if (start <= cache->start) {
+ next = cache;
+ prev = prev_cache_extent(cache);
+ } else {
+ prev = cache;
+ next = next_cache_extent(cache);
+ }
+
+ /*
+ * Ensure the range to be inserted won't cover with existings
+ * Or we will need extra loop to do merge
+ */
+ BUG_ON(next && start + size > next->start);
+ BUG_ON(prev && prev->start + prev->size > start);
+
+ if (next && start + size == next->start) {
+ next_merged = 1;
+ next->size = next->start + next->size - start;
+ next->start = start;
+ }
+ if (prev && prev->start + prev->size == start) {
+ prev_merged = 1;
+ if (next_merged) {
+ next->size = next->start + next->size - prev->start;
+ next->start = prev->start;
+ remove_cache_extent(tree, prev);
+ free(prev);
+ } else {
+ prev->size = start + size - prev->start;
+ }
+ }
+insert:
+ if (!prev_merged && !next_merged)
+ ret = add_cache_extent(tree, start, size);
+ return ret;
+}
diff --git a/fs/btrfs/extent-cache.h b/fs/btrfs/extent-cache.h
new file mode 100644
index 0000000000..2fee81a66e
--- /dev/null
+++ b/fs/btrfs/extent-cache.h
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Crossported from the same named file of btrfs-progs.
+ *
+ * Minor modification to include headers.
+ */
+#ifndef __BTRFS_EXTENT_CACHE_H__
+#define __BTRFS_EXTENT_CACHE_H__
+
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+struct cache_tree {
+ struct rb_root root;
+};
+
+struct cache_extent {
+ struct rb_node rb_node;
+ u64 objectid;
+ u64 start;
+ u64 size;
+};
+
+void cache_tree_init(struct cache_tree *tree);
+
+struct cache_extent *first_cache_extent(struct cache_tree *tree);
+struct cache_extent *last_cache_extent(struct cache_tree *tree);
+struct cache_extent *prev_cache_extent(struct cache_extent *pe);
+struct cache_extent *next_cache_extent(struct cache_extent *pe);
+
+/*
+ * Find a cache_extent which covers start.
+ *
+ * If not found, return next cache_extent if possible.
+ */
+struct cache_extent *search_cache_extent(struct cache_tree *tree, u64 start);
+
+/*
+ * Find a cache_extent which restrictly covers start.
+ *
+ * If not found, return NULL.
+ */
+struct cache_extent *lookup_cache_extent(struct cache_tree *tree,
+ u64 start, u64 size);
+
+/*
+ * Add an non-overlap extent into cache tree
+ *
+ * If [start, start+size) overlap with existing one, it will return -EEXIST.
+ */
+int add_cache_extent(struct cache_tree *tree, u64 start, u64 size);
+
+/*
+ * Same with add_cache_extent, but with cache_extent strcut.
+ */
+int insert_cache_extent(struct cache_tree *tree, struct cache_extent *pe);
+void remove_cache_extent(struct cache_tree *tree, struct cache_extent *pe);
+
+static inline int cache_tree_empty(struct cache_tree *tree)
+{
+ return RB_EMPTY_ROOT(&tree->root);
+}
+
+typedef void (*free_cache_extent)(struct cache_extent *pe);
+
+void cache_tree_free_extents(struct cache_tree *tree,
+ free_cache_extent free_func);
+
+#define FREE_EXTENT_CACHE_BASED_TREE(name, free_func) \
+static void free_##name##_tree(struct cache_tree *tree) \
+{ \
+ cache_tree_free_extents(tree, free_func); \
+}
+
+void free_extent_cache_tree(struct cache_tree *tree);
+
+/*
+ * Search a cache_extent with same objectid, and covers start.
+ *
+ * If not found, return next if possible.
+ */
+struct cache_extent *search_cache_extent2(struct cache_tree *tree,
+ u64 objectid, u64 start);
+/*
+ * Search a cache_extent with same objectid, and covers the range
+ * [start, start + size)
+ *
+ * If not found, return next cache_extent if possible.
+ */
+struct cache_extent *lookup_cache_extent2(struct cache_tree *tree,
+ u64 objectid, u64 start, u64 size);
+int insert_cache_extent2(struct cache_tree *tree, struct cache_extent *pe);
+
+/*
+ * Insert a cache_extent range [start, start + size).
+ *
+ * This function may merge with existing cache_extent.
+ * NOTE: caller must ensure the inserted range won't cover with any existing
+ * range.
+ */
+int add_merge_cache_extent(struct cache_tree *tree, u64 start, u64 size);
+
+#endif
diff --git a/fs/btrfs/extent-io.c b/fs/btrfs/extent-io.c
index 2e4599cf64..774e29eb60 100644
--- a/fs/btrfs/extent-io.c
+++ b/fs/btrfs/extent-io.c
@@ -5,122 +5,805 @@
* 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
*/
-#include "btrfs.h"
+#include <linux/kernel.h>
+#include <linux/bug.h>
#include <malloc.h>
#include <memalign.h>
+#include "btrfs.h"
+#include "ctree.h"
+#include "extent-io.h"
+#include "disk-io.h"
+
+void extent_io_tree_init(struct extent_io_tree *tree)
+{
+ cache_tree_init(&tree->state);
+ cache_tree_init(&tree->cache);
+ tree->cache_size = 0;
+}
+
+static struct extent_state *alloc_extent_state(void)
+{
+ struct extent_state *state;
+
+ state = malloc(sizeof(*state));
+ if (!state)
+ return NULL;
+ state->cache_node.objectid = 0;
+ state->refs = 1;
+ state->state = 0;
+ state->xprivate = 0;
+ return state;
+}
+
+static void btrfs_free_extent_state(struct extent_state *state)
+{
+ state->refs--;
+ BUG_ON(state->refs < 0);
+ if (state->refs == 0)
+ free(state);
+}
-u64 btrfs_read_extent_inline(struct btrfs_path *path,
- struct btrfs_file_extent_item *extent, u64 offset,
- u64 size, char *out)
+static void free_extent_state_func(struct cache_extent *cache)
{
- u32 clen, dlen, orig_size = size, res;
- const char *cbuf;
- char *dbuf;
- const int data_off = offsetof(struct btrfs_file_extent_item,
- disk_bytenr);
+ struct extent_state *es;
+
+ es = container_of(cache, struct extent_state, cache_node);
+ btrfs_free_extent_state(es);
+}
- clen = btrfs_path_item_size(path) - data_off;
- cbuf = (const char *) extent + data_off;
- dlen = extent->ram_bytes;
+static void free_extent_buffer_final(struct extent_buffer *eb);
+void extent_io_tree_cleanup(struct extent_io_tree *tree)
+{
+ cache_tree_free_extents(&tree->state, free_extent_state_func);
+}
- if (offset > dlen)
- return -1ULL;
+static inline void update_extent_state(struct extent_state *state)
+{
+ state->cache_node.start = state->start;
+ state->cache_node.size = state->end + 1 - state->start;
+}
+
+/*
+ * Utility function to look for merge candidates inside a given range.
+ * Any extents with matching state are merged together into a single
+ * extent in the tree. Extents with EXTENT_IO in their state field are
+ * not merged
+ */
+static int merge_state(struct extent_io_tree *tree,
+ struct extent_state *state)
+{
+ struct extent_state *other;
+ struct cache_extent *other_node;
- if (size > dlen - offset)
- size = dlen - offset;
+ if (state->state & EXTENT_IOBITS)
+ return 0;
- if (extent->compression == BTRFS_COMPRESS_NONE) {
- memcpy(out, cbuf + offset, size);
- return size;
+ other_node = prev_cache_extent(&state->cache_node);
+ if (other_node) {
+ other = container_of(other_node, struct extent_state,
+ cache_node);
+ if (other->end == state->start - 1 &&
+ other->state == state->state) {
+ state->start = other->start;
+ update_extent_state(state);
+ remove_cache_extent(&tree->state, &other->cache_node);
+ btrfs_free_extent_state(other);
+ }
}
+ other_node = next_cache_extent(&state->cache_node);
+ if (other_node) {
+ other = container_of(other_node, struct extent_state,
+ cache_node);
+ if (other->start == state->end + 1 &&
+ other->state == state->state) {
+ other->start = state->start;
+ update_extent_state(other);
+ remove_cache_extent(&tree->state, &state->cache_node);
+ btrfs_free_extent_state(state);
+ }
+ }
+ return 0;
+}
+
+/*
+ * insert an extent_state struct into the tree. 'bits' are set on the
+ * struct before it is inserted.
+ */
+static int insert_state(struct extent_io_tree *tree,
+ struct extent_state *state, u64 start, u64 end,
+ int bits)
+{
+ int ret;
+
+ BUG_ON(end < start);
+ state->state |= bits;
+ state->start = start;
+ state->end = end;
+ update_extent_state(state);
+ ret = insert_cache_extent(&tree->state, &state->cache_node);
+ BUG_ON(ret);
+ merge_state(tree, state);
+ return 0;
+}
+
+/*
+ * split a given extent state struct in two, inserting the preallocated
+ * struct 'prealloc' as the newly created second half. 'split' indicates an
+ * offset inside 'orig' where it should be split.
+ */
+static int split_state(struct extent_io_tree *tree, struct extent_state *orig,
+ struct extent_state *prealloc, u64 split)
+{
+ int ret;
+ prealloc->start = orig->start;
+ prealloc->end = split - 1;
+ prealloc->state = orig->state;
+ update_extent_state(prealloc);
+ orig->start = split;
+ update_extent_state(orig);
+ ret = insert_cache_extent(&tree->state, &prealloc->cache_node);
+ BUG_ON(ret);
+ return 0;
+}
+
+/*
+ * clear some bits on a range in the tree.
+ */
+static int clear_state_bit(struct extent_io_tree *tree,
+ struct extent_state *state, int bits)
+{
+ int ret = state->state & bits;
- if (dlen > orig_size) {
- dbuf = malloc(dlen);
- if (!dbuf)
- return -1ULL;
+ state->state &= ~bits;
+ if (state->state == 0) {
+ remove_cache_extent(&tree->state, &state->cache_node);
+ btrfs_free_extent_state(state);
} else {
- dbuf = out;
+ merge_state(tree, state);
+ }
+ return ret;
+}
+
+/*
+ * extent_buffer_bitmap_set - set an area of a bitmap
+ * @eb: the extent buffer
+ * @start: offset of the bitmap item in the extent buffer
+ * @pos: bit number of the first bit
+ * @len: number of bits to set
+ */
+void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
+ unsigned long pos, unsigned long len)
+{
+ u8 *p = (u8 *)eb->data + start + BIT_BYTE(pos);
+ const unsigned int size = pos + len;
+ int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
+ u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
+
+ while (len >= bits_to_set) {
+ *p |= mask_to_set;
+ len -= bits_to_set;
+ bits_to_set = BITS_PER_BYTE;
+ mask_to_set = ~0;
+ p++;
+ }
+ if (len) {
+ mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
+ *p |= mask_to_set;
+ }
+}
+
+/*
+ * extent_buffer_bitmap_clear - clear an area of a bitmap
+ * @eb: the extent buffer
+ * @start: offset of the bitmap item in the extent buffer
+ * @pos: bit number of the first bit
+ * @len: number of bits to clear
+ */
+void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
+ unsigned long pos, unsigned long len)
+{
+ u8 *p = (u8 *)eb->data + start + BIT_BYTE(pos);
+ const unsigned int size = pos + len;
+ int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
+ u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
+
+ while (len >= bits_to_clear) {
+ *p &= ~mask_to_clear;
+ len -= bits_to_clear;
+ bits_to_clear = BITS_PER_BYTE;
+ mask_to_clear = ~0;
+ p++;
+ }
+ if (len) {
+ mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
+ *p &= ~mask_to_clear;
+ }
+}
+
+/*
+ * clear some bits on a range in the tree.
+ */
+int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits)
+{
+ struct extent_state *state;
+ struct extent_state *prealloc = NULL;
+ struct cache_extent *node;
+ u64 last_end;
+ int err;
+ int set = 0;
+
+again:
+ if (!prealloc) {
+ prealloc = alloc_extent_state();
+ if (!prealloc)
+ return -ENOMEM;
+ }
+
+ /*
+ * this search will find the extents that end after
+ * our range starts
+ */
+ node = search_cache_extent(&tree->state, start);
+ if (!node)
+ goto out;
+ state = container_of(node, struct extent_state, cache_node);
+ if (state->start > end)
+ goto out;
+ last_end = state->end;
+
+ /*
+ * | ---- desired range ---- |
+ * | state | or
+ * | ------------- state -------------- |
+ *
+ * We need to split the extent we found, and may flip
+ * bits on second half.
+ *
+ * If the extent we found extends past our range, we
+ * just split and search again. It'll get split again
+ * the next time though.
+ *
+ * If the extent we found is inside our range, we clear
+ * the desired bit on it.
+ */
+ if (state->start < start) {
+ err = split_state(tree, state, prealloc, start);
+ BUG_ON(err == -EEXIST);
+ prealloc = NULL;
+ if (err)
+ goto out;
+ if (state->end <= end) {
+ set |= clear_state_bit(tree, state, bits);
+ if (last_end == (u64)-1)
+ goto out;
+ start = last_end + 1;
+ } else {
+ start = state->start;
+ }
+ goto search_again;
+ }
+ /*
+ * | ---- desired range ---- |
+ * | state |
+ * We need to split the extent, and clear the bit
+ * on the first half
+ */
+ if (state->start <= end && state->end > end) {
+ err = split_state(tree, state, prealloc, end + 1);
+ BUG_ON(err == -EEXIST);
+
+ set |= clear_state_bit(tree, prealloc, bits);
+ prealloc = NULL;
+ goto out;
+ }
+
+ start = state->end + 1;
+ set |= clear_state_bit(tree, state, bits);
+ if (last_end == (u64)-1)
+ goto out;
+ start = last_end + 1;
+ goto search_again;
+out:
+ if (prealloc)
+ btrfs_free_extent_state(prealloc);
+ return set;
+
+search_again:
+ if (start > end)
+ goto out;
+ goto again;
+}
+
+/*
+ * set some bits on a range in the tree.
+ */
+int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits)
+{
+ struct extent_state *state;
+ struct extent_state *prealloc = NULL;
+ struct cache_extent *node;
+ int err = 0;
+ u64 last_start;
+ u64 last_end;
+again:
+ if (!prealloc) {
+ prealloc = alloc_extent_state();
+ if (!prealloc)
+ return -ENOMEM;
+ }
+
+ /*
+ * this search will find the extents that end after
+ * our range starts
+ */
+ node = search_cache_extent(&tree->state, start);
+ if (!node) {
+ err = insert_state(tree, prealloc, start, end, bits);
+ BUG_ON(err == -EEXIST);
+ prealloc = NULL;
+ goto out;
+ }
+
+ state = container_of(node, struct extent_state, cache_node);
+ last_start = state->start;
+ last_end = state->end;
+
+ /*
+ * | ---- desired range ---- |
+ * | state |
+ *
+ * Just lock what we found and keep going
+ */
+ if (state->start == start && state->end <= end) {
+ state->state |= bits;
+ merge_state(tree, state);
+ if (last_end == (u64)-1)
+ goto out;
+ start = last_end + 1;
+ goto search_again;
+ }
+ /*
+ * | ---- desired range ---- |
+ * | state |
+ * or
+ * | ------------- state -------------- |
+ *
+ * We need to split the extent we found, and may flip bits on
+ * second half.
+ *
+ * If the extent we found extends past our
+ * range, we just split and search again. It'll get split
+ * again the next time though.
+ *
+ * If the extent we found is inside our range, we set the
+ * desired bit on it.
+ */
+ if (state->start < start) {
+ err = split_state(tree, state, prealloc, start);
+ BUG_ON(err == -EEXIST);
+ prealloc = NULL;
+ if (err)
+ goto out;
+ if (state->end <= end) {
+ state->state |= bits;
+ start = state->end + 1;
+ merge_state(tree, state);
+ if (last_end == (u64)-1)
+ goto out;
+ start = last_end + 1;
+ } else {
+ start = state->start;
+ }
+ goto search_again;
+ }
+ /*
+ * | ---- desired range ---- |
+ * | state | or | state |
+ *
+ * There's a hole, we need to insert something in it and
+ * ignore the extent we found.
+ */
+ if (state->start > start) {
+ u64 this_end;
+ if (end < last_start)
+ this_end = end;
+ else
+ this_end = last_start -1;
+ err = insert_state(tree, prealloc, start, this_end,
+ bits);
+ BUG_ON(err == -EEXIST);
+ prealloc = NULL;
+ if (err)
+ goto out;
+ start = this_end + 1;
+ goto search_again;
+ }
+ /*
+ * | ---- desired range ---- |
+ * | ---------- state ---------- |
+ * We need to split the extent, and set the bit
+ * on the first half
+ */
+ err = split_state(tree, state, prealloc, end + 1);
+ BUG_ON(err == -EEXIST);
+
+ state->state |= bits;
+ merge_state(tree, prealloc);
+ prealloc = NULL;
+out:
+ if (prealloc)
+ btrfs_free_extent_state(prealloc);
+ return err;
+search_again:
+ if (start > end)
+ goto out;
+ goto again;
+}
+
+int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end)
+{
+ return set_extent_bits(tree, start, end, EXTENT_DIRTY);
+}
+
+int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end)
+{
+ return clear_extent_bits(tree, start, end, EXTENT_DIRTY);
+}
+
+int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+ u64 *start_ret, u64 *end_ret, int bits)
+{
+ struct cache_extent *node;
+ struct extent_state *state;
+ int ret = 1;
+
+ /*
+ * this search will find all the extents that end after
+ * our range starts.
+ */
+ node = search_cache_extent(&tree->state, start);
+ if (!node)
+ goto out;
+
+ while(1) {
+ state = container_of(node, struct extent_state, cache_node);
+ if (state->end >= start && (state->state & bits)) {
+ *start_ret = state->start;
+ *end_ret = state->end;
+ ret = 0;
+ break;
+ }
+ node = next_cache_extent(node);
+ if (!node)
+ break;
+ }
+out:
+ return ret;
+}
+
+int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ int bits, int filled)
+{
+ struct extent_state *state = NULL;
+ struct cache_extent *node;
+ int bitset = 0;
+
+ node = search_cache_extent(&tree->state, start);
+ while (node && start <= end) {
+ state = container_of(node, struct extent_state, cache_node);
+
+ if (filled && state->start > start) {
+ bitset = 0;
+ break;
+ }
+ if (state->start > end)
+ break;
+ if (state->state & bits) {
+ bitset = 1;
+ if (!filled)
+ break;
+ } else if (filled) {
+ bitset = 0;
+ break;
+ }
+ start = state->end + 1;
+ if (start > end)
+ break;
+ node = next_cache_extent(node);
+ if (!node) {
+ if (filled)
+ bitset = 0;
+ break;
+ }
+ }
+ return bitset;
+}
+
+int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
+{
+ struct cache_extent *node;
+ struct extent_state *state;
+ int ret = 0;
+
+ node = search_cache_extent(&tree->state, start);
+ if (!node) {
+ ret = -ENOENT;
+ goto out;
+ }
+ state = container_of(node, struct extent_state, cache_node);
+ if (state->start != start) {
+ ret = -ENOENT;
+ goto out;
+ }
+ state->xprivate = private;
+out:
+ return ret;
+}
+
+int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
+{
+ struct cache_extent *node;
+ struct extent_state *state;
+ int ret = 0;
+
+ node = search_cache_extent(&tree->state, start);
+ if (!node) {
+ ret = -ENOENT;
+ goto out;
+ }
+ state = container_of(node, struct extent_state, cache_node);
+ if (state->start != start) {
+ ret = -ENOENT;
+ goto out;
+ }
+ *private = state->xprivate;
+out:
+ return ret;
+}
+
+static struct extent_buffer *__alloc_extent_buffer(struct btrfs_fs_info *info,
+ u64 bytenr, u32 blocksize)
+{
+ struct extent_buffer *eb;
+
+ eb = calloc(1, sizeof(struct extent_buffer));
+ if (!eb)
+ return NULL;
+ eb->data = malloc_cache_aligned(blocksize);
+ if (!eb->data) {
+ free(eb);
+ return NULL;
+ }
+
+ eb->start = bytenr;
+ eb->len = blocksize;
+ eb->refs = 1;
+ eb->flags = 0;
+ eb->cache_node.start = bytenr;
+ eb->cache_node.size = blocksize;
+ eb->fs_info = info;
+ memset_extent_buffer(eb, 0, 0, blocksize);
+
+ return eb;
+}
+
+struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
+{
+ struct extent_buffer *new;
+
+ new = __alloc_extent_buffer(src->fs_info, src->start, src->len);
+ if (!new)
+ return NULL;
+
+ copy_extent_buffer(new, src, 0, 0, src->len);
+ new->flags |= EXTENT_BUFFER_DUMMY;
+
+ return new;
+}
+
+static void free_extent_buffer_final(struct extent_buffer *eb)
+{
+ BUG_ON(eb->refs);
+ if (!(eb->flags & EXTENT_BUFFER_DUMMY)) {
+ struct extent_io_tree *tree = &eb->fs_info->extent_cache;
+
+ remove_cache_extent(&tree->cache, &eb->cache_node);
+ BUG_ON(tree->cache_size < eb->len);
+ tree->cache_size -= eb->len;
}
+ free(eb->data);
+ free(eb);
+}
- res = btrfs_decompress(extent->compression, cbuf, clen, dbuf, dlen);
- if (res == -1 || res != dlen)
- goto err;
+static void free_extent_buffer_internal(struct extent_buffer *eb, bool free_now)
+{
+ if (!eb || IS_ERR(eb))
+ return;
- if (dlen > orig_size) {
- memcpy(out, dbuf + offset, size);
- free(dbuf);
- } else if (offset) {
- memmove(out, dbuf + offset, size);
+ eb->refs--;
+ BUG_ON(eb->refs < 0);
+ if (eb->refs == 0) {
+ if (eb->flags & EXTENT_DIRTY) {
+ error(
+ "dirty eb leak (aborted trans): start %llu len %u",
+ eb->start, eb->len);
+ }
+ if (eb->flags & EXTENT_BUFFER_DUMMY || free_now)
+ free_extent_buffer_final(eb);
}
+}
+
+void free_extent_buffer(struct extent_buffer *eb)
+{
+ free_extent_buffer_internal(eb, 1);
+}
- return size;
+struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
+ u64 bytenr, u32 blocksize)
+{
+ struct extent_buffer *eb = NULL;
+ struct cache_extent *cache;
-err:
- if (dlen > orig_size)
- free(dbuf);
- return -1ULL;
+ cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
+ if (cache && cache->start == bytenr &&
+ cache->size == blocksize) {
+ eb = container_of(cache, struct extent_buffer, cache_node);
+ eb->refs++;
+ }
+ return eb;
}
-u64 btrfs_read_extent_reg(struct btrfs_path *path,
- struct btrfs_file_extent_item *extent, u64 offset,
- u64 size, char *out)
+struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
+ u64 start)
{
- u64 physical, clen, dlen, orig_size = size;
- u32 res;
- char *cbuf, *dbuf;
+ struct extent_buffer *eb = NULL;
+ struct cache_extent *cache;
- clen = extent->disk_num_bytes;
- dlen = extent->num_bytes;
+ cache = search_cache_extent(&tree->cache, start);
+ if (cache) {
+ eb = container_of(cache, struct extent_buffer, cache_node);
+ eb->refs++;
+ }
+ return eb;
+}
- if (offset > dlen)
- return -1ULL;
+struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
+ u64 bytenr, u32 blocksize)
+{
+ struct extent_buffer *eb;
+ struct extent_io_tree *tree = &fs_info->extent_cache;
+ struct cache_extent *cache;
- if (size > dlen - offset)
- size = dlen - offset;
+ cache = lookup_cache_extent(&tree->cache, bytenr, blocksize);
+ if (cache && cache->start == bytenr &&
+ cache->size == blocksize) {
+ eb = container_of(cache, struct extent_buffer, cache_node);
+ eb->refs++;
+ } else {
+ int ret;
- /* sparse extent */
- if (extent->disk_bytenr == 0) {
- memset(out, 0, size);
- return size;
+ if (cache) {
+ eb = container_of(cache, struct extent_buffer,
+ cache_node);
+ free_extent_buffer(eb);
+ }
+ eb = __alloc_extent_buffer(fs_info, bytenr, blocksize);
+ if (!eb)
+ return NULL;
+ ret = insert_cache_extent(&tree->cache, &eb->cache_node);
+ if (ret) {
+ free(eb);
+ return NULL;
+ }
+ tree->cache_size += blocksize;
}
+ return eb;
+}
- physical = btrfs_map_logical_to_physical(extent->disk_bytenr);
- if (physical == -1ULL)
- return -1ULL;
+/*
+ * Allocate a dummy extent buffer which won't be inserted into extent buffer
+ * cache.
+ *
+ * This mostly allows super block read write using existing eb infrastructure
+ * without pulluting the eb cache.
+ *
+ * This is especially important to avoid injecting eb->start == SZ_64K, as
+ * fuzzed image could have invalid tree bytenr covers super block range,
+ * and cause ref count underflow.
+ */
+struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
+ u64 bytenr, u32 blocksize)
+{
+ struct extent_buffer *ret;
- if (extent->compression == BTRFS_COMPRESS_NONE) {
- physical += extent->offset + offset;
- if (!btrfs_devread(physical, size, out))
- return -1ULL;
+ ret = __alloc_extent_buffer(fs_info, bytenr, blocksize);
+ if (!ret)
+ return NULL;
+
+ ret->flags |= EXTENT_BUFFER_DUMMY;
+
+ return ret;
+}
+
+int read_extent_from_disk(struct blk_desc *desc, struct disk_partition *part,
+ u64 physical, struct extent_buffer *eb,
+ unsigned long offset, unsigned long len)
+{
+ int ret;
- return size;
+ ret = __btrfs_devread(desc, part, eb->data + offset, len, physical);
+ if (ret < 0)
+ goto out;
+ if (ret != len) {
+ ret = -EIO;
+ goto out;
}
+ ret = 0;
+out:
+ return ret;
+}
+
+int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
+ unsigned long start, unsigned long len)
+{
+ return memcmp(eb->data + start, ptrv, len);
+}
- cbuf = malloc_cache_aligned(dlen > size ? clen + dlen : clen);
- if (!cbuf)
- return -1ULL;
+void read_extent_buffer(const struct extent_buffer *eb, void *dst,
+ unsigned long start, unsigned long len)
+{
+ memcpy(dst, eb->data + start, len);
+}
- if (dlen > orig_size)
- dbuf = cbuf + clen;
- else
- dbuf = out;
+void write_extent_buffer(struct extent_buffer *eb, const void *src,
+ unsigned long start, unsigned long len)
+{
+ memcpy(eb->data + start, src, len);
+}
+
+void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
+ unsigned long dst_offset, unsigned long src_offset,
+ unsigned long len)
+{
+ memcpy(dst->data + dst_offset, src->data + src_offset, len);
+}
- if (!btrfs_devread(physical, clen, cbuf))
- goto err;
+void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+ unsigned long src_offset, unsigned long len)
+{
+ memmove(dst->data + dst_offset, dst->data + src_offset, len);
+}
- res = btrfs_decompress(extent->compression, cbuf, clen, dbuf, dlen);
- if (res == -1)
- goto err;
+void memset_extent_buffer(struct extent_buffer *eb, char c,
+ unsigned long start, unsigned long len)
+{
+ memset(eb->data + start, c, len);
+}
- if (dlen > orig_size)
- memcpy(out, dbuf + offset, size);
- else
- memmove(out, dbuf + offset, size);
+int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
+ unsigned long nr)
+{
+ return le_test_bit(nr, (u8 *)eb->data + start);
+}
- free(cbuf);
- return res;
+int set_extent_buffer_dirty(struct extent_buffer *eb)
+{
+ struct extent_io_tree *tree = &eb->fs_info->extent_cache;
+ if (!(eb->flags & EXTENT_DIRTY)) {
+ eb->flags |= EXTENT_DIRTY;
+ set_extent_dirty(tree, eb->start, eb->start + eb->len - 1);
+ extent_buffer_get(eb);
+ }
+ return 0;
+}
-err:
- free(cbuf);
- return -1ULL;
+int clear_extent_buffer_dirty(struct extent_buffer *eb)
+{
+ struct extent_io_tree *tree = &eb->fs_info->extent_cache;
+ if (eb->flags & EXTENT_DIRTY) {
+ eb->flags &= ~EXTENT_DIRTY;
+ clear_extent_dirty(tree, eb->start, eb->start + eb->len - 1);
+ free_extent_buffer(eb);
+ }
+ return 0;
}
diff --git a/fs/btrfs/extent-io.h b/fs/btrfs/extent-io.h
new file mode 100644
index 0000000000..6b0c87da96
--- /dev/null
+++ b/fs/btrfs/extent-io.h
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Crossported from btrfs-progs/extent_io.h
+ *
+ * Modification includes:
+ * - extent_buffer:data
+ * Use pointer to provide better alignment.
+ * - Remove max_cache_size related interfaces
+ * Includes free_extent_buffer_nocache()
+ * As we don't cache eb in U-boot.
+ * - Include headers
+ *
+ * Write related functions are kept as we still need to modify dummy extent
+ * buffers even in RO environment.
+ */
+#ifndef __BTRFS_EXTENT_IO_H__
+#define __BTRFS_EXTENT_IO_H__
+
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/err.h>
+#include <linux/bitops.h>
+#include <fs_internal.h>
+#include "extent-cache.h"
+
+#define EXTENT_DIRTY (1U << 0)
+#define EXTENT_WRITEBACK (1U << 1)
+#define EXTENT_UPTODATE (1U << 2)
+#define EXTENT_LOCKED (1U << 3)
+#define EXTENT_NEW (1U << 4)
+#define EXTENT_DELALLOC (1U << 5)
+#define EXTENT_DEFRAG (1U << 6)
+#define EXTENT_DEFRAG_DONE (1U << 7)
+#define EXTENT_BUFFER_FILLED (1U << 8)
+#define EXTENT_CSUM (1U << 9)
+#define EXTENT_BAD_TRANSID (1U << 10)
+#define EXTENT_BUFFER_DUMMY (1U << 11)
+#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK)
+
+#define BLOCK_GROUP_DATA (1U << 1)
+#define BLOCK_GROUP_METADATA (1U << 2)
+#define BLOCK_GROUP_SYSTEM (1U << 4)
+
+/*
+ * The extent buffer bitmap operations are done with byte granularity instead of
+ * word granularity for two reasons:
+ * 1. The bitmaps must be little-endian on disk.
+ * 2. Bitmap items are not guaranteed to be aligned to a word and therefore a
+ * single word in a bitmap may straddle two pages in the extent buffer.
+ */
+#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
+#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
+#define BITMAP_FIRST_BYTE_MASK(start) \
+ ((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
+#define BITMAP_LAST_BYTE_MASK(nbits) \
+ (BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
+
+static inline int le_test_bit(int nr, const u8 *addr)
+{
+ return 1U & (addr[BIT_BYTE(nr)] >> (nr & (BITS_PER_BYTE-1)));
+}
+
+struct btrfs_fs_info;
+
+struct extent_io_tree {
+ struct cache_tree state;
+ struct cache_tree cache;
+ u64 cache_size;
+};
+
+struct extent_state {
+ struct cache_extent cache_node;
+ u64 start;
+ u64 end;
+ int refs;
+ unsigned long state;
+ u64 xprivate;
+};
+
+struct extent_buffer {
+ struct cache_extent cache_node;
+ u64 start;
+ u32 len;
+ int refs;
+ u32 flags;
+ struct btrfs_fs_info *fs_info;
+ char *data;
+};
+
+static inline void extent_buffer_get(struct extent_buffer *eb)
+{
+ eb->refs++;
+}
+
+void extent_io_tree_init(struct extent_io_tree *tree);
+void extent_io_tree_cleanup(struct extent_io_tree *tree);
+int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits);
+int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int bits);
+int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+ u64 *start_ret, u64 *end_ret, int bits);
+int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
+ int bits, int filled);
+int set_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end);
+int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end);
+static inline int set_extent_buffer_uptodate(struct extent_buffer *eb)
+{
+ eb->flags |= EXTENT_UPTODATE;
+ return 0;
+}
+
+static inline int clear_extent_buffer_uptodate(struct extent_buffer *eb)
+{
+ eb->flags &= ~EXTENT_UPTODATE;
+ return 0;
+}
+
+static inline int extent_buffer_uptodate(struct extent_buffer *eb)
+{
+ if (!eb || IS_ERR(eb))
+ return 0;
+ if (eb->flags & EXTENT_UPTODATE)
+ return 1;
+ return 0;
+}
+
+int set_state_private(struct extent_io_tree *tree, u64 start, u64 xprivate);
+int get_state_private(struct extent_io_tree *tree, u64 start, u64 *xprivate);
+struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
+ u64 bytenr, u32 blocksize);
+struct extent_buffer *find_first_extent_buffer(struct extent_io_tree *tree,
+ u64 start);
+struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
+ u64 bytenr, u32 blocksize);
+struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src);
+struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
+ u64 bytenr, u32 blocksize);
+void free_extent_buffer(struct extent_buffer *eb);
+int read_extent_from_disk(struct blk_desc *desc, struct disk_partition *part,
+ u64 physical, struct extent_buffer *eb,
+ unsigned long offset, unsigned long len);
+int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
+ unsigned long start, unsigned long len);
+void read_extent_buffer(const struct extent_buffer *eb, void *dst,
+ unsigned long start, unsigned long len);
+void write_extent_buffer(struct extent_buffer *eb, const void *src,
+ unsigned long start, unsigned long len);
+void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
+ unsigned long dst_offset, unsigned long src_offset,
+ unsigned long len);
+void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
+ unsigned long src_offset, unsigned long len);
+void memset_extent_buffer(struct extent_buffer *eb, char c,
+ unsigned long start, unsigned long len);
+int extent_buffer_test_bit(struct extent_buffer *eb, unsigned long start,
+ unsigned long nr);
+int set_extent_buffer_dirty(struct extent_buffer *eb);
+int clear_extent_buffer_dirty(struct extent_buffer *eb);
+void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
+ unsigned long pos, unsigned long len);
+void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
+ unsigned long pos, unsigned long len);
+
+#endif
diff --git a/fs/btrfs/hash.c b/fs/btrfs/hash.c
deleted file mode 100644
index 52a8ceaf0c..0000000000
--- a/fs/btrfs/hash.c
+++ /dev/null
@@ -1,38 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * BTRFS filesystem implementation for U-Boot
- *
- * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
- */
-
-#include "btrfs.h"
-#include <u-boot/crc.h>
-#include <asm/unaligned.h>
-
-static u32 btrfs_crc32c_table[256];
-
-void btrfs_hash_init(void)
-{
- static int inited = 0;
-
- if (!inited) {
- crc32c_init(btrfs_crc32c_table, 0x82F63B78);
- inited = 1;
- }
-}
-
-u32 btrfs_crc32c(u32 crc, const void *data, size_t length)
-{
- return crc32c_cal(crc, (const char *) data, length,
- btrfs_crc32c_table);
-}
-
-u32 btrfs_csum_data(char *data, u32 seed, size_t len)
-{
- return btrfs_crc32c(seed, data, len);
-}
-
-void btrfs_csum_final(u32 crc, void *result)
-{
- put_unaligned(cpu_to_le32(~crc), (u32 *)result);
-}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 991c2f68c3..ff330280e0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5,193 +5,187 @@
* 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
*/
-#include "btrfs.h"
+#include <linux/kernel.h>
#include <malloc.h>
+#include <memalign.h>
+#include "btrfs.h"
+#include "disk-io.h"
+#include "volumes.h"
-u64 btrfs_lookup_inode_ref(struct btrfs_root *root, u64 inr,
- struct btrfs_inode_ref *refp, char *name)
+/*
+ * Read the content of symlink inode @ino of @root, into @target.
+ * NOTE: @target will not be \0 termiated, caller should handle it properly.
+ *
+ * Return the number of read data.
+ * Return <0 for error.
+ */
+int btrfs_readlink(struct btrfs_root *root, u64 ino, char *target)
{
struct btrfs_path path;
- struct btrfs_key *key;
- struct btrfs_inode_ref *ref;
- u64 res = -1ULL;
-
- key = btrfs_search_tree_key_type(root, inr, BTRFS_INODE_REF_KEY,
- &path);
-
- if (!key)
- return -1ULL;
-
- ref = btrfs_path_item_ptr(&path, struct btrfs_inode_ref);
- btrfs_inode_ref_to_cpu(ref);
-
- if (refp)
- *refp = *ref;
+ struct btrfs_key key;
+ struct btrfs_file_extent_item *fi;
+ int ret;
- if (name) {
- if (ref->name_len > BTRFS_NAME_MAX) {
- printf("%s: inode name too long: %u\n", __func__,
- ref->name_len);
- goto out;
- }
+ key.objectid = ino;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+ key.offset = 0;
+ btrfs_init_path(&path);
- memcpy(name, ref + 1, ref->name_len);
+ ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+ if (ret < 0)
+ return ret;
+ if (ret > 0) {
+ ret = -ENOENT;
+ goto out;
}
-
- res = key->offset;
-out:
- btrfs_free_path(&path);
- return res;
-}
-
-int btrfs_lookup_inode(const struct btrfs_root *root,
- struct btrfs_key *location,
- struct btrfs_inode_item *item,
- struct btrfs_root *new_root)
-{
- struct btrfs_root tmp_root = *root;
- struct btrfs_path path;
- int res = -1;
-
- if (location->type == BTRFS_ROOT_ITEM_KEY) {
- if (btrfs_find_root(location->objectid, &tmp_root, NULL))
- return -1;
-
- location->objectid = tmp_root.root_dirid;
- location->type = BTRFS_INODE_ITEM_KEY;
- location->offset = 0;
+ fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
+ struct btrfs_file_extent_item);
+ if (btrfs_file_extent_type(path.nodes[0], fi) !=
+ BTRFS_FILE_EXTENT_INLINE) {
+ ret = -EUCLEAN;
+ error("Extent for symlink %llu must be INLINE type!", ino);
+ goto out;
}
-
- if (btrfs_search_tree(&tmp_root, location, &path))
- return res;
-
- if (btrfs_comp_keys(location, btrfs_path_leaf_key(&path)))
+ if (btrfs_file_extent_compression(path.nodes[0], fi) !=
+ BTRFS_COMPRESS_NONE) {
+ ret = -EUCLEAN;
+ error("Extent for symlink %llu must not be compressed!", ino);
goto out;
-
- if (item) {
- *item = *btrfs_path_item_ptr(&path, struct btrfs_inode_item);
- btrfs_inode_item_to_cpu(item);
}
-
- if (new_root)
- *new_root = tmp_root;
-
- res = 0;
-
+ if (btrfs_file_extent_ram_bytes(path.nodes[0], fi) >=
+ root->fs_info->sectorsize) {
+ ret = -EUCLEAN;
+ error("Symlink %llu extent data too large (%llu)!\n",
+ ino, btrfs_file_extent_ram_bytes(path.nodes[0], fi));
+ goto out;
+ }
+ read_extent_buffer(path.nodes[0], target,
+ btrfs_file_extent_inline_start(fi),
+ btrfs_file_extent_ram_bytes(path.nodes[0], fi));
+ ret = btrfs_file_extent_ram_bytes(path.nodes[0], fi);
out:
- btrfs_free_path(&path);
- return res;
+ btrfs_release_path(&path);
+ return ret;
}
-int btrfs_readlink(const struct btrfs_root *root, u64 inr, char *target)
+static int lookup_root_ref(struct btrfs_fs_info *fs_info,
+ u64 rootid, u64 *root_ret, u64 *dir_ret)
{
+ struct btrfs_root *root = fs_info->tree_root;
+ struct btrfs_root_ref *root_ref;
struct btrfs_path path;
struct btrfs_key key;
- struct btrfs_file_extent_item *extent;
- const char *data_ptr;
- int res = -1;
-
- key.objectid = inr;
- key.type = BTRFS_EXTENT_DATA_KEY;
- key.offset = 0;
-
- if (btrfs_search_tree(root, &key, &path))
- return -1;
-
- if (btrfs_comp_keys(&key, btrfs_path_leaf_key(&path)))
- goto out;
-
- extent = btrfs_path_item_ptr(&path, struct btrfs_file_extent_item);
- if (extent->type != BTRFS_FILE_EXTENT_INLINE) {
- printf("%s: Extent for symlink %llu not of INLINE type\n",
- __func__, inr);
+ int ret;
+
+ btrfs_init_path(&path);
+ key.objectid = rootid;
+ key.type = BTRFS_ROOT_BACKREF_KEY;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+ if (ret < 0)
+ return ret;
+ /* Should not happen */
+ if (ret == 0) {
+ ret = -EUCLEAN;
goto out;
}
-
- btrfs_file_extent_item_to_cpu_inl(extent);
-
- if (extent->compression != BTRFS_COMPRESS_NONE) {
- printf("%s: Symlink %llu extent data compressed!\n", __func__,
- inr);
+ ret = btrfs_previous_item(root, &path, rootid, BTRFS_ROOT_BACKREF_KEY);
+ if (ret < 0)
goto out;
- } else if (extent->encryption != 0) {
- printf("%s: Symlink %llu extent data encrypted!\n", __func__,
- inr);
- goto out;
- } else if (extent->ram_bytes >= btrfs_info.sb.sectorsize) {
- printf("%s: Symlink %llu extent data too long (%llu)!\n",
- __func__, inr, extent->ram_bytes);
+ if (ret > 0) {
+ ret = -ENOENT;
goto out;
}
-
- data_ptr = (const char *) extent
- + offsetof(struct btrfs_file_extent_item, disk_bytenr);
-
- memcpy(target, data_ptr, extent->ram_bytes);
- target[extent->ram_bytes] = '\0';
- res = 0;
+ btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+ root_ref = btrfs_item_ptr(path.nodes[0], path.slots[0],
+ struct btrfs_root_ref);
+ *root_ret = key.offset;
+ *dir_ret = btrfs_root_ref_dirid(path.nodes[0], root_ref);
out:
- btrfs_free_path(&path);
- return res;
+ btrfs_release_path(&path);
+ return ret;
}
-/* inr must be a directory (for regular files with multiple hard links this
- function returns only one of the parents of the file) */
-static u64 get_parent_inode(struct btrfs_root *root, u64 inr,
- struct btrfs_inode_item *inode_item)
+/*
+ * To get the parent inode of @ino of @root.
+ *
+ * @root_ret and @ino_ret will be filled.
+ *
+ * NOTE: This function is not reliable. It can only get one parent inode.
+ * The get the proper parent inode, we need a full VFS inodes stack to
+ * resolve properly.
+ */
+static int get_parent_inode(struct btrfs_root *root, u64 ino,
+ struct btrfs_root **root_ret, u64 *ino_ret)
{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_path path;
struct btrfs_key key;
- u64 res;
-
- if (inr == BTRFS_FIRST_FREE_OBJECTID) {
- if (root->objectid != btrfs_info.fs_root.objectid) {
- u64 parent;
- struct btrfs_root_ref ref;
-
- parent = btrfs_lookup_root_ref(root->objectid, &ref,
- NULL);
- if (parent == -1ULL)
- return -1ULL;
+ int ret;
- if (btrfs_find_root(parent, root, NULL))
- return -1ULL;
+ if (ino == BTRFS_FIRST_FREE_OBJECTID) {
+ u64 parent_root = -1;
- inr = ref.dirid;
+ /* It's top level already, no more parent */
+ if (root->root_key.objectid == BTRFS_FS_TREE_OBJECTID) {
+ *root_ret = fs_info->fs_root;
+ *ino_ret = BTRFS_FIRST_FREE_OBJECTID;
+ return 0;
}
- if (inode_item) {
- key.objectid = inr;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
+ ret = lookup_root_ref(fs_info, root->root_key.objectid,
+ &parent_root, ino_ret);
+ if (ret < 0)
+ return ret;
- if (btrfs_lookup_inode(root, &key, inode_item, NULL))
- return -1ULL;
- }
+ key.objectid = parent_root;
+ key.type = BTRFS_ROOT_ITEM_KEY;
+ key.offset = (u64)-1;
+ *root_ret = btrfs_read_fs_root(fs_info, &key);
+ if (IS_ERR(*root_ret))
+ return PTR_ERR(*root_ret);
- return inr;
+ return 0;
}
- res = btrfs_lookup_inode_ref(root, inr, NULL, NULL);
- if (res == -1ULL)
- return -1ULL;
-
- if (inode_item) {
- key.objectid = res;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
-
- if (btrfs_lookup_inode(root, &key, inode_item, NULL))
- return -1ULL;
+ btrfs_init_path(&path);
+ key.objectid = ino;
+ key.type = BTRFS_INODE_REF_KEY;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+ if (ret < 0)
+ return ret;
+ /* Should not happen */
+ if (ret == 0) {
+ ret = -EUCLEAN;
+ goto out;
}
-
- return res;
+ ret = btrfs_previous_item(root, &path, ino, BTRFS_INODE_REF_KEY);
+ if (ret < 0)
+ goto out;
+ if (ret > 0) {
+ ret = -ENOENT;
+ goto out;
+ }
+ btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+ *root_ret = root;
+ *ino_ret = key.offset;
+out:
+ btrfs_release_path(&path);
+ return ret;
}
static inline int next_length(const char *path)
{
int res = 0;
- while (*path != '\0' && *path != '/' && res <= BTRFS_NAME_LEN)
- ++res, ++path;
+ while (*path != '\0' && *path != '/') {
+ ++res;
+ ++path;
+ if (res > BTRFS_NAME_LEN)
+ break;
+ }
return res;
}
@@ -209,175 +203,549 @@ static inline const char *skip_current_directories(const char *cur)
return cur;
}
-u64 btrfs_lookup_path(struct btrfs_root *root, u64 inr, const char *path,
- u8 *type_p, struct btrfs_inode_item *inode_item_p,
- int symlink_limit)
+/*
+ * Resolve one filename of @ino of @root.
+ *
+ * key_ret: The child key (either INODE_ITEM or ROOT_ITEM type)
+ * type_ret: BTRFS_FT_* of the child inode.
+ *
+ * Return 0 with above members filled.
+ * Return <0 for error.
+ */
+static int resolve_one_filename(struct btrfs_root *root, u64 ino,
+ const char *name, int namelen,
+ struct btrfs_key *key_ret, u8 *type_ret)
{
- struct btrfs_dir_item item;
- struct btrfs_inode_item inode_item;
- u8 type = BTRFS_FT_DIR;
- int len, have_inode = 0;
- const char *cur = path;
+ struct btrfs_dir_item *dir_item;
+ struct btrfs_path path;
+ int ret = 0;
+
+ btrfs_init_path(&path);
+
+ dir_item = btrfs_lookup_dir_item(NULL, root, &path, ino, name,
+ namelen, 0);
+ if (IS_ERR(dir_item)) {
+ ret = PTR_ERR(dir_item);
+ goto out;
+ }
+ btrfs_dir_item_key_to_cpu(path.nodes[0], dir_item, key_ret);
+ *type_ret = btrfs_dir_type(path.nodes[0], dir_item);
+out:
+ btrfs_release_path(&path);
+ return ret;
+}
+
+/*
+ * Resolve a full path @filename. The start point is @ino of @root.
+ *
+ * The result will be filled into @root_ret, @ino_ret and @type_ret.
+ */
+int btrfs_lookup_path(struct btrfs_root *root, u64 ino, const char *filename,
+ struct btrfs_root **root_ret, u64 *ino_ret,
+ u8 *type_ret, int symlink_limit)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_root *next_root;
+ struct btrfs_key key;
+ const char *cur = filename;
+ u64 next_ino;
+ u8 next_type;
+ u8 type;
+ int len;
+ int ret = 0;
+
+ /* If the path is absolute path, also search from fs root */
if (*cur == '/') {
- ++cur;
- inr = root->root_dirid;
+ root = fs_info->fs_root;
+ ino = btrfs_root_dirid(&root->root_item);
+ type = BTRFS_FT_DIR;
}
- do {
+ while (*cur != '\0') {
cur = skip_current_directories(cur);
len = next_length(cur);
if (len > BTRFS_NAME_LEN) {
- printf("%s: Name too long at \"%.*s\"\n", __func__,
+ error("%s: Name too long at \"%.*s\"", __func__,
BTRFS_NAME_LEN, cur);
- return -1ULL;
+ return -ENAMETOOLONG;
}
if (len == 1 && cur[0] == '.')
break;
if (len == 2 && cur[0] == '.' && cur[1] == '.') {
- cur += 2;
- inr = get_parent_inode(root, inr, &inode_item);
- if (inr == -1ULL)
- return -1ULL;
-
- type = BTRFS_FT_DIR;
- continue;
+ /* Go one level up */
+ ret = get_parent_inode(root, ino, &next_root, &next_ino);
+ if (ret < 0)
+ return ret;
+ root = next_root;
+ ino = next_ino;
+ goto next;
}
if (!*cur)
break;
-
- if (btrfs_lookup_dir_item(root, inr, cur, len, &item))
- return -1ULL;
- type = item.type;
- have_inode = 1;
- if (btrfs_lookup_inode(root, &item.location, &inode_item, root))
- return -1ULL;
+ ret = resolve_one_filename(root, ino, cur, len, &key, &type);
+ if (ret < 0)
+ return ret;
+
+ if (key.type == BTRFS_ROOT_ITEM_KEY) {
+ /* Child inode is a subvolume */
+
+ next_root = btrfs_read_fs_root(fs_info, &key);
+ if (IS_ERR(next_root))
+ return PTR_ERR(next_root);
+ root = next_root;
+ ino = btrfs_root_dirid(&root->root_item);
+ } else if (type == BTRFS_FT_SYMLINK && symlink_limit >= 0) {
+ /* Child inode is a symlink */
- if (item.type == BTRFS_FT_SYMLINK && symlink_limit >= 0) {
char *target;
- if (!symlink_limit) {
- printf("%s: Too much symlinks!\n", __func__);
- return -1ULL;
+ if (symlink_limit == 0) {
+ error("%s: Too much symlinks!", __func__);
+ return -EMLINK;
}
-
- target = malloc(min(inode_item.size + 1,
- (u64) btrfs_info.sb.sectorsize));
+ target = malloc(fs_info->sectorsize);
if (!target)
- return -1ULL;
-
- if (btrfs_readlink(root, item.location.objectid,
- target)) {
+ return -ENOMEM;
+ ret = btrfs_readlink(root, key.objectid, target);
+ if (ret < 0) {
free(target);
- return -1ULL;
+ return ret;
}
+ target[ret] = '\0';
+
+ ret = btrfs_lookup_path(root, ino, target, &next_root,
+ &next_ino, &next_type,
+ symlink_limit);
+ if (ret < 0)
+ return ret;
+ root = next_root;
+ ino = next_ino;
+ type = next_type;
+ } else {
+ /* Child inode is an inode */
+ ino = key.objectid;
+ }
+next:
+ cur += len;
+ }
- inr = btrfs_lookup_path(root, inr, target, &type,
- &inode_item, symlink_limit - 1);
+ if (!ret) {
+ *root_ret = root;
+ *ino_ret = ino;
+ *type_ret = type;
+ }
- free(target);
+ return ret;
+}
- if (inr == -1ULL)
- return -1ULL;
- } else if (item.type != BTRFS_FT_DIR && cur[len]) {
- printf("%s: \"%.*s\" not a directory\n", __func__,
- (int) (cur - path + len), path);
- return -1ULL;
- } else {
- inr = item.location.objectid;
- }
+/*
+ * Read out inline extent.
+ *
+ * Since inline extent should only exist for offset 0, no need for extra
+ * parameters.
+ * Truncating should be handled by the caller.
+ *
+ * Return the number of bytes read.
+ * Return <0 for error.
+ */
+int btrfs_read_extent_inline(struct btrfs_path *path,
+ struct btrfs_file_extent_item *fi, char *dest)
+{
+ struct extent_buffer *leaf = path->nodes[0];
+ int slot = path->slots[0];
+ char *cbuf = NULL;
+ char *dbuf = NULL;
+ u32 csize;
+ u32 dsize;
+ int ret;
+
+ csize = btrfs_file_extent_inline_item_len(leaf, btrfs_item_nr(slot));
+ if (btrfs_file_extent_compression(leaf, fi) == BTRFS_COMPRESS_NONE) {
+ /* Uncompressed, just read it out */
+ read_extent_buffer(leaf, dest,
+ btrfs_file_extent_inline_start(fi),
+ csize);
+ return csize;
+ }
- cur += len;
- } while (*cur);
+ /* Compressed extent, prepare the compressed and data buffer */
+ dsize = btrfs_file_extent_ram_bytes(leaf, fi);
+ cbuf = malloc(csize);
+ dbuf = malloc(dsize);
+ if (!cbuf || !dbuf) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ read_extent_buffer(leaf, cbuf, btrfs_file_extent_inline_start(fi),
+ csize);
+ ret = btrfs_decompress(btrfs_file_extent_compression(leaf, fi),
+ cbuf, csize, dbuf, dsize);
+ if (ret < 0 || ret != dsize) {
+ ret = -EIO;
+ goto out;
+ }
+ memcpy(dest, dbuf, dsize);
+ ret = dsize;
+out:
+ free(cbuf);
+ free(dbuf);
+ return ret;
+}
- if (type_p)
- *type_p = type;
+/*
+ * Read out regular extent.
+ *
+ * Truncating should be handled by the caller.
+ *
+ * @offset and @len should not cross the extent boundary.
+ * Return the number of bytes read.
+ * Return <0 for error.
+ */
+int btrfs_read_extent_reg(struct btrfs_path *path,
+ struct btrfs_file_extent_item *fi, u64 offset,
+ int len, char *dest)
+{
+ struct extent_buffer *leaf = path->nodes[0];
+ struct btrfs_fs_info *fs_info = leaf->fs_info;
+ struct btrfs_key key;
+ u64 extent_num_bytes;
+ u64 disk_bytenr;
+ u64 read;
+ char *cbuf = NULL;
+ char *dbuf = NULL;
+ u32 csize;
+ u32 dsize;
+ bool finished = false;
+ int num_copies;
+ int i;
+ int slot = path->slots[0];
+ int ret;
+
+ btrfs_item_key_to_cpu(leaf, &key, slot);
+ extent_num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
+ ASSERT(IS_ALIGNED(offset, fs_info->sectorsize) &&
+ IS_ALIGNED(len, fs_info->sectorsize));
+ ASSERT(offset >= key.offset &&
+ offset + len <= key.offset + extent_num_bytes);
+
+ /* Preallocated or hole , fill @dest with zero */
+ if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_PREALLOC ||
+ btrfs_file_extent_disk_bytenr(leaf, fi) == 0) {
+ memset(dest, 0, len);
+ return len;
+ }
- if (inode_item_p) {
- if (!have_inode) {
- struct btrfs_key key;
+ if (btrfs_file_extent_compression(leaf, fi) == BTRFS_COMPRESS_NONE) {
+ u64 logical;
- key.objectid = inr;
- key.type = BTRFS_INODE_ITEM_KEY;
- key.offset = 0;
+ logical = btrfs_file_extent_disk_bytenr(leaf, fi) +
+ btrfs_file_extent_offset(leaf, fi) +
+ offset - key.offset;
+ read = len;
- if (btrfs_lookup_inode(root, &key, &inode_item, NULL))
- return -1ULL;
+ num_copies = btrfs_num_copies(fs_info, logical, len);
+ for (i = 1; i <= num_copies; i++) {
+ ret = read_extent_data(fs_info, dest, logical, &read, i);
+ if (ret < 0 || read != len)
+ continue;
+ finished = true;
+ break;
}
+ if (!finished)
+ return -EIO;
+ return len;
+ }
+
+ csize = btrfs_file_extent_disk_num_bytes(leaf, fi);
+ dsize = btrfs_file_extent_ram_bytes(leaf, fi);
+ disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+ num_copies = btrfs_num_copies(fs_info, disk_bytenr, csize);
- *inode_item_p = inode_item;
+ cbuf = malloc_cache_aligned(csize);
+ dbuf = malloc_cache_aligned(dsize);
+ if (!cbuf || !dbuf) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ /* For compressed extent, we must read the whole on-disk extent */
+ for (i = 1; i <= num_copies; i++) {
+ read = csize;
+ ret = read_extent_data(fs_info, cbuf, disk_bytenr,
+ &read, i);
+ if (ret < 0 || read != csize)
+ continue;
+ finished = true;
+ break;
+ }
+ if (!finished) {
+ ret = -EIO;
+ goto out;
}
- return inr;
+ ret = btrfs_decompress(btrfs_file_extent_compression(leaf, fi), cbuf,
+ csize, dbuf, dsize);
+ if (ret != dsize) {
+ ret = -EIO;
+ goto out;
+ }
+ /* Then copy the needed part */
+ memcpy(dest, dbuf + btrfs_file_extent_offset(leaf, fi), len);
+ ret = len;
+out:
+ free(cbuf);
+ free(dbuf);
+ return ret;
}
-u64 btrfs_file_read(const struct btrfs_root *root, u64 inr, u64 offset,
- u64 size, char *buf)
+/*
+ * Get the first file extent that covers bytenr @file_offset.
+ *
+ * @file_offset must be aligned to sectorsize.
+ *
+ * return 0 for found, and path points to the file extent.
+ * return >0 for not found, and fill @next_offset.
+ * @next_offset can be 0 if there is no next file extent.
+ * return <0 for error.
+ */
+static int lookup_data_extent(struct btrfs_root *root, struct btrfs_path *path,
+ u64 ino, u64 file_offset, u64 *next_offset)
{
- struct btrfs_path path;
struct btrfs_key key;
- struct btrfs_file_extent_item *extent;
- int res = 0;
- u64 rd, rd_all = -1ULL;
+ struct btrfs_file_extent_item *fi;
+ u8 extent_type;
+ int ret = 0;
- key.objectid = inr;
+ ASSERT(IS_ALIGNED(file_offset, root->fs_info->sectorsize));
+ key.objectid = ino;
key.type = BTRFS_EXTENT_DATA_KEY;
- key.offset = offset;
-
- if (btrfs_search_tree(root, &key, &path))
- return -1ULL;
-
- if (btrfs_comp_keys(&key, btrfs_path_leaf_key(&path)) < 0) {
- if (btrfs_prev_slot(&path))
- goto out;
+ key.offset = file_offset;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ /* Error or we're already at the file extent */
+ if (ret <= 0)
+ return ret;
+ if (ret > 0) {
+ /* Check previous file extent */
+ ret = btrfs_previous_item(root, path, ino,
+ BTRFS_EXTENT_DATA_KEY);
+ if (ret < 0)
+ return ret;
+ if (ret > 0)
+ goto check_next;
+ }
+ /* Now the key.offset must be smaller than @file_offset */
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ if (key.objectid != ino ||
+ key.type != BTRFS_EXTENT_DATA_KEY)
+ goto check_next;
+
+ fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_file_extent_item);
+ extent_type = btrfs_file_extent_type(path->nodes[0], fi);
+ if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
+ if (file_offset == 0)
+ return 0;
+ /* Inline extent should be the only extent, no next extent. */
+ *next_offset = 0;
+ return 1;
+ }
- if (btrfs_comp_keys_type(&key, btrfs_path_leaf_key(&path)))
- goto out;
+ /* This file extent covers @file_offset */
+ if (key.offset <= file_offset && key.offset +
+ btrfs_file_extent_num_bytes(path->nodes[0], fi) > file_offset)
+ return 0;
+check_next:
+ ret = btrfs_next_item(root, path);
+ if (ret < 0)
+ return ret;
+ if (ret > 0) {
+ *next_offset = 0;
+ return 1;
}
- rd_all = 0;
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
+ struct btrfs_file_extent_item);
+ /* Next next data extent */
+ if (key.objectid != ino ||
+ key.type != BTRFS_EXTENT_DATA_KEY) {
+ *next_offset = 0;
+ return 1;
+ }
+ /* Current file extent already beyond @file_offset */
+ if (key.offset > file_offset) {
+ *next_offset = key.offset;
+ return 1;
+ }
+ /* This file extent covers @file_offset */
+ if (key.offset <= file_offset && key.offset +
+ btrfs_file_extent_num_bytes(path->nodes[0], fi) > file_offset)
+ return 0;
+ /* This file extent ends before @file_offset, check next */
+ ret = btrfs_next_item(root, path);
+ if (ret < 0)
+ return ret;
+ if (ret > 0) {
+ *next_offset = 0;
+ return 1;
+ }
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+ if (key.type != BTRFS_EXTENT_DATA_KEY || key.objectid != ino) {
+ *next_offset = 0;
+ return 1;
+ }
+ *next_offset = key.offset;
+ return 1;
+}
- do {
- if (btrfs_comp_keys_type(&key, btrfs_path_leaf_key(&path)))
- break;
+static int read_and_truncate_page(struct btrfs_path *path,
+ struct btrfs_file_extent_item *fi,
+ int start, int len, char *dest)
+{
+ struct extent_buffer *leaf = path->nodes[0];
+ struct btrfs_fs_info *fs_info = leaf->fs_info;
+ u64 aligned_start = round_down(start, fs_info->sectorsize);
+ u8 extent_type;
+ char *buf;
+ int page_off = start - aligned_start;
+ int page_len = fs_info->sectorsize - page_off;
+ int ret;
+
+ ASSERT(start + len <= aligned_start + fs_info->sectorsize);
+ buf = malloc_cache_aligned(fs_info->sectorsize);
+ if (!buf)
+ return -ENOMEM;
+
+ extent_type = btrfs_file_extent_type(leaf, fi);
+ if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
+ ret = btrfs_read_extent_inline(path, fi, buf);
+ memcpy(dest, buf + page_off, min(page_len, ret));
+ free(buf);
+ return len;
+ }
- extent = btrfs_path_item_ptr(&path,
- struct btrfs_file_extent_item);
+ ret = btrfs_read_extent_reg(path, fi,
+ round_down(start, fs_info->sectorsize),
+ fs_info->sectorsize, buf);
+ if (ret < 0) {
+ free(buf);
+ return ret;
+ }
+ memcpy(dest, buf + page_off, page_len);
+ free(buf);
+ return len;
+}
- if (extent->type == BTRFS_FILE_EXTENT_INLINE) {
- btrfs_file_extent_item_to_cpu_inl(extent);
- rd = btrfs_read_extent_inline(&path, extent, offset,
- size, buf);
+int btrfs_file_read(struct btrfs_root *root, u64 ino, u64 file_offset, u64 len,
+ char *dest)
+{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_file_extent_item *fi;
+ struct btrfs_path path;
+ struct btrfs_key key;
+ u64 aligned_start = round_down(file_offset, fs_info->sectorsize);
+ u64 aligned_end = round_down(file_offset + len, fs_info->sectorsize);
+ u64 next_offset;
+ u64 cur = aligned_start;
+ int ret = 0;
+
+ btrfs_init_path(&path);
+
+ /* Set the whole dest all zero, so we won't need to bother holes */
+ memset(dest, 0, len);
+
+ /* Read out the leading unaligned part */
+ if (aligned_start != file_offset) {
+ ret = lookup_data_extent(root, &path, ino, aligned_start,
+ &next_offset);
+ if (ret < 0)
+ goto out;
+ if (ret == 0) {
+ /* Read the unaligned part out*/
+ fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
+ struct btrfs_file_extent_item);
+ ret = read_and_truncate_page(&path, fi, file_offset,
+ round_up(file_offset, fs_info->sectorsize) -
+ file_offset, dest);
+ if (ret < 0)
+ goto out;
+ cur += fs_info->sectorsize;
} else {
- btrfs_file_extent_item_to_cpu(extent);
- rd = btrfs_read_extent_reg(&path, extent, offset, size,
- buf);
+ /* The whole file is a hole */
+ if (!next_offset) {
+ memset(dest, 0, len);
+ return len;
+ }
+ cur = next_offset;
}
+ }
+
+ /* Read the aligned part */
+ while (cur < aligned_end) {
+ u64 extent_num_bytes;
+ u8 type;
- if (rd == -1ULL) {
- printf("%s: Error reading extent\n", __func__);
- rd_all = -1;
+ btrfs_release_path(&path);
+ ret = lookup_data_extent(root, &path, ino, cur, &next_offset);
+ if (ret < 0)
goto out;
+ if (ret > 0) {
+ /* No next, direct exit */
+ if (!next_offset) {
+ ret = 0;
+ goto out;
+ }
+ }
+ fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
+ struct btrfs_file_extent_item);
+ btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+ type = btrfs_file_extent_type(path.nodes[0], fi);
+ if (type == BTRFS_FILE_EXTENT_INLINE) {
+ ret = btrfs_read_extent_inline(&path, fi, dest);
+ goto out;
+ }
+ /* Skip holes, as we have zeroed the dest */
+ if (type == BTRFS_FILE_EXTENT_PREALLOC ||
+ btrfs_file_extent_disk_bytenr(path.nodes[0], fi) == 0) {
+ cur = key.offset + btrfs_file_extent_num_bytes(
+ path.nodes[0], fi);
+ continue;
}
- offset = 0;
- buf += rd;
- rd_all += rd;
- size -= rd;
-
- if (!size)
- break;
- } while (!(res = btrfs_next_slot(&path)));
-
- if (res)
- return -1ULL;
+ /* Read the remaining part of the extent */
+ extent_num_bytes = btrfs_file_extent_num_bytes(path.nodes[0],
+ fi);
+ ret = btrfs_read_extent_reg(&path, fi, cur,
+ min(extent_num_bytes, aligned_end - cur),
+ dest + cur - file_offset);
+ if (ret < 0)
+ goto out;
+ cur += min(extent_num_bytes, aligned_end - cur);
+ }
+ /* Read the tailing unaligned part*/
+ if (file_offset + len != aligned_end) {
+ btrfs_release_path(&path);
+ ret = lookup_data_extent(root, &path, ino, aligned_end,
+ &next_offset);
+ /* <0 is error, >0 means no extent */
+ if (ret)
+ goto out;
+ fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
+ struct btrfs_file_extent_item);
+ ret = read_and_truncate_page(&path, fi, aligned_end,
+ file_offset + len - aligned_end,
+ dest + aligned_end - file_offset);
+ }
out:
- btrfs_free_path(&path);
- return rd_all;
+ btrfs_release_path(&path);
+ if (ret < 0)
+ return ret;
+ return len;
}
diff --git a/fs/btrfs/kernel-shared/btrfs_tree.h b/fs/btrfs/kernel-shared/btrfs_tree.h
new file mode 100644
index 0000000000..6a76d1e456
--- /dev/null
+++ b/fs/btrfs/kernel-shared/btrfs_tree.h
@@ -0,0 +1,1333 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copied from kernel/include/uapi/linux/btrfs_btree.h.
+ *
+ * Only modified the header.
+ */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __BTRFS_TREE_H__
+#define __BTRFS_TREE_H__
+
+#include <linux/types.h>
+
+#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
+
+/*
+ * The max metadata block size (node size).
+ *
+ * This limit is somewhat artificial. The memmove and tree block locking cost
+ * go up with larger node size.
+ */
+#define BTRFS_MAX_METADATA_BLOCKSIZE 65536
+
+/*
+ * We can actually store much bigger names, but lets not confuse the rest
+ * of linux.
+ *
+ * btrfs_dir_item::name_len follows this limitation.
+ */
+#define BTRFS_NAME_LEN 255
+
+/*
+ * Objectids start from here.
+ *
+ * Check btrfs_disk_key for the meaning of objectids.
+ */
+
+/*
+ * Root tree holds pointers to all of the tree roots.
+ * Without special mention, the root tree contains the root bytenr of all other
+ * trees, except the chunk tree and the log tree.
+ *
+ * The super block contains the root bytenr of this tree.
+ */
+#define BTRFS_ROOT_TREE_OBJECTID 1ULL
+
+/*
+ * Extent tree stores information about which extents are in use, and backrefs
+ * for each extent.
+ */
+#define BTRFS_EXTENT_TREE_OBJECTID 2ULL
+
+/*
+ * Chunk tree stores btrfs logical address -> physical address mapping.
+ *
+ * The super block contains part of chunk tree for bootstrap, and contains
+ * the root bytenr of this tree.
+ */
+#define BTRFS_CHUNK_TREE_OBJECTID 3ULL
+
+/*
+ * Device tree stores info about which areas of a given device are in use,
+ * and physical address -> btrfs logical address mapping.
+ */
+#define BTRFS_DEV_TREE_OBJECTID 4ULL
+
+/* The fs tree is the first subvolume tree, storing files and directories. */
+#define BTRFS_FS_TREE_OBJECTID 5ULL
+
+/* Shows the directory objectid inside the root tree. */
+#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL
+
+/* Csum tree holds checksums of all the data extents. */
+#define BTRFS_CSUM_TREE_OBJECTID 7ULL
+
+/* Quota tree holds quota configuration and tracking. */
+#define BTRFS_QUOTA_TREE_OBJECTID 8ULL
+
+/* UUID tree stores items that use the BTRFS_UUID_KEY* types. */
+#define BTRFS_UUID_TREE_OBJECTID 9ULL
+
+/* Free space cache tree (v2 space cache) tracks free space in block groups. */
+#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
+
+/* Indicates device stats in the device tree. */
+#define BTRFS_DEV_STATS_OBJECTID 0ULL
+
+/* For storing balance parameters in the root tree. */
+#define BTRFS_BALANCE_OBJECTID -4ULL
+
+/* Orhpan objectid for tracking unlinked/truncated files. */
+#define BTRFS_ORPHAN_OBJECTID -5ULL
+
+/* Does write ahead logging to speed up fsyncs. */
+#define BTRFS_TREE_LOG_OBJECTID -6ULL
+#define BTRFS_TREE_LOG_FIXUP_OBJECTID -7ULL
+
+/* For space balancing. */
+#define BTRFS_TREE_RELOC_OBJECTID -8ULL
+#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL
+
+/* Extent checksums, shared between the csum tree and log trees. */
+#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
+
+/* For storing free space cache (v1 space cache). */
+#define BTRFS_FREE_SPACE_OBJECTID -11ULL
+
+/* The inode number assigned to the special inode for storing free ino cache. */
+#define BTRFS_FREE_INO_OBJECTID -12ULL
+
+/* Dummy objectid represents multiple objectids. */
+#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
+
+/* All files have objectids in this range. */
+#define BTRFS_FIRST_FREE_OBJECTID 256ULL
+#define BTRFS_LAST_FREE_OBJECTID -256ULL
+#define BTRFS_FIRST_CHUNK_TREE_OBJECTID 256ULL
+
+
+/*
+ * The device items go into the chunk tree.
+ *
+ * The key is in the form
+ * (BTRFS_DEV_ITEMS_OBJECTID, BTRFS_DEV_ITEM_KEY, <device_id>)
+ */
+#define BTRFS_DEV_ITEMS_OBJECTID 1ULL
+
+#define BTRFS_BTREE_INODE_OBJECTID 1
+
+#define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
+
+#define BTRFS_DEV_REPLACE_DEVID 0ULL
+
+/*
+ * Types start from here.
+ *
+ * Check btrfs_disk_key for details about types.
+ */
+
+/*
+ * Inode items have the data typically returned from stat and store other
+ * info about object characteristics.
+ *
+ * There is one for every file and dir in the FS.
+ */
+#define BTRFS_INODE_ITEM_KEY 1
+/* reserve 2-11 close to the inode for later flexibility */
+#define BTRFS_INODE_REF_KEY 12
+#define BTRFS_INODE_EXTREF_KEY 13
+#define BTRFS_XATTR_ITEM_KEY 24
+#define BTRFS_ORPHAN_ITEM_KEY 48
+
+/*
+ * Dir items are the name -> inode pointers in a directory.
+ *
+ * There is one for every name in a directory.
+ */
+#define BTRFS_DIR_LOG_ITEM_KEY 60
+#define BTRFS_DIR_LOG_INDEX_KEY 72
+#define BTRFS_DIR_ITEM_KEY 84
+#define BTRFS_DIR_INDEX_KEY 96
+
+/* Stores info (position, size ...) about a data extent of a file */
+#define BTRFS_EXTENT_DATA_KEY 108
+
+/*
+ * Extent csums are stored in a separate tree and hold csums for
+ * an entire extent on disk.
+ */
+#define BTRFS_EXTENT_CSUM_KEY 128
+
+/*
+ * Root items point to tree roots.
+ *
+ * They are typically in the root tree used by the super block to find all the
+ * other trees.
+ */
+#define BTRFS_ROOT_ITEM_KEY 132
+
+/*
+ * Root backrefs tie subvols and snapshots to the directory entries that
+ * reference them.
+ */
+#define BTRFS_ROOT_BACKREF_KEY 144
+
+/*
+ * Root refs make a fast index for listing all of the snapshots and
+ * subvolumes referenced by a given root. They point directly to the
+ * directory item in the root that references the subvol.
+ */
+#define BTRFS_ROOT_REF_KEY 156
+
+/*
+ * Extent items are in the extent tree.
+ *
+ * These record which blocks are used, and how many references there are.
+ */
+#define BTRFS_EXTENT_ITEM_KEY 168
+
+/*
+ * The same as the BTRFS_EXTENT_ITEM_KEY, except it's metadata we already know
+ * the length, so we save the level in key->offset instead of the length.
+ */
+#define BTRFS_METADATA_ITEM_KEY 169
+
+#define BTRFS_TREE_BLOCK_REF_KEY 176
+
+#define BTRFS_EXTENT_DATA_REF_KEY 178
+
+#define BTRFS_EXTENT_REF_V0_KEY 180
+
+#define BTRFS_SHARED_BLOCK_REF_KEY 182
+
+#define BTRFS_SHARED_DATA_REF_KEY 184
+
+/*
+ * Block groups give us hints into the extent allocation trees.
+ *
+ * Stores how many free space there is in a block group.
+ */
+#define BTRFS_BLOCK_GROUP_ITEM_KEY 192
+
+/*
+ * Every block group is represented in the free space tree by a free space info
+ * item, which stores some accounting information. It is keyed on
+ * (block_group_start, FREE_SPACE_INFO, block_group_length).
+ */
+#define BTRFS_FREE_SPACE_INFO_KEY 198
+
+/*
+ * A free space extent tracks an extent of space that is free in a block group.
+ * It is keyed on (start, FREE_SPACE_EXTENT, length).
+ */
+#define BTRFS_FREE_SPACE_EXTENT_KEY 199
+
+/*
+ * When a block group becomes very fragmented, we convert it to use bitmaps
+ * instead of extents.
+ *
+ * A free space bitmap is keyed on (start, FREE_SPACE_BITMAP, length).
+ * The corresponding item is a bitmap with (length / sectorsize) bits.
+ */
+#define BTRFS_FREE_SPACE_BITMAP_KEY 200
+
+#define BTRFS_DEV_EXTENT_KEY 204
+#define BTRFS_DEV_ITEM_KEY 216
+#define BTRFS_CHUNK_ITEM_KEY 228
+
+/*
+ * Records the overall state of the qgroups.
+ *
+ * There's only one instance of this key present,
+ * (0, BTRFS_QGROUP_STATUS_KEY, 0)
+ */
+#define BTRFS_QGROUP_STATUS_KEY 240
+/*
+ * Records the currently used space of the qgroup.
+ *
+ * One key per qgroup, (0, BTRFS_QGROUP_INFO_KEY, qgroupid).
+ */
+#define BTRFS_QGROUP_INFO_KEY 242
+
+/*
+ * Contains the user configured limits for the qgroup.
+ *
+ * One key per qgroup, (0, BTRFS_QGROUP_LIMIT_KEY, qgroupid).
+ */
+#define BTRFS_QGROUP_LIMIT_KEY 244
+
+/*
+ * Records the child-parent relationship of qgroups. For
+ * each relation, 2 keys are present:
+ * (childid, BTRFS_QGROUP_RELATION_KEY, parentid)
+ * (parentid, BTRFS_QGROUP_RELATION_KEY, childid)
+ */
+#define BTRFS_QGROUP_RELATION_KEY 246
+
+/* Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY. */
+#define BTRFS_BALANCE_ITEM_KEY 248
+
+/*
+ * The key type for tree items that are stored persistently, but do not need to
+ * exist for extended period of time. The items can exist in any tree.
+ *
+ * [subtype, BTRFS_TEMPORARY_ITEM_KEY, data]
+ *
+ * Existing items:
+ *
+ * - balance status item
+ * (BTRFS_BALANCE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, 0)
+ */
+#define BTRFS_TEMPORARY_ITEM_KEY 248
+
+/* Obsolete name, see BTRFS_PERSISTENT_ITEM_KEY */
+#define BTRFS_DEV_STATS_KEY 249
+
+/*
+ * The key type for tree items that are stored persistently and usually exist
+ * for a long period, eg. filesystem lifetime. The item kinds can be status
+ * information, stats or preference values. The item can exist in any tree.
+ *
+ * [subtype, BTRFS_PERSISTENT_ITEM_KEY, data]
+ *
+ * Existing items:
+ *
+ * - device statistics, store IO stats in the device tree, one key for all
+ * stats
+ * (BTRFS_DEV_STATS_OBJECTID, BTRFS_DEV_STATS_KEY, 0)
+ */
+#define BTRFS_PERSISTENT_ITEM_KEY 249
+
+/*
+ * Persistently stores the device replace state in the device tree.
+ *
+ * The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0).
+ */
+#define BTRFS_DEV_REPLACE_KEY 250
+
+/*
+ * Stores items that allow to quickly map UUIDs to something else.
+ *
+ * These items are part of the filesystem UUID tree.
+ * The key is built like this:
+ * (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits).
+ */
+#define BTRFS_UUID_KEY_SUBVOL 251 /* for UUIDs assigned to subvols */
+#define BTRFS_UUID_KEY_RECEIVED_SUBVOL 252 /* for UUIDs assigned to
+ * received subvols */
+
+/*
+ * String items are for debugging.
+ *
+ * They just store a short string of data in the FS.
+ */
+#define BTRFS_STRING_ITEM_KEY 253
+
+
+
+/* 32 bytes in various csum fields */
+#define BTRFS_CSUM_SIZE 32
+
+/* Csum types */
+enum btrfs_csum_type {
+ BTRFS_CSUM_TYPE_CRC32 = 0,
+ BTRFS_CSUM_TYPE_XXHASH = 1,
+ BTRFS_CSUM_TYPE_SHA256 = 2,
+ BTRFS_CSUM_TYPE_BLAKE2 = 3,
+};
+
+/*
+ * Flags definitions for directory entry item type.
+ *
+ * Used by:
+ * struct btrfs_dir_item.type
+ *
+ * Values 0..7 must match common file type values in fs_types.h.
+ */
+#define BTRFS_FT_UNKNOWN 0
+#define BTRFS_FT_REG_FILE 1
+#define BTRFS_FT_DIR 2
+#define BTRFS_FT_CHRDEV 3
+#define BTRFS_FT_BLKDEV 4
+#define BTRFS_FT_FIFO 5
+#define BTRFS_FT_SOCK 6
+#define BTRFS_FT_SYMLINK 7
+#define BTRFS_FT_XATTR 8
+#define BTRFS_FT_MAX 9
+
+#define BTRFS_FSID_SIZE 16
+#define BTRFS_UUID_SIZE 16
+
+/*
+ * The key defines the order in the tree, and so it also defines (optimal)
+ * block layout.
+ *
+ * Objectid and offset are interpreted based on type.
+ * While normally for objectid, it either represents a root number, or an
+ * inode number.
+ *
+ * Type tells us things about the object, and is a kind of stream selector.
+ * Check the following URL for full references about btrfs_disk_key/btrfs_key:
+ * https://btrfs.wiki.kernel.org/index.php/Btree_Items
+ *
+ * btrfs_disk_key is in disk byte order. struct btrfs_key is always
+ * in cpu native order. Otherwise they are identical and their sizes
+ * should be the same (ie both packed)
+ */
+struct btrfs_disk_key {
+ __le64 objectid;
+ __u8 type;
+ __le64 offset;
+} __attribute__ ((__packed__));
+
+struct btrfs_key {
+ __u64 objectid;
+ __u8 type;
+ __u64 offset;
+} __attribute__ ((__packed__));
+
+struct btrfs_dev_item {
+ /* The internal btrfs device id */
+ __le64 devid;
+
+ /* Size of the device */
+ __le64 total_bytes;
+
+ /* Bytes used */
+ __le64 bytes_used;
+
+ /* Optimal io alignment for this device */
+ __le32 io_align;
+
+ /* Optimal io width for this device */
+ __le32 io_width;
+
+ /* Minimal io size for this device */
+ __le32 sector_size;
+
+ /* Type and info about this device */
+ __le64 type;
+
+ /* Expected generation for this device */
+ __le64 generation;
+
+ /*
+ * Starting byte of this partition on the device,
+ * to allow for stripe alignment in the future.
+ */
+ __le64 start_offset;
+
+ /* Grouping information for allocation decisions */
+ __le32 dev_group;
+
+ /* Optimal seek speed 0-100 where 100 is fastest */
+ __u8 seek_speed;
+
+ /* Optimal bandwidth 0-100 where 100 is fastest */
+ __u8 bandwidth;
+
+ /* Btrfs generated uuid for this device */
+ __u8 uuid[BTRFS_UUID_SIZE];
+
+ /* UUID of FS who owns this device */
+ __u8 fsid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_stripe {
+ __le64 devid;
+ __le64 offset;
+ __u8 dev_uuid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_chunk {
+ /* Size of this chunk in bytes */
+ __le64 length;
+
+ /* Objectid of the root referencing this chunk */
+ __le64 owner;
+
+ __le64 stripe_len;
+ __le64 type;
+
+ /* Optimal io alignment for this chunk */
+ __le32 io_align;
+
+ /* Optimal io width for this chunk */
+ __le32 io_width;
+
+ /* Minimal io size for this chunk */
+ __le32 sector_size;
+
+ /*
+ * 2^16 stripes is quite a lot, a second limit is the size of a single
+ * item in the btree.
+ */
+ __le16 num_stripes;
+
+ /* Sub stripes only matter for raid10 */
+ __le16 sub_stripes;
+ struct btrfs_stripe stripe;
+ /* additional stripes go here */
+} __attribute__ ((__packed__));
+
+#define BTRFS_FREE_SPACE_EXTENT 1
+#define BTRFS_FREE_SPACE_BITMAP 2
+
+struct btrfs_free_space_entry {
+ __le64 offset;
+ __le64 bytes;
+ __u8 type;
+} __attribute__ ((__packed__));
+
+struct btrfs_free_space_header {
+ struct btrfs_disk_key location;
+ __le64 generation;
+ __le64 num_entries;
+ __le64 num_bitmaps;
+} __attribute__ ((__packed__));
+
+#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0)
+#define BTRFS_HEADER_FLAG_RELOC (1ULL << 1)
+
+/* Super block flags */
+/* Errors detected */
+#define BTRFS_SUPER_FLAG_ERROR (1ULL << 2)
+
+#define BTRFS_SUPER_FLAG_SEEDING (1ULL << 32)
+#define BTRFS_SUPER_FLAG_METADUMP (1ULL << 33)
+#define BTRFS_SUPER_FLAG_METADUMP_V2 (1ULL << 34)
+#define BTRFS_SUPER_FLAG_CHANGING_FSID (1ULL << 35)
+#define BTRFS_SUPER_FLAG_CHANGING_FSID_V2 (1ULL << 36)
+
+
+/*
+ * Items in the extent tree are used to record the objectid of the
+ * owner of the block and the number of references.
+ */
+struct btrfs_extent_item {
+ __le64 refs;
+ __le64 generation;
+ __le64 flags;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_item_v0 {
+ __le32 refs;
+} __attribute__ ((__packed__));
+
+
+#define BTRFS_EXTENT_FLAG_DATA (1ULL << 0)
+#define BTRFS_EXTENT_FLAG_TREE_BLOCK (1ULL << 1)
+
+/* Use full backrefs for extent pointers in the block */
+#define BTRFS_BLOCK_FLAG_FULL_BACKREF (1ULL << 8)
+
+/*
+ * This flag is only used internally by scrub and may be changed at any time
+ * it is only declared here to avoid collisions.
+ */
+#define BTRFS_EXTENT_FLAG_SUPER (1ULL << 48)
+
+struct btrfs_tree_block_info {
+ struct btrfs_disk_key key;
+ __u8 level;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_data_ref {
+ __le64 root;
+ __le64 objectid;
+ __le64 offset;
+ __le32 count;
+} __attribute__ ((__packed__));
+
+struct btrfs_shared_data_ref {
+ __le32 count;
+} __attribute__ ((__packed__));
+
+struct btrfs_extent_inline_ref {
+ __u8 type;
+ __le64 offset;
+} __attribute__ ((__packed__));
+
+/* Old style backrefs item */
+struct btrfs_extent_ref_v0 {
+ __le64 root;
+ __le64 generation;
+ __le64 objectid;
+ __le32 count;
+} __attribute__ ((__packed__));
+
+
+/* Dev extents record used space on individual devices.
+ *
+ * The owner field points back to the chunk allocation mapping tree that
+ * allocated the extent.
+ * The chunk tree uuid field is a way to double check the owner.
+ */
+struct btrfs_dev_extent {
+ __le64 chunk_tree;
+ __le64 chunk_objectid;
+ __le64 chunk_offset;
+ __le64 length;
+ __u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
+} __attribute__ ((__packed__));
+
+struct btrfs_inode_ref {
+ __le64 index;
+ __le16 name_len;
+ /* Name goes here */
+} __attribute__ ((__packed__));
+
+struct btrfs_inode_extref {
+ __le64 parent_objectid;
+ __le64 index;
+ __le16 name_len;
+ __u8 name[0];
+ /* Name goes here */
+} __attribute__ ((__packed__));
+
+struct btrfs_timespec {
+ __le64 sec;
+ __le32 nsec;
+} __attribute__ ((__packed__));
+
+/* Inode flags */
+#define BTRFS_INODE_NODATASUM (1 << 0)
+#define BTRFS_INODE_NODATACOW (1 << 1)
+#define BTRFS_INODE_READONLY (1 << 2)
+#define BTRFS_INODE_NOCOMPRESS (1 << 3)
+#define BTRFS_INODE_PREALLOC (1 << 4)
+#define BTRFS_INODE_SYNC (1 << 5)
+#define BTRFS_INODE_IMMUTABLE (1 << 6)
+#define BTRFS_INODE_APPEND (1 << 7)
+#define BTRFS_INODE_NODUMP (1 << 8)
+#define BTRFS_INODE_NOATIME (1 << 9)
+#define BTRFS_INODE_DIRSYNC (1 << 10)
+#define BTRFS_INODE_COMPRESS (1 << 11)
+
+#define BTRFS_INODE_ROOT_ITEM_INIT (1 << 31)
+
+#define BTRFS_INODE_FLAG_MASK \
+ (BTRFS_INODE_NODATASUM | \
+ BTRFS_INODE_NODATACOW | \
+ BTRFS_INODE_READONLY | \
+ BTRFS_INODE_NOCOMPRESS | \
+ BTRFS_INODE_PREALLOC | \
+ BTRFS_INODE_SYNC | \
+ BTRFS_INODE_IMMUTABLE | \
+ BTRFS_INODE_APPEND | \
+ BTRFS_INODE_NODUMP | \
+ BTRFS_INODE_NOATIME | \
+ BTRFS_INODE_DIRSYNC | \
+ BTRFS_INODE_COMPRESS | \
+ BTRFS_INODE_ROOT_ITEM_INIT)
+
+struct btrfs_inode_item {
+ /* Nfs style generation number */
+ __le64 generation;
+ /* Transid that last touched this inode */
+ __le64 transid;
+ __le64 size;
+ __le64 nbytes;
+ __le64 block_group;
+ __le32 nlink;
+ __le32 uid;
+ __le32 gid;
+ __le32 mode;
+ __le64 rdev;
+ __le64 flags;
+
+ /* Modification sequence number for NFS */
+ __le64 sequence;
+
+ /*
+ * A little future expansion, for more than this we can just grow the
+ * inode item and version it
+ */
+ __le64 reserved[4];
+ struct btrfs_timespec atime;
+ struct btrfs_timespec ctime;
+ struct btrfs_timespec mtime;
+ struct btrfs_timespec otime;
+} __attribute__ ((__packed__));
+
+struct btrfs_dir_log_item {
+ __le64 end;
+} __attribute__ ((__packed__));
+
+struct btrfs_dir_item {
+ struct btrfs_disk_key location;
+ __le64 transid;
+ __le16 data_len;
+ __le16 name_len;
+ __u8 type;
+} __attribute__ ((__packed__));
+
+#define BTRFS_ROOT_SUBVOL_RDONLY (1ULL << 0)
+
+/*
+ * Internal in-memory flag that a subvolume has been marked for deletion but
+ * still visible as a directory
+ */
+#define BTRFS_ROOT_SUBVOL_DEAD (1ULL << 48)
+
+struct btrfs_root_item {
+ struct btrfs_inode_item inode;
+ __le64 generation;
+ __le64 root_dirid;
+ __le64 bytenr;
+ __le64 byte_limit;
+ __le64 bytes_used;
+ __le64 last_snapshot;
+ __le64 flags;
+ __le32 refs;
+ struct btrfs_disk_key drop_progress;
+ __u8 drop_level;
+ __u8 level;
+
+ /*
+ * The following fields appear after subvol_uuids+subvol_times
+ * were introduced.
+ */
+
+ /*
+ * This generation number is used to test if the new fields are valid
+ * and up to date while reading the root item. Every time the root item
+ * is written out, the "generation" field is copied into this field. If
+ * anyone ever mounted the fs with an older kernel, we will have
+ * mismatching generation values here and thus must invalidate the
+ * new fields. See btrfs_update_root and btrfs_find_last_root for
+ * details.
+ * The offset of generation_v2 is also used as the start for the memset
+ * when invalidating the fields.
+ */
+ __le64 generation_v2;
+ __u8 uuid[BTRFS_UUID_SIZE];
+ __u8 parent_uuid[BTRFS_UUID_SIZE];
+ __u8 received_uuid[BTRFS_UUID_SIZE];
+ __le64 ctransid; /* Updated when an inode changes */
+ __le64 otransid; /* Trans when created */
+ __le64 stransid; /* Trans when sent. Non-zero for received subvol. */
+ __le64 rtransid; /* Trans when received. Non-zero for received subvol.*/
+ struct btrfs_timespec ctime;
+ struct btrfs_timespec otime;
+ struct btrfs_timespec stime;
+ struct btrfs_timespec rtime;
+ __le64 reserved[8]; /* For future */
+} __attribute__ ((__packed__));
+
+/* This is used for both forward and backward root refs */
+struct btrfs_root_ref {
+ __le64 dirid;
+ __le64 sequence;
+ __le16 name_len;
+} __attribute__ ((__packed__));
+
+struct btrfs_disk_balance_args {
+ /*
+ * Profiles to operate on.
+ *
+ * SINGLE is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE.
+ */
+ __le64 profiles;
+
+ /*
+ * Usage filter
+ * BTRFS_BALANCE_ARGS_USAGE with a single value means '0..N'
+ * BTRFS_BALANCE_ARGS_USAGE_RANGE - range syntax, min..max
+ */
+ union {
+ __le64 usage;
+ struct {
+ __le32 usage_min;
+ __le32 usage_max;
+ };
+ };
+
+ /* Devid filter */
+ __le64 devid;
+
+ /* Devid subset filter [pstart..pend) */
+ __le64 pstart;
+ __le64 pend;
+
+ /* Btrfs virtual address space subset filter [vstart..vend) */
+ __le64 vstart;
+ __le64 vend;
+
+ /*
+ * Profile to convert to.
+ *
+ * SINGLE is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE.
+ */
+ __le64 target;
+
+ /* BTRFS_BALANCE_ARGS_* */
+ __le64 flags;
+
+ /*
+ * BTRFS_BALANCE_ARGS_LIMIT with value 'limit'.
+ * BTRFS_BALANCE_ARGS_LIMIT_RANGE - the extend version can use minimum
+ * and maximum.
+ */
+ union {
+ __le64 limit;
+ struct {
+ __le32 limit_min;
+ __le32 limit_max;
+ };
+ };
+
+ /*
+ * Process chunks that cross stripes_min..stripes_max devices,
+ * BTRFS_BALANCE_ARGS_STRIPES_RANGE.
+ */
+ __le32 stripes_min;
+ __le32 stripes_max;
+
+ __le64 unused[6];
+} __attribute__ ((__packed__));
+
+/*
+ * Stores balance parameters to disk so that balance can be properly
+ * resumed after crash or unmount.
+ */
+struct btrfs_balance_item {
+ /* BTRFS_BALANCE_* */
+ __le64 flags;
+
+ struct btrfs_disk_balance_args data;
+ struct btrfs_disk_balance_args meta;
+ struct btrfs_disk_balance_args sys;
+
+ __le64 unused[4];
+} __attribute__ ((__packed__));
+
+enum {
+ BTRFS_FILE_EXTENT_INLINE = 0,
+ BTRFS_FILE_EXTENT_REG = 1,
+ BTRFS_FILE_EXTENT_PREALLOC = 2,
+ BTRFS_NR_FILE_EXTENT_TYPES = 3,
+};
+
+enum btrfs_compression_type {
+ BTRFS_COMPRESS_NONE = 0,
+ BTRFS_COMPRESS_ZLIB = 1,
+ BTRFS_COMPRESS_LZO = 2,
+ BTRFS_COMPRESS_ZSTD = 3,
+ BTRFS_NR_COMPRESS_TYPES = 4,
+};
+
+struct btrfs_file_extent_item {
+ /* Transaction id that created this extent */
+ __le64 generation;
+ /*
+ * Max number of bytes to hold this extent in ram.
+ *
+ * When we split a compressed extent we can't know how big each of the
+ * resulting pieces will be. So, this is an upper limit on the size of
+ * the extent in ram instead of an exact limit.
+ */
+ __le64 ram_bytes;
+
+ /*
+ * 32 bits for the various ways we might encode the data,
+ * including compression and encryption. If any of these
+ * are set to something a given disk format doesn't understand
+ * it is treated like an incompat flag for reading and writing,
+ * but not for stat.
+ */
+ __u8 compression;
+ __u8 encryption;
+ __le16 other_encoding; /* Spare for later use */
+
+ /* Are we inline data or a real extent? */
+ __u8 type;
+
+ /*
+ * Disk space consumed by the extent, checksum blocks are not included
+ * in these numbers
+ *
+ * At this offset in the structure, the inline extent data start.
+ */
+ __le64 disk_bytenr;
+ __le64 disk_num_bytes;
+
+ /*
+ * The logical offset inside the file extent.
+ *
+ * This allows a file extent to point into the middle of an existing
+ * extent on disk, sharing it between two snapshots (useful if some
+ * bytes in the middle of the extent have changed).
+ */
+ __le64 offset;
+
+ /*
+ * The logical number of bytes this file extent is referencing (no
+ * csums included).
+ *
+ * This always reflects the size uncompressed and without encoding.
+ */
+ __le64 num_bytes;
+
+} __attribute__ ((__packed__));
+
+struct btrfs_csum_item {
+ __u8 csum;
+} __attribute__ ((__packed__));
+
+enum btrfs_dev_stat_values {
+ /* Disk I/O failure stats */
+ BTRFS_DEV_STAT_WRITE_ERRS, /* EIO or EREMOTEIO from lower layers */
+ BTRFS_DEV_STAT_READ_ERRS, /* EIO or EREMOTEIO from lower layers */
+ BTRFS_DEV_STAT_FLUSH_ERRS, /* EIO or EREMOTEIO from lower layers */
+
+ /* Stats for indirect indications for I/O failures */
+ BTRFS_DEV_STAT_CORRUPTION_ERRS, /* Checksum error, bytenr error or
+ * contents is illegal: this is an
+ * indication that the block was damaged
+ * during read or write, or written to
+ * wrong location or read from wrong
+ * location */
+ BTRFS_DEV_STAT_GENERATION_ERRS, /* An indication that blocks have not
+ * been written */
+
+ BTRFS_DEV_STAT_VALUES_MAX
+};
+
+struct btrfs_dev_stats_item {
+ /*
+ * Grow this item struct at the end for future enhancements and keep
+ * the existing values unchanged.
+ */
+ __le64 values[BTRFS_DEV_STAT_VALUES_MAX];
+} __attribute__ ((__packed__));
+
+#define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0
+#define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID 1
+
+struct btrfs_dev_replace_item {
+ /*
+ * Grow this item struct at the end for future enhancements and keep
+ * the existing values unchanged.
+ */
+ __le64 src_devid;
+ __le64 cursor_left;
+ __le64 cursor_right;
+ __le64 cont_reading_from_srcdev_mode;
+
+ __le64 replace_state;
+ __le64 time_started;
+ __le64 time_stopped;
+ __le64 num_write_errors;
+ __le64 num_uncorrectable_read_errors;
+} __attribute__ ((__packed__));
+
+/* Different types of block groups (and chunks) */
+#define BTRFS_BLOCK_GROUP_DATA (1ULL << 0)
+#define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1)
+#define BTRFS_BLOCK_GROUP_METADATA (1ULL << 2)
+#define BTRFS_BLOCK_GROUP_RAID0 (1ULL << 3)
+#define BTRFS_BLOCK_GROUP_RAID1 (1ULL << 4)
+#define BTRFS_BLOCK_GROUP_DUP (1ULL << 5)
+#define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6)
+#define BTRFS_BLOCK_GROUP_RAID5 (1ULL << 7)
+#define BTRFS_BLOCK_GROUP_RAID6 (1ULL << 8)
+#define BTRFS_BLOCK_GROUP_RAID1C3 (1ULL << 9)
+#define BTRFS_BLOCK_GROUP_RAID1C4 (1ULL << 10)
+#define BTRFS_BLOCK_GROUP_RESERVED (BTRFS_AVAIL_ALLOC_BIT_SINGLE | \
+ BTRFS_SPACE_INFO_GLOBAL_RSV)
+
+enum btrfs_raid_types {
+ BTRFS_RAID_RAID10,
+ BTRFS_RAID_RAID1,
+ BTRFS_RAID_DUP,
+ BTRFS_RAID_RAID0,
+ BTRFS_RAID_SINGLE,
+ BTRFS_RAID_RAID5,
+ BTRFS_RAID_RAID6,
+ BTRFS_RAID_RAID1C3,
+ BTRFS_RAID_RAID1C4,
+ BTRFS_NR_RAID_TYPES
+};
+
+#define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \
+ BTRFS_BLOCK_GROUP_SYSTEM | \
+ BTRFS_BLOCK_GROUP_METADATA)
+
+#define BTRFS_BLOCK_GROUP_PROFILE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \
+ BTRFS_BLOCK_GROUP_RAID1 | \
+ BTRFS_BLOCK_GROUP_RAID1C3 | \
+ BTRFS_BLOCK_GROUP_RAID1C4 | \
+ BTRFS_BLOCK_GROUP_RAID5 | \
+ BTRFS_BLOCK_GROUP_RAID6 | \
+ BTRFS_BLOCK_GROUP_DUP | \
+ BTRFS_BLOCK_GROUP_RAID10)
+#define BTRFS_BLOCK_GROUP_RAID56_MASK (BTRFS_BLOCK_GROUP_RAID5 | \
+ BTRFS_BLOCK_GROUP_RAID6)
+
+#define BTRFS_BLOCK_GROUP_RAID1_MASK (BTRFS_BLOCK_GROUP_RAID1 | \
+ BTRFS_BLOCK_GROUP_RAID1C3 | \
+ BTRFS_BLOCK_GROUP_RAID1C4)
+
+/*
+ * We need a bit for restriper to be able to tell when chunks of type
+ * SINGLE are available. This "extended" profile format is used in
+ * fs_info->avail_*_alloc_bits (in-memory) and balance item fields
+ * (on-disk). The corresponding on-disk bit in chunk.type is reserved
+ * to avoid remappings between two formats in future.
+ */
+#define BTRFS_AVAIL_ALLOC_BIT_SINGLE (1ULL << 48)
+
+/*
+ * A fake block group type that is used to communicate global block reserve
+ * size to userspace via the SPACE_INFO ioctl.
+ */
+#define BTRFS_SPACE_INFO_GLOBAL_RSV (1ULL << 49)
+
+#define BTRFS_EXTENDED_PROFILE_MASK (BTRFS_BLOCK_GROUP_PROFILE_MASK | \
+ BTRFS_AVAIL_ALLOC_BIT_SINGLE)
+
+static inline __u64 chunk_to_extended(__u64 flags)
+{
+ if ((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0)
+ flags |= BTRFS_AVAIL_ALLOC_BIT_SINGLE;
+
+ return flags;
+}
+static inline __u64 extended_to_chunk(__u64 flags)
+{
+ return flags & ~BTRFS_AVAIL_ALLOC_BIT_SINGLE;
+}
+
+struct btrfs_block_group_item {
+ __le64 used;
+ __le64 chunk_objectid;
+ __le64 flags;
+} __attribute__ ((__packed__));
+
+struct btrfs_free_space_info {
+ __le32 extent_count;
+ __le32 flags;
+} __attribute__ ((__packed__));
+
+#define BTRFS_FREE_SPACE_USING_BITMAPS (1ULL << 0)
+
+#define BTRFS_QGROUP_LEVEL_SHIFT 48
+static inline __u64 btrfs_qgroup_level(__u64 qgroupid)
+{
+ return qgroupid >> BTRFS_QGROUP_LEVEL_SHIFT;
+}
+
+/* Is subvolume quota turned on? */
+#define BTRFS_QGROUP_STATUS_FLAG_ON (1ULL << 0)
+
+/* Is qgroup rescan running? */
+#define BTRFS_QGROUP_STATUS_FLAG_RESCAN (1ULL << 1)
+
+/*
+ * Some qgroup entries are known to be out of date, either because the
+ * configuration has changed in a way that makes a rescan necessary, or
+ * because the fs has been mounted with a non-qgroup-aware version.
+ */
+#define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT (1ULL << 2)
+
+#define BTRFS_QGROUP_STATUS_VERSION 1
+
+struct btrfs_qgroup_status_item {
+ __le64 version;
+ /*
+ * The generation is updated during every commit. As older
+ * versions of btrfs are not aware of qgroups, it will be
+ * possible to detect inconsistencies by checking the
+ * generation on mount time.
+ */
+ __le64 generation;
+
+ /* Flag definitions see above */
+ __le64 flags;
+
+ /*
+ * Only used during scanning to record the progress of the scan.
+ * It contains a logical address.
+ */
+ __le64 rescan;
+} __attribute__ ((__packed__));
+
+struct btrfs_qgroup_info_item {
+ __le64 generation;
+ __le64 rfer;
+ __le64 rfer_cmpr;
+ __le64 excl;
+ __le64 excl_cmpr;
+} __attribute__ ((__packed__));
+
+/*
+ * Flags definition for qgroup limits
+ *
+ * Used by:
+ * struct btrfs_qgroup_limit.flags
+ * struct btrfs_qgroup_limit_item.flags
+ */
+#define BTRFS_QGROUP_LIMIT_MAX_RFER (1ULL << 0)
+#define BTRFS_QGROUP_LIMIT_MAX_EXCL (1ULL << 1)
+#define BTRFS_QGROUP_LIMIT_RSV_RFER (1ULL << 2)
+#define BTRFS_QGROUP_LIMIT_RSV_EXCL (1ULL << 3)
+#define BTRFS_QGROUP_LIMIT_RFER_CMPR (1ULL << 4)
+#define BTRFS_QGROUP_LIMIT_EXCL_CMPR (1ULL << 5)
+
+struct btrfs_qgroup_limit_item {
+ /* Only updated when any of the other values change. */
+ __le64 flags;
+ __le64 max_rfer;
+ __le64 max_excl;
+ __le64 rsv_rfer;
+ __le64 rsv_excl;
+} __attribute__ ((__packed__));
+
+/*
+ * Just in case we somehow lose the roots and are not able to mount,
+ * we store an array of the roots from previous transactions in the super.
+ */
+#define BTRFS_NUM_BACKUP_ROOTS 4
+struct btrfs_root_backup {
+ __le64 tree_root;
+ __le64 tree_root_gen;
+
+ __le64 chunk_root;
+ __le64 chunk_root_gen;
+
+ __le64 extent_root;
+ __le64 extent_root_gen;
+
+ __le64 fs_root;
+ __le64 fs_root_gen;
+
+ __le64 dev_root;
+ __le64 dev_root_gen;
+
+ __le64 csum_root;
+ __le64 csum_root_gen;
+
+ __le64 total_bytes;
+ __le64 bytes_used;
+ __le64 num_devices;
+ /* future */
+ __le64 unused_64[4];
+
+ u8 tree_root_level;
+ u8 chunk_root_level;
+ u8 extent_root_level;
+ u8 fs_root_level;
+ u8 dev_root_level;
+ u8 csum_root_level;
+ /* future and to align */
+ u8 unused_8[10];
+} __attribute__ ((__packed__));
+
+/*
+ * This is a very generous portion of the super block, giving us room to
+ * translate 14 chunks with 3 stripes each.
+ */
+#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048
+
+#define BTRFS_LABEL_SIZE 256
+
+/* The super block basically lists the main trees of the FS. */
+struct btrfs_super_block {
+ /* The first 4 fields must match struct btrfs_header */
+ u8 csum[BTRFS_CSUM_SIZE];
+ /* FS specific UUID, visible to user */
+ u8 fsid[BTRFS_FSID_SIZE];
+ __le64 bytenr; /* this block number */
+ __le64 flags;
+
+ /* Allowed to be different from the btrfs_header from here own down. */
+ __le64 magic;
+ __le64 generation;
+ __le64 root;
+ __le64 chunk_root;
+ __le64 log_root;
+
+ /* This will help find the new super based on the log root. */
+ __le64 log_root_transid;
+ __le64 total_bytes;
+ __le64 bytes_used;
+ __le64 root_dir_objectid;
+ __le64 num_devices;
+ __le32 sectorsize;
+ __le32 nodesize;
+ __le32 __unused_leafsize;
+ __le32 stripesize;
+ __le32 sys_chunk_array_size;
+ __le64 chunk_root_generation;
+ __le64 compat_flags;
+ __le64 compat_ro_flags;
+ __le64 incompat_flags;
+ __le16 csum_type;
+ u8 root_level;
+ u8 chunk_root_level;
+ u8 log_root_level;
+ struct btrfs_dev_item dev_item;
+
+ char label[BTRFS_LABEL_SIZE];
+
+ __le64 cache_generation;
+ __le64 uuid_tree_generation;
+
+ /* The UUID written into btree blocks */
+ u8 metadata_uuid[BTRFS_FSID_SIZE];
+
+ /* Future expansion */
+ __le64 reserved[28];
+ u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
+ struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
+} __attribute__ ((__packed__));
+
+/*
+ * Feature flags
+ *
+ * Used by:
+ * struct btrfs_super_block::(compat|compat_ro|incompat)_flags
+ * struct btrfs_ioctl_feature_flags
+ */
+#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE (1ULL << 0)
+
+/*
+ * Older kernels (< 4.9) on big-endian systems produced broken free space tree
+ * bitmaps, and btrfs-progs also used to corrupt the free space tree (versions
+ * < 4.7.3). If this bit is clear, then the free space tree cannot be trusted.
+ * btrfs-progs can also intentionally clear this bit to ask the kernel to
+ * rebuild the free space tree, however this might not work on older kernels
+ * that do not know about this bit. If not sure, clear the cache manually on
+ * first mount when booting older kernel versions.
+ */
+#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID (1ULL << 1)
+
+#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
+#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
+#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
+#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3)
+#define BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD (1ULL << 4)
+
+/*
+ * Older kernels tried to do bigger metadata blocks, but the
+ * code was pretty buggy. Lets not let them try anymore.
+ */
+#define BTRFS_FEATURE_INCOMPAT_BIG_METADATA (1ULL << 5)
+
+#define BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF (1ULL << 6)
+#define BTRFS_FEATURE_INCOMPAT_RAID56 (1ULL << 7)
+#define BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA (1ULL << 8)
+#define BTRFS_FEATURE_INCOMPAT_NO_HOLES (1ULL << 9)
+#define BTRFS_FEATURE_INCOMPAT_METADATA_UUID (1ULL << 10)
+#define BTRFS_FEATURE_INCOMPAT_RAID1C34 (1ULL << 11)
+
+/*
+ * Compat flags that we support.
+ *
+ * If any incompat flags are set other than the ones specified below then we
+ * will fail to mount.
+ */
+#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
+#define BTRFS_FEATURE_COMPAT_SAFE_SET 0ULL
+#define BTRFS_FEATURE_COMPAT_SAFE_CLEAR 0ULL
+
+#define BTRFS_FEATURE_COMPAT_RO_SUPP \
+ (BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE | \
+ BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID)
+
+#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL
+#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL
+
+#define BTRFS_FEATURE_INCOMPAT_SUPP \
+ (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
+ BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
+ BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \
+ BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \
+ BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \
+ BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD | \
+ BTRFS_FEATURE_INCOMPAT_RAID56 | \
+ BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \
+ BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \
+ BTRFS_FEATURE_INCOMPAT_NO_HOLES | \
+ BTRFS_FEATURE_INCOMPAT_METADATA_UUID | \
+ BTRFS_FEATURE_INCOMPAT_RAID1C34)
+
+#define BTRFS_FEATURE_INCOMPAT_SAFE_SET \
+ (BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
+#define BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR 0ULL
+
+#define BTRFS_BACKREF_REV_MAX 256
+#define BTRFS_BACKREF_REV_SHIFT 56
+#define BTRFS_BACKREF_REV_MASK (((u64)BTRFS_BACKREF_REV_MAX - 1) << \
+ BTRFS_BACKREF_REV_SHIFT)
+
+#define BTRFS_OLD_BACKREF_REV 0
+#define BTRFS_MIXED_BACKREF_REV 1
+
+#define BTRFS_MAX_LEVEL 8
+
+/* Every tree block (leaf or node) starts with this header. */
+struct btrfs_header {
+ /* These first four must match the super block */
+ u8 csum[BTRFS_CSUM_SIZE];
+ u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
+ __le64 bytenr; /* Which block this node is supposed to live in */
+ __le64 flags;
+
+ /* Allowed to be different from the super from here on down. */
+ u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
+ __le64 generation;
+ __le64 owner;
+ __le32 nritems;
+ u8 level;
+} __attribute__ ((__packed__));
+
+/*
+ * A leaf is full of items. Offset and size tell us where to find
+ * the item in the leaf (relative to the start of the data area).
+ */
+struct btrfs_item {
+ struct btrfs_disk_key key;
+ __le32 offset;
+ __le32 size;
+} __attribute__ ((__packed__));
+
+/*
+ * leaves have an item area and a data area:
+ * [item0, item1....itemN] [free space] [dataN...data1, data0]
+ *
+ * The data is separate from the items to get the keys closer together
+ * during searches.
+ */
+struct btrfs_leaf {
+ struct btrfs_header header;
+ struct btrfs_item items[];
+} __attribute__ ((__packed__));
+
+/*
+ * All non-leaf blocks are nodes, they hold only keys and pointers to children
+ * blocks.
+ */
+struct btrfs_key_ptr {
+ struct btrfs_disk_key key;
+ __le64 blockptr;
+ __le64 generation;
+} __attribute__ ((__packed__));
+
+struct btrfs_node {
+ struct btrfs_header header;
+ struct btrfs_key_ptr ptrs[];
+} __attribute__ ((__packed__));
+
+#endif /* __BTRFS_TREE_H__ */
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
new file mode 100644
index 0000000000..a39ad72067
--- /dev/null
+++ b/fs/btrfs/root-tree.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include "ctree.h"
+
+int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
+ struct btrfs_root_item *item, struct btrfs_key *key)
+{
+ struct btrfs_path *path;
+ struct btrfs_key search_key;
+ struct btrfs_key found_key;
+ struct extent_buffer *l;
+ int ret;
+ int slot;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ search_key.objectid = objectid;
+ search_key.type = BTRFS_ROOT_ITEM_KEY;
+ search_key.offset = (u64)-1;
+
+ ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ if (path->slots[0] == 0) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ BUG_ON(ret == 0);
+ l = path->nodes[0];
+ slot = path->slots[0] - 1;
+ btrfs_item_key_to_cpu(l, &found_key, slot);
+ if (found_key.type != BTRFS_ROOT_ITEM_KEY ||
+ found_key.objectid != objectid) {
+ ret = -ENOENT;
+ goto out;
+ }
+ read_extent_buffer(l, item, btrfs_item_ptr_offset(l, slot),
+ sizeof(*item));
+ memcpy(key, &found_key, sizeof(found_key));
+ ret = 0;
+out:
+ btrfs_free_path(path);
+ return ret;
+}
diff --git a/fs/btrfs/root.c b/fs/btrfs/root.c
deleted file mode 100644
index 127b67fd1c..0000000000
--- a/fs/btrfs/root.c
+++ /dev/null
@@ -1,92 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * BTRFS filesystem implementation for U-Boot
- *
- * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
- */
-
-#include "btrfs.h"
-
-static void read_root_item(struct btrfs_path *p, struct btrfs_root_item *item)
-{
- u32 len;
- int reset = 0;
-
- len = btrfs_path_item_size(p);
- memcpy(item, btrfs_path_item_ptr(p, struct btrfs_root_item), len);
- btrfs_root_item_to_cpu(item);
-
- if (len < sizeof(*item))
- reset = 1;
- if (!reset && item->generation != item->generation_v2) {
- if (item->generation_v2 != 0)
- printf("%s: generation != generation_v2 in root item",
- __func__);
- reset = 1;
- }
- if (reset) {
- memset(&item->generation_v2, 0,
- sizeof(*item) - offsetof(struct btrfs_root_item,
- generation_v2));
- }
-}
-
-int btrfs_find_root(u64 objectid, struct btrfs_root *root,
- struct btrfs_root_item *root_item)
-{
- struct btrfs_path path;
- struct btrfs_root_item my_root_item;
-
- if (!btrfs_search_tree_key_type(&btrfs_info.tree_root, objectid,
- BTRFS_ROOT_ITEM_KEY, &path))
- return -1;
-
- if (!root_item)
- root_item = &my_root_item;
- read_root_item(&path, root_item);
-
- if (root) {
- root->objectid = objectid;
- root->bytenr = root_item->bytenr;
- root->root_dirid = root_item->root_dirid;
- }
-
- btrfs_free_path(&path);
- return 0;
-}
-
-u64 btrfs_lookup_root_ref(u64 subvolid, struct btrfs_root_ref *refp, char *name)
-{
- struct btrfs_path path;
- struct btrfs_key *key;
- struct btrfs_root_ref *ref;
- u64 res = -1ULL;
-
- key = btrfs_search_tree_key_type(&btrfs_info.tree_root, subvolid,
- BTRFS_ROOT_BACKREF_KEY, &path);
-
- if (!key)
- return -1ULL;
-
- ref = btrfs_path_item_ptr(&path, struct btrfs_root_ref);
- btrfs_root_ref_to_cpu(ref);
-
- if (refp)
- *refp = *ref;
-
- if (name) {
- if (ref->name_len > BTRFS_VOL_NAME_MAX) {
- printf("%s: volume name too long: %u\n", __func__,
- ref->name_len);
- goto out;
- }
-
- memcpy(name, ref + 1, ref->name_len);
- }
-
- res = key->offset;
-out:
- btrfs_free_path(&path);
- return res;
-}
-
diff --git a/fs/btrfs/subvolume.c b/fs/btrfs/subvolume.c
index 06e54f3310..2815673bcd 100644
--- a/fs/btrfs/subvolume.c
+++ b/fs/btrfs/subvolume.c
@@ -5,126 +5,230 @@
* 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
*/
-#include "btrfs.h"
#include <malloc.h>
+#include "ctree.h"
+#include "btrfs.h"
+#include "disk-io.h"
-static int get_subvol_name(u64 subvolid, char *name, int max_len)
+/*
+ * Resolve the path of ino inside subvolume @root into @path_ret.
+ *
+ * @path_ret must be at least PATH_MAX size.
+ */
+static int get_path_in_subvol(struct btrfs_root *root, u64 ino, char *path_ret)
{
- struct btrfs_root_ref rref;
- struct btrfs_inode_ref iref;
- struct btrfs_root root;
- u64 dir;
- char tmp[max(BTRFS_VOL_NAME_MAX, BTRFS_NAME_MAX)];
- char *ptr;
-
- ptr = name + max_len - 1;
- *ptr = '\0';
-
- while (subvolid != BTRFS_FS_TREE_OBJECTID) {
- subvolid = btrfs_lookup_root_ref(subvolid, &rref, tmp);
-
- if (subvolid == -1ULL)
- return -1;
-
- ptr -= rref.name_len + 1;
- if (ptr < name)
- goto too_long;
-
- memcpy(ptr + 1, tmp, rref.name_len);
- *ptr = '/';
-
- if (btrfs_find_root(subvolid, &root, NULL))
- return -1;
-
- dir = rref.dirid;
-
- while (dir != BTRFS_FIRST_FREE_OBJECTID) {
- dir = btrfs_lookup_inode_ref(&root, dir, &iref, tmp);
-
- if (dir == -1ULL)
- return -1;
-
- ptr -= iref.name_len + 1;
- if (ptr < name)
- goto too_long;
-
- memcpy(ptr + 1, tmp, iref.name_len);
- *ptr = '/';
+ struct btrfs_path path;
+ struct btrfs_key key;
+ char *tmp;
+ u64 cur = ino;
+ int ret = 0;
+
+ tmp = malloc(PATH_MAX);
+ if (!tmp)
+ return -ENOMEM;
+ tmp[0] = '\0';
+
+ btrfs_init_path(&path);
+ while (cur != BTRFS_FIRST_FREE_OBJECTID) {
+ struct btrfs_inode_ref *iref;
+ int name_len;
+
+ btrfs_release_path(&path);
+ key.objectid = cur;
+ key.type = BTRFS_INODE_REF_KEY;
+ key.offset = (u64)-1;
+
+ ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
+ /* Impossible */
+ if (ret == 0)
+ ret = -EUCLEAN;
+ if (ret < 0)
+ goto out;
+ ret = btrfs_previous_item(root, &path, cur,
+ BTRFS_INODE_REF_KEY);
+ if (ret > 0)
+ ret = -ENOENT;
+ if (ret < 0)
+ goto out;
+
+ strncpy(tmp, path_ret, PATH_MAX);
+ iref = btrfs_item_ptr(path.nodes[0], path.slots[0],
+ struct btrfs_inode_ref);
+ name_len = btrfs_inode_ref_name_len(path.nodes[0],
+ iref);
+ if (name_len > BTRFS_NAME_LEN) {
+ ret = -ENAMETOOLONG;
+ goto out;
}
+ read_extent_buffer(path.nodes[0], path_ret,
+ (unsigned long)(iref + 1), name_len);
+ path_ret[name_len] = '/';
+ path_ret[name_len + 1] = '\0';
+ strncat(path_ret, tmp, PATH_MAX);
+
+ btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+ cur = key.offset;
}
-
- if (ptr == name + max_len - 1) {
- name[0] = '/';
- name[1] = '\0';
- } else {
- memmove(name, ptr, name + max_len - ptr);
- }
-
- return 0;
-
-too_long:
- printf("%s: subvolume name too long\n", __func__);
- return -1;
+out:
+ btrfs_release_path(&path);
+ free(tmp);
+ return ret;
}
-u64 btrfs_get_default_subvol_objectid(void)
+static int list_one_subvol(struct btrfs_root *root, char *path_ret)
{
- struct btrfs_dir_item item;
-
- if (btrfs_lookup_dir_item(&btrfs_info.tree_root,
- btrfs_info.sb.root_dir_objectid, "default", 7,
- &item))
- return BTRFS_FS_TREE_OBJECTID;
- return item.location.objectid;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct btrfs_root *tree_root = fs_info->tree_root;
+ struct btrfs_path path;
+ struct btrfs_key key;
+ char *tmp;
+ u64 cur = root->root_key.objectid;
+ int ret = 0;
+
+ tmp = malloc(PATH_MAX);
+ if (!tmp)
+ return -ENOMEM;
+ tmp[0] = '\0';
+ path_ret[0] = '\0';
+ btrfs_init_path(&path);
+ while (cur != BTRFS_FS_TREE_OBJECTID) {
+ struct btrfs_root_ref *rr;
+ struct btrfs_key location;
+ int name_len;
+ u64 ino;
+
+ key.objectid = cur;
+ key.type = BTRFS_ROOT_BACKREF_KEY;
+ key.offset = (u64)-1;
+ btrfs_release_path(&path);
+
+ ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
+ if (ret == 0)
+ ret = -EUCLEAN;
+ if (ret < 0)
+ goto out;
+ ret = btrfs_previous_item(tree_root, &path, cur,
+ BTRFS_ROOT_BACKREF_KEY);
+ if (ret > 0)
+ ret = -ENOENT;
+ if (ret < 0)
+ goto out;
+
+ /* Get the subvolume name */
+ rr = btrfs_item_ptr(path.nodes[0], path.slots[0],
+ struct btrfs_root_ref);
+ strncpy(tmp, path_ret, PATH_MAX);
+ name_len = btrfs_root_ref_name_len(path.nodes[0], rr);
+ if (name_len > BTRFS_NAME_LEN) {
+ ret = -ENAMETOOLONG;
+ goto out;
+ }
+ ino = btrfs_root_ref_dirid(path.nodes[0], rr);
+ read_extent_buffer(path.nodes[0], path_ret,
+ (unsigned long)(rr + 1), name_len);
+ path_ret[name_len] = '/';
+ path_ret[name_len + 1] = '\0';
+ strncat(path_ret, tmp, PATH_MAX);
+
+ /* Get the path inside the parent subvolume */
+ btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+ location.objectid = key.offset;
+ location.type = BTRFS_ROOT_ITEM_KEY;
+ location.offset = (u64)-1;
+ root = btrfs_read_fs_root(fs_info, &location);
+ if (IS_ERR(root)) {
+ ret = PTR_ERR(root);
+ goto out;
+ }
+ ret = get_path_in_subvol(root, ino, path_ret);
+ if (ret < 0)
+ goto out;
+ cur = key.offset;
+ }
+ /* Add the leading '/' */
+ strncpy(tmp, path_ret, PATH_MAX);
+ strncpy(path_ret, "/", PATH_MAX);
+ strncat(path_ret, tmp, PATH_MAX);
+out:
+ btrfs_release_path(&path);
+ free(tmp);
+ return ret;
}
-static void list_subvols(u64 tree, char *nameptr, int max_name_len, int level)
+static int list_subvolums(struct btrfs_fs_info *fs_info)
{
- struct btrfs_key key, *found_key;
+ struct btrfs_root *tree_root = fs_info->tree_root;
+ struct btrfs_root *root;
struct btrfs_path path;
- struct btrfs_root_ref *ref;
- int res;
-
- key.objectid = tree;
- key.type = BTRFS_ROOT_REF_KEY;
+ struct btrfs_key key;
+ char *result;
+ int ret = 0;
+
+ result = malloc(PATH_MAX);
+ if (!result)
+ return -ENOMEM;
+
+ ret = list_one_subvol(fs_info->fs_root, result);
+ if (ret < 0)
+ goto out;
+ root = fs_info->fs_root;
+ printf("ID %llu gen %llu path %.*s\n",
+ root->root_key.objectid, btrfs_root_generation(&root->root_item),
+ PATH_MAX, result);
+
+ key.objectid = BTRFS_FIRST_FREE_OBJECTID;
+ key.type = BTRFS_ROOT_ITEM_KEY;
key.offset = 0;
-
- if (btrfs_search_tree(&btrfs_info.tree_root, &key, &path))
- return;
-
- do {
- found_key = btrfs_path_leaf_key(&path);
- if (btrfs_comp_keys_type(&key, found_key))
+ btrfs_init_path(&path);
+ ret = btrfs_search_slot(NULL, tree_root, &key, &path, 0, 0);
+ if (ret < 0)
+ goto out;
+ while (1) {
+ if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
+ goto next;
+
+ btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
+ if (key.objectid > BTRFS_LAST_FREE_OBJECTID)
break;
-
- ref = btrfs_path_item_ptr(&path, struct btrfs_root_ref);
- btrfs_root_ref_to_cpu(ref);
-
- printf("ID %llu parent %llu name ", found_key->offset, tree);
- if (nameptr && !get_subvol_name(found_key->offset, nameptr,
- max_name_len))
- printf("%s\n", nameptr);
- else
- printf("%.*s\n", (int) ref->name_len,
- (const char *) (ref + 1));
-
- if (level > 0)
- list_subvols(found_key->offset, nameptr, max_name_len,
- level - 1);
- else
- printf("%s: Too much recursion, maybe skipping some "
- "subvolumes\n", __func__);
- } while (!(res = btrfs_next_slot(&path)));
-
- btrfs_free_path(&path);
+ if (key.objectid < BTRFS_FIRST_FREE_OBJECTID ||
+ key.type != BTRFS_ROOT_ITEM_KEY)
+ goto next;
+ key.offset = (u64)-1;
+ root = btrfs_read_fs_root(fs_info, &key);
+ if (IS_ERR(root)) {
+ ret = PTR_ERR(root);
+ if (ret == -ENOENT)
+ goto next;
+ }
+ ret = list_one_subvol(root, result);
+ if (ret < 0)
+ goto out;
+ printf("ID %llu gen %llu path %.*s\n",
+ root->root_key.objectid,
+ btrfs_root_generation(&root->root_item),
+ PATH_MAX, result);
+next:
+ ret = btrfs_next_item(tree_root, &path);
+ if (ret < 0)
+ goto out;
+ if (ret > 0) {
+ ret = 0;
+ break;
+ }
+ }
+out:
+ free(result);
+ return ret;
}
void btrfs_list_subvols(void)
{
- char *nameptr = malloc(4096);
+ struct btrfs_fs_info *fs_info = current_fs_info;
+ int ret;
- list_subvols(BTRFS_FS_TREE_OBJECTID, nameptr, nameptr ? 4096 : 0, 40);
-
- if (nameptr)
- free(nameptr);
+ if (!fs_info)
+ return;
+ ret = list_subvolums(fs_info);
+ if (ret < 0)
+ error("failed to list subvolume: %d", ret);
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
deleted file mode 100644
index 913a4d402e..0000000000
--- a/fs/btrfs/super.c
+++ /dev/null
@@ -1,257 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * BTRFS filesystem implementation for U-Boot
- *
- * 2017 Marek Behun, CZ.NIC, marek.behun@nic.cz
- */
-
-#include <common.h>
-#include <log.h>
-#include <memalign.h>
-#include <part.h>
-#include <linux/compat.h>
-#include "btrfs.h"
-
-#define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN \
- | BTRFS_HEADER_FLAG_RELOC \
- | BTRFS_SUPER_FLAG_ERROR \
- | BTRFS_SUPER_FLAG_SEEDING \
- | BTRFS_SUPER_FLAG_METADUMP)
-
-#define BTRFS_SUPER_INFO_SIZE 4096
-
-/*
- * checks if a valid root backup is present.
- * considers the case when all root backups empty valid.
- * returns -1 in case of invalid root backup and 0 for valid.
- */
-static int btrfs_check_super_roots(struct btrfs_super_block *sb)
-{
- struct btrfs_root_backup *root_backup;
- int i, newest = -1;
- int num_empty = 0;
-
- for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; ++i) {
- root_backup = sb->super_roots + i;
-
- if (root_backup->tree_root == 0 && root_backup->tree_root_gen == 0)
- num_empty++;
-
- if (root_backup->tree_root_gen == sb->generation)
- newest = i;
- }
-
- if (num_empty == BTRFS_NUM_BACKUP_ROOTS) {
- return 0;
- } else if (newest >= 0) {
- return 0;
- }
-
- return -1;
-}
-
-static inline int is_power_of_2(u64 x)
-{
- return !(x & (x - 1));
-}
-
-static int btrfs_check_super_csum(char *raw_disk_sb)
-{
- struct btrfs_super_block *disk_sb =
- (struct btrfs_super_block *) raw_disk_sb;
- u16 csum_type = le16_to_cpu(disk_sb->csum_type);
-
- if (csum_type == BTRFS_CSUM_TYPE_CRC32) {
- u32 crc = ~(u32) 0;
- const int csum_size = sizeof(crc);
- char result[csum_size];
-
- crc = btrfs_csum_data(raw_disk_sb + BTRFS_CSUM_SIZE, crc,
- BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
- btrfs_csum_final(crc, result);
-
- if (memcmp(raw_disk_sb, result, csum_size))
- return -1;
- } else {
- return -1;
- }
-
- return 0;
-}
-
-static int btrfs_check_super(struct btrfs_super_block *sb)
-{
- int ret = 0;
-
- if (sb->flags & ~BTRFS_SUPER_FLAG_SUPP) {
- printf("%s: Unsupported flags: %llu\n", __func__,
- sb->flags & ~BTRFS_SUPER_FLAG_SUPP);
- }
-
- if (sb->root_level > BTRFS_MAX_LEVEL) {
- printf("%s: tree_root level too big: %d >= %d\n", __func__,
- sb->root_level, BTRFS_MAX_LEVEL);
- ret = -1;
- }
-
- if (sb->chunk_root_level > BTRFS_MAX_LEVEL) {
- printf("%s: chunk_root level too big: %d >= %d\n", __func__,
- sb->chunk_root_level, BTRFS_MAX_LEVEL);
- ret = -1;
- }
-
- if (sb->log_root_level > BTRFS_MAX_LEVEL) {
- printf("%s: log_root level too big: %d >= %d\n", __func__,
- sb->log_root_level, BTRFS_MAX_LEVEL);
- ret = -1;
- }
-
- if (!is_power_of_2(sb->sectorsize) || sb->sectorsize < 4096 ||
- sb->sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) {
- printf("%s: invalid sectorsize %u\n", __func__,
- sb->sectorsize);
- ret = -1;
- }
-
- if (!is_power_of_2(sb->nodesize) || sb->nodesize < sb->sectorsize ||
- sb->nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) {
- printf("%s: invalid nodesize %u\n", __func__, sb->nodesize);
- ret = -1;
- }
-
- if (sb->nodesize != sb->__unused_leafsize) {
- printf("%s: invalid leafsize %u, should be %u\n", __func__,
- sb->__unused_leafsize, sb->nodesize);
- ret = -1;
- }
-
- if (!IS_ALIGNED(sb->root, sb->sectorsize)) {
- printf("%s: tree_root block unaligned: %llu\n", __func__,
- sb->root);
- ret = -1;
- }
-
- if (!IS_ALIGNED(sb->chunk_root, sb->sectorsize)) {
- printf("%s: chunk_root block unaligned: %llu\n", __func__,
- sb->chunk_root);
- ret = -1;
- }
-
- if (!IS_ALIGNED(sb->log_root, sb->sectorsize)) {
- printf("%s: log_root block unaligned: %llu\n", __func__,
- sb->log_root);
- ret = -1;
- }
-
- if (memcmp(sb->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) {
- printf("%s: dev_item UUID does not match fsid\n", __func__);
- ret = -1;
- }
-
- if (sb->bytes_used < 6*sb->nodesize) {
- printf("%s: bytes_used is too small %llu\n", __func__,
- sb->bytes_used);
- ret = -1;
- }
-
- if (!is_power_of_2(sb->stripesize)) {
- printf("%s: invalid stripesize %u\n", __func__, sb->stripesize);
- ret = -1;
- }
-
- if (sb->sys_chunk_array_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
- printf("%s: system chunk array too big %u > %u\n", __func__,
- sb->sys_chunk_array_size, BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
- ret = -1;
- }
-
- if (sb->sys_chunk_array_size < sizeof(struct btrfs_key) +
- sizeof(struct btrfs_chunk)) {
- printf("%s: system chunk array too small %u < %zu\n", __func__,
- sb->sys_chunk_array_size, sizeof(struct btrfs_key)
- + sizeof(struct btrfs_chunk));
- ret = -1;
- }
-
- return ret;
-}
-
-int btrfs_read_superblock(void)
-{
- const u64 superblock_offsets[4] = {
- 0x10000ull,
- 0x4000000ull,
- 0x4000000000ull,
- 0x4000000000000ull
- };
- ALLOC_CACHE_ALIGN_BUFFER(char, raw_sb, BTRFS_SUPER_INFO_SIZE);
- struct btrfs_super_block *sb = (struct btrfs_super_block *) raw_sb;
- u64 dev_total_bytes;
- int i;
-
- dev_total_bytes = (u64) btrfs_part_info->size * btrfs_part_info->blksz;
-
- btrfs_info.sb.generation = 0;
-
- for (i = 0; i < 4; ++i) {
- if (superblock_offsets[i] + sizeof(sb) > dev_total_bytes)
- break;
-
- if (!btrfs_devread(superblock_offsets[i], BTRFS_SUPER_INFO_SIZE,
- raw_sb))
- break;
-
- if (btrfs_check_super_csum(raw_sb)) {
- debug("%s: invalid checksum at superblock mirror %i\n",
- __func__, i);
- continue;
- }
-
- btrfs_super_block_to_cpu(sb);
-
- if (sb->magic != BTRFS_MAGIC) {
- debug("%s: invalid BTRFS magic 0x%016llX at "
- "superblock mirror %i\n", __func__, sb->magic, i);
- } else if (sb->bytenr != superblock_offsets[i]) {
- printf("%s: invalid bytenr 0x%016llX (expected "
- "0x%016llX) at superblock mirror %i\n",
- __func__, sb->bytenr, superblock_offsets[i], i);
- } else if (btrfs_check_super(sb)) {
- printf("%s: Checking superblock mirror %i failed\n",
- __func__, i);
- } else if (sb->generation > btrfs_info.sb.generation) {
- memcpy(&btrfs_info.sb, sb, sizeof(*sb));
- } else {
- /* Nothing */
- }
- }
-
- if (!btrfs_info.sb.generation) {
- debug("%s: No valid BTRFS superblock found!\n", __func__);
- return -1;
- }
-
- if (btrfs_check_super_roots(&btrfs_info.sb)) {
- printf("%s: No valid root_backup found!\n", __func__);
- return -1;
- }
-
- if (sb->sectorsize != PAGE_SIZE) {
- printf(
- "%s: Unsupported sector size (%u), only supports %u as sector size\n",
- __func__, sb->sectorsize, PAGE_SIZE);
- return -1;
- }
-
- if (btrfs_info.sb.num_devices != 1) {
- printf("%s: Unsupported number of devices (%lli). This driver "
- "only supports filesystem on one device.\n", __func__,
- btrfs_info.sb.num_devices);
- return -1;
- }
-
- debug("Chosen superblock with generation = %llu\n",
- btrfs_info.sb.generation);
-
- return 0;
-}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
new file mode 100644
index 0000000000..fcf52d4b0f
--- /dev/null
+++ b/fs/btrfs/volumes.c
@@ -0,0 +1,1173 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <stdlib.h>
+#include <common.h>
+#include <fs_internal.h>
+#include "ctree.h"
+#include "disk-io.h"
+#include "volumes.h"
+#include "extent-io.h"
+
+const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
+ [BTRFS_RAID_RAID10] = {
+ .sub_stripes = 2,
+ .dev_stripes = 1,
+ .devs_max = 0, /* 0 == as many as possible */
+ .devs_min = 4,
+ .tolerated_failures = 1,
+ .devs_increment = 2,
+ .ncopies = 2,
+ .nparity = 0,
+ .raid_name = "raid10",
+ .bg_flag = BTRFS_BLOCK_GROUP_RAID10,
+ },
+ [BTRFS_RAID_RAID1] = {
+ .sub_stripes = 1,
+ .dev_stripes = 1,
+ .devs_max = 2,
+ .devs_min = 2,
+ .tolerated_failures = 1,
+ .devs_increment = 2,
+ .ncopies = 2,
+ .nparity = 0,
+ .raid_name = "raid1",
+ .bg_flag = BTRFS_BLOCK_GROUP_RAID1,
+ },
+ [BTRFS_RAID_RAID1C3] = {
+ .sub_stripes = 1,
+ .dev_stripes = 1,
+ .devs_max = 3,
+ .devs_min = 3,
+ .tolerated_failures = 2,
+ .devs_increment = 3,
+ .ncopies = 3,
+ .raid_name = "raid1c3",
+ .bg_flag = BTRFS_BLOCK_GROUP_RAID1C3,
+ },
+ [BTRFS_RAID_RAID1C4] = {
+ .sub_stripes = 1,
+ .dev_stripes = 1,
+ .devs_max = 4,
+ .devs_min = 4,
+ .tolerated_failures = 3,
+ .devs_increment = 4,
+ .ncopies = 4,
+ .raid_name = "raid1c4",
+ .bg_flag = BTRFS_BLOCK_GROUP_RAID1C4,
+ },
+ [BTRFS_RAID_DUP] = {
+ .sub_stripes = 1,
+ .dev_stripes = 2,
+ .devs_max = 1,
+ .devs_min = 1,
+ .tolerated_failures = 0,
+ .devs_increment = 1,
+ .ncopies = 2,
+ .nparity = 0,
+ .raid_name = "dup",
+ .bg_flag = BTRFS_BLOCK_GROUP_DUP,
+ },
+ [BTRFS_RAID_RAID0] = {
+ .sub_stripes = 1,
+ .dev_stripes = 1,
+ .devs_max = 0,
+ .devs_min = 2,
+ .tolerated_failures = 0,
+ .devs_increment = 1,
+ .ncopies = 1,
+ .nparity = 0,
+ .raid_name = "raid0",
+ .bg_flag = BTRFS_BLOCK_GROUP_RAID0,
+ },
+ [BTRFS_RAID_SINGLE] = {
+ .sub_stripes = 1,
+ .dev_stripes = 1,
+ .devs_max = 1,
+ .devs_min = 1,
+ .tolerated_failures = 0,
+ .devs_increment = 1,
+ .ncopies = 1,
+ .nparity = 0,
+ .raid_name = "single",
+ .bg_flag = 0,
+ },
+ [BTRFS_RAID_RAID5] = {
+ .sub_stripes = 1,
+ .dev_stripes = 1,
+ .devs_max = 0,
+ .devs_min = 2,
+ .tolerated_failures = 1,
+ .devs_increment = 1,
+ .ncopies = 1,
+ .nparity = 1,
+ .raid_name = "raid5",
+ .bg_flag = BTRFS_BLOCK_GROUP_RAID5,
+ },
+ [BTRFS_RAID_RAID6] = {
+ .sub_stripes = 1,
+ .dev_stripes = 1,
+ .devs_max = 0,
+ .devs_min = 3,
+ .tolerated_failures = 2,
+ .devs_increment = 1,
+ .ncopies = 1,
+ .nparity = 2,
+ .raid_name = "raid6",
+ .bg_flag = BTRFS_BLOCK_GROUP_RAID6,
+ },
+};
+
+struct stripe {
+ struct btrfs_device *dev;
+ u64 physical;
+};
+
+static inline int nr_parity_stripes(struct map_lookup *map)
+{
+ if (map->type & BTRFS_BLOCK_GROUP_RAID5)
+ return 1;
+ else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
+ return 2;
+ else
+ return 0;
+}
+
+static inline int nr_data_stripes(struct map_lookup *map)
+{
+ return map->num_stripes - nr_parity_stripes(map);
+}
+
+#define is_parity_stripe(x) ( ((x) == BTRFS_RAID5_P_STRIPE) || ((x) == BTRFS_RAID6_Q_STRIPE) )
+
+static LIST_HEAD(fs_uuids);
+
+/*
+ * Find a device specified by @devid or @uuid in the list of @fs_devices, or
+ * return NULL.
+ *
+ * If devid and uuid are both specified, the match must be exact, otherwise
+ * only devid is used.
+ */
+static struct btrfs_device *find_device(struct btrfs_fs_devices *fs_devices,
+ u64 devid, u8 *uuid)
+{
+ struct list_head *head = &fs_devices->devices;
+ struct btrfs_device *dev;
+
+ list_for_each_entry(dev, head, dev_list) {
+ if (dev->devid == devid &&
+ (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
+ return dev;
+ }
+ }
+ return NULL;
+}
+
+static struct btrfs_fs_devices *find_fsid(u8 *fsid, u8 *metadata_uuid)
+{
+ struct btrfs_fs_devices *fs_devices;
+
+ list_for_each_entry(fs_devices, &fs_uuids, list) {
+ if (metadata_uuid && (memcmp(fsid, fs_devices->fsid,
+ BTRFS_FSID_SIZE) == 0) &&
+ (memcmp(metadata_uuid, fs_devices->metadata_uuid,
+ BTRFS_FSID_SIZE) == 0)) {
+ return fs_devices;
+ } else if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0){
+ return fs_devices;
+ }
+ }
+ return NULL;
+}
+
+static int device_list_add(struct btrfs_super_block *disk_super,
+ u64 devid, struct blk_desc *desc,
+ struct disk_partition *part,
+ struct btrfs_fs_devices **fs_devices_ret)
+{
+ struct btrfs_device *device;
+ struct btrfs_fs_devices *fs_devices;
+ u64 found_transid = btrfs_super_generation(disk_super);
+ bool metadata_uuid = (btrfs_super_incompat_flags(disk_super) &
+ BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
+
+ if (metadata_uuid)
+ fs_devices = find_fsid(disk_super->fsid,
+ disk_super->metadata_uuid);
+ else
+ fs_devices = find_fsid(disk_super->fsid, NULL);
+
+ if (!fs_devices) {
+ fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
+ if (!fs_devices)
+ return -ENOMEM;
+ INIT_LIST_HEAD(&fs_devices->devices);
+ list_add(&fs_devices->list, &fs_uuids);
+ memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
+ if (metadata_uuid)
+ memcpy(fs_devices->metadata_uuid,
+ disk_super->metadata_uuid, BTRFS_FSID_SIZE);
+ else
+ memcpy(fs_devices->metadata_uuid, fs_devices->fsid,
+ BTRFS_FSID_SIZE);
+
+ fs_devices->latest_devid = devid;
+ fs_devices->latest_trans = found_transid;
+ fs_devices->lowest_devid = (u64)-1;
+ device = NULL;
+ } else {
+ device = find_device(fs_devices, devid,
+ disk_super->dev_item.uuid);
+ }
+ if (!device) {
+ device = kzalloc(sizeof(*device), GFP_NOFS);
+ if (!device) {
+ /* we can safely leave the fs_devices entry around */
+ return -ENOMEM;
+ }
+ device->devid = devid;
+ device->desc = desc;
+ device->part = part;
+ device->generation = found_transid;
+ memcpy(device->uuid, disk_super->dev_item.uuid,
+ BTRFS_UUID_SIZE);
+ device->total_devs = btrfs_super_num_devices(disk_super);
+ device->super_bytes_used = btrfs_super_bytes_used(disk_super);
+ device->total_bytes =
+ btrfs_stack_device_total_bytes(&disk_super->dev_item);
+ device->bytes_used =
+ btrfs_stack_device_bytes_used(&disk_super->dev_item);
+ list_add(&device->dev_list, &fs_devices->devices);
+ device->fs_devices = fs_devices;
+ } else if (!device->desc || !device->part) {
+ /*
+ * The existing device has newer generation, so this one could
+ * be a stale one, don't add it.
+ */
+ if (found_transid < device->generation) {
+ error(
+ "adding devid %llu gen %llu but found an existing device gen %llu",
+ device->devid, found_transid,
+ device->generation);
+ return -EEXIST;
+ } else {
+ device->desc = desc;
+ device->part = part;
+ }
+ }
+
+
+ if (found_transid > fs_devices->latest_trans) {
+ fs_devices->latest_devid = devid;
+ fs_devices->latest_trans = found_transid;
+ }
+ if (fs_devices->lowest_devid > devid) {
+ fs_devices->lowest_devid = devid;
+ }
+ *fs_devices_ret = fs_devices;
+ return 0;
+}
+
+int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
+{
+ struct btrfs_fs_devices *seed_devices;
+ struct btrfs_device *device;
+ int ret = 0;
+
+again:
+ if (!fs_devices)
+ return 0;
+ while (!list_empty(&fs_devices->devices)) {
+ device = list_entry(fs_devices->devices.next,
+ struct btrfs_device, dev_list);
+ list_del(&device->dev_list);
+ /* free the memory */
+ free(device);
+ }
+
+ seed_devices = fs_devices->seed;
+ fs_devices->seed = NULL;
+ if (seed_devices) {
+ struct btrfs_fs_devices *orig;
+
+ orig = fs_devices;
+ fs_devices = seed_devices;
+ list_del(&orig->list);
+ free(orig);
+ goto again;
+ } else {
+ list_del(&fs_devices->list);
+ free(fs_devices);
+ }
+
+ return ret;
+}
+
+void btrfs_close_all_devices(void)
+{
+ struct btrfs_fs_devices *fs_devices;
+
+ while (!list_empty(&fs_uuids)) {
+ fs_devices = list_entry(fs_uuids.next, struct btrfs_fs_devices,
+ list);
+ btrfs_close_devices(fs_devices);
+ }
+}
+
+int btrfs_open_devices(struct btrfs_fs_devices *fs_devices)
+{
+ struct btrfs_device *device;
+
+ list_for_each_entry(device, &fs_devices->devices, dev_list) {
+ if (!device->desc || !device->part) {
+ printf("no device found for devid %llu, skip it \n",
+ device->devid);
+ continue;
+ }
+ }
+ return 0;
+}
+
+int btrfs_scan_one_device(struct blk_desc *desc, struct disk_partition *part,
+ struct btrfs_fs_devices **fs_devices_ret,
+ u64 *total_devs)
+{
+ struct btrfs_super_block *disk_super;
+ char buf[BTRFS_SUPER_INFO_SIZE];
+ int ret;
+ u64 devid;
+
+ disk_super = (struct btrfs_super_block *)buf;
+ ret = btrfs_read_dev_super(desc, part, disk_super);
+ if (ret < 0)
+ return -EIO;
+ devid = btrfs_stack_device_id(&disk_super->dev_item);
+ if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_METADUMP)
+ *total_devs = 1;
+ else
+ *total_devs = btrfs_super_num_devices(disk_super);
+
+ ret = device_list_add(disk_super, devid, desc, part, fs_devices_ret);
+
+ return ret;
+}
+
+struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid,
+ u8 *uuid, u8 *fsid)
+{
+ struct btrfs_device *device;
+ struct btrfs_fs_devices *cur_devices;
+
+ cur_devices = fs_info->fs_devices;
+ while (cur_devices) {
+ if (!fsid ||
+ !memcmp(cur_devices->metadata_uuid, fsid, BTRFS_FSID_SIZE)) {
+ device = find_device(cur_devices, devid, uuid);
+ if (device)
+ return device;
+ }
+ cur_devices = cur_devices->seed;
+ }
+ return NULL;
+}
+
+static struct btrfs_device *fill_missing_device(u64 devid)
+{
+ struct btrfs_device *device;
+
+ device = kzalloc(sizeof(*device), GFP_NOFS);
+ return device;
+}
+
+/*
+ * slot == -1: SYSTEM chunk
+ * return -EIO on error, otherwise return 0
+ */
+int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *leaf,
+ struct btrfs_chunk *chunk,
+ int slot, u64 logical)
+{
+ u64 length;
+ u64 stripe_len;
+ u16 num_stripes;
+ u16 sub_stripes;
+ u64 type;
+ u32 chunk_ondisk_size;
+ u32 sectorsize = fs_info->sectorsize;
+
+ /*
+ * Basic chunk item size check. Note that btrfs_chunk already contains
+ * one stripe, so no "==" check.
+ */
+ if (slot >= 0 &&
+ btrfs_item_size_nr(leaf, slot) < sizeof(struct btrfs_chunk)) {
+ error("invalid chunk item size, have %u expect [%zu, %zu)",
+ btrfs_item_size_nr(leaf, slot),
+ sizeof(struct btrfs_chunk),
+ BTRFS_LEAF_DATA_SIZE(fs_info));
+ return -EUCLEAN;
+ }
+ length = btrfs_chunk_length(leaf, chunk);
+ stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+ num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+ sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
+ type = btrfs_chunk_type(leaf, chunk);
+
+ if (num_stripes == 0) {
+ error("invalid num_stripes, have %u expect non-zero",
+ num_stripes);
+ return -EUCLEAN;
+ }
+ if (slot >= 0 && btrfs_chunk_item_size(num_stripes) !=
+ btrfs_item_size_nr(leaf, slot)) {
+ error("invalid chunk item size, have %u expect %lu",
+ btrfs_item_size_nr(leaf, slot),
+ btrfs_chunk_item_size(num_stripes));
+ return -EUCLEAN;
+ }
+
+ /*
+ * These valid checks may be insufficient to cover every corner cases.
+ */
+ if (!IS_ALIGNED(logical, sectorsize)) {
+ error("invalid chunk logical %llu", logical);
+ return -EIO;
+ }
+ if (btrfs_chunk_sector_size(leaf, chunk) != sectorsize) {
+ error("invalid chunk sectorsize %llu",
+ (unsigned long long)btrfs_chunk_sector_size(leaf, chunk));
+ return -EIO;
+ }
+ if (!length || !IS_ALIGNED(length, sectorsize)) {
+ error("invalid chunk length %llu", length);
+ return -EIO;
+ }
+ if (stripe_len != BTRFS_STRIPE_LEN) {
+ error("invalid chunk stripe length: %llu", stripe_len);
+ return -EIO;
+ }
+ /* Check on chunk item type */
+ if (slot == -1 && (type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
+ error("invalid chunk type %llu", type);
+ return -EIO;
+ }
+ if (type & ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
+ BTRFS_BLOCK_GROUP_PROFILE_MASK)) {
+ error("unrecognized chunk type: %llu",
+ ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
+ BTRFS_BLOCK_GROUP_PROFILE_MASK) & type);
+ return -EIO;
+ }
+ if (!(type & BTRFS_BLOCK_GROUP_TYPE_MASK)) {
+ error("missing chunk type flag: %llu", type);
+ return -EIO;
+ }
+ if (!(is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK) ||
+ (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0)) {
+ error("conflicting chunk type detected: %llu", type);
+ return -EIO;
+ }
+ if ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) &&
+ !is_power_of_2(type & BTRFS_BLOCK_GROUP_PROFILE_MASK)) {
+ error("conflicting chunk profile detected: %llu", type);
+ return -EIO;
+ }
+
+ chunk_ondisk_size = btrfs_chunk_item_size(num_stripes);
+ /*
+ * Btrfs_chunk contains at least one stripe, and for sys_chunk
+ * it can't exceed the system chunk array size
+ * For normal chunk, it should match its chunk item size.
+ */
+ if (num_stripes < 1 ||
+ (slot == -1 && chunk_ondisk_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) ||
+ (slot >= 0 && chunk_ondisk_size > btrfs_item_size_nr(leaf, slot))) {
+ error("invalid num_stripes: %u", num_stripes);
+ return -EIO;
+ }
+ /*
+ * Device number check against profile
+ */
+ if ((type & BTRFS_BLOCK_GROUP_RAID10 && (sub_stripes != 2 ||
+ !IS_ALIGNED(num_stripes, sub_stripes))) ||
+ (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
+ (type & BTRFS_BLOCK_GROUP_RAID1C3 && num_stripes < 3) ||
+ (type & BTRFS_BLOCK_GROUP_RAID1C4 && num_stripes < 4) ||
+ (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
+ (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
+ (type & BTRFS_BLOCK_GROUP_DUP && num_stripes > 2) ||
+ ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
+ num_stripes != 1)) {
+ error("Invalid num_stripes:sub_stripes %u:%u for profile %llu",
+ num_stripes, sub_stripes,
+ type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+/*
+ * Slot is used to verify the chunk item is valid
+ *
+ * For sys chunk in superblock, pass -1 to indicate sys chunk.
+ */
+static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
+ struct extent_buffer *leaf,
+ struct btrfs_chunk *chunk, int slot)
+{
+ struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+ struct map_lookup *map;
+ struct cache_extent *ce;
+ u64 logical;
+ u64 length;
+ u64 devid;
+ u8 uuid[BTRFS_UUID_SIZE];
+ int num_stripes;
+ int ret;
+ int i;
+
+ logical = key->offset;
+ length = btrfs_chunk_length(leaf, chunk);
+ num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+ /* Validation check */
+ ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, slot, logical);
+ if (ret) {
+ error("%s checksums match, but it has an invalid chunk, %s",
+ (slot == -1) ? "Superblock" : "Metadata",
+ (slot == -1) ? "try btrfsck --repair -s <superblock> ie, 0,1,2" : "");
+ return ret;
+ }
+
+ ce = search_cache_extent(&map_tree->cache_tree, logical);
+
+ /* already mapped? */
+ if (ce && ce->start <= logical && ce->start + ce->size > logical) {
+ return 0;
+ }
+
+ map = kmalloc(btrfs_map_lookup_size(num_stripes), GFP_NOFS);
+ if (!map)
+ return -ENOMEM;
+
+ map->ce.start = logical;
+ map->ce.size = length;
+ map->num_stripes = num_stripes;
+ map->io_width = btrfs_chunk_io_width(leaf, chunk);
+ map->io_align = btrfs_chunk_io_align(leaf, chunk);
+ map->sector_size = btrfs_chunk_sector_size(leaf, chunk);
+ map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
+ map->type = btrfs_chunk_type(leaf, chunk);
+ map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
+
+ for (i = 0; i < num_stripes; i++) {
+ map->stripes[i].physical =
+ btrfs_stripe_offset_nr(leaf, chunk, i);
+ devid = btrfs_stripe_devid_nr(leaf, chunk, i);
+ read_extent_buffer(leaf, uuid, (unsigned long)
+ btrfs_stripe_dev_uuid_nr(chunk, i),
+ BTRFS_UUID_SIZE);
+ map->stripes[i].dev = btrfs_find_device(fs_info, devid, uuid,
+ NULL);
+ if (!map->stripes[i].dev) {
+ map->stripes[i].dev = fill_missing_device(devid);
+ printf("warning, device %llu is missing\n",
+ (unsigned long long)devid);
+ list_add(&map->stripes[i].dev->dev_list,
+ &fs_info->fs_devices->devices);
+ }
+
+ }
+ ret = insert_cache_extent(&map_tree->cache_tree, &map->ce);
+ if (ret < 0) {
+ errno = -ret;
+ error("failed to add chunk map start=%llu len=%llu: %d (%m)",
+ map->ce.start, map->ce.size, ret);
+ }
+
+ return ret;
+}
+
+static int fill_device_from_item(struct extent_buffer *leaf,
+ struct btrfs_dev_item *dev_item,
+ struct btrfs_device *device)
+{
+ unsigned long ptr;
+
+ device->devid = btrfs_device_id(leaf, dev_item);
+ device->total_bytes = btrfs_device_total_bytes(leaf, dev_item);
+ device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
+ device->type = btrfs_device_type(leaf, dev_item);
+ device->io_align = btrfs_device_io_align(leaf, dev_item);
+ device->io_width = btrfs_device_io_width(leaf, dev_item);
+ device->sector_size = btrfs_device_sector_size(leaf, dev_item);
+
+ ptr = (unsigned long)btrfs_device_uuid(dev_item);
+ read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
+
+ return 0;
+}
+
+static int read_one_dev(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *leaf,
+ struct btrfs_dev_item *dev_item)
+{
+ struct btrfs_device *device;
+ u64 devid;
+ int ret = 0;
+ u8 fs_uuid[BTRFS_UUID_SIZE];
+ u8 dev_uuid[BTRFS_UUID_SIZE];
+
+ devid = btrfs_device_id(leaf, dev_item);
+ read_extent_buffer(leaf, dev_uuid,
+ (unsigned long)btrfs_device_uuid(dev_item),
+ BTRFS_UUID_SIZE);
+ read_extent_buffer(leaf, fs_uuid,
+ (unsigned long)btrfs_device_fsid(dev_item),
+ BTRFS_FSID_SIZE);
+
+ if (memcmp(fs_uuid, fs_info->fs_devices->fsid, BTRFS_UUID_SIZE)) {
+ error("Seed device is not yet supported\n");
+ return -ENOTSUPP;
+ }
+
+ device = btrfs_find_device(fs_info, devid, dev_uuid, fs_uuid);
+ if (!device) {
+ device = kzalloc(sizeof(*device), GFP_NOFS);
+ if (!device)
+ return -ENOMEM;
+ list_add(&device->dev_list,
+ &fs_info->fs_devices->devices);
+ }
+
+ fill_device_from_item(leaf, dev_item, device);
+ fs_info->fs_devices->total_rw_bytes +=
+ btrfs_device_total_bytes(leaf, dev_item);
+ return ret;
+}
+
+int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_super_block *super_copy = fs_info->super_copy;
+ struct extent_buffer *sb;
+ struct btrfs_disk_key *disk_key;
+ struct btrfs_chunk *chunk;
+ u8 *array_ptr;
+ unsigned long sb_array_offset;
+ int ret = 0;
+ u32 num_stripes;
+ u32 array_size;
+ u32 len = 0;
+ u32 cur_offset;
+ struct btrfs_key key;
+
+ if (fs_info->nodesize < BTRFS_SUPER_INFO_SIZE) {
+ printf("ERROR: nodesize %u too small to read superblock\n",
+ fs_info->nodesize);
+ return -EINVAL;
+ }
+ sb = alloc_dummy_extent_buffer(fs_info, BTRFS_SUPER_INFO_OFFSET,
+ BTRFS_SUPER_INFO_SIZE);
+ if (!sb)
+ return -ENOMEM;
+ btrfs_set_buffer_uptodate(sb);
+ write_extent_buffer(sb, super_copy, 0, sizeof(*super_copy));
+ array_size = btrfs_super_sys_array_size(super_copy);
+
+ array_ptr = super_copy->sys_chunk_array;
+ sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array);
+ cur_offset = 0;
+
+ while (cur_offset < array_size) {
+ disk_key = (struct btrfs_disk_key *)array_ptr;
+ len = sizeof(*disk_key);
+ if (cur_offset + len > array_size)
+ goto out_short_read;
+
+ btrfs_disk_key_to_cpu(&key, disk_key);
+
+ array_ptr += len;
+ sb_array_offset += len;
+ cur_offset += len;
+
+ if (key.type == BTRFS_CHUNK_ITEM_KEY) {
+ chunk = (struct btrfs_chunk *)sb_array_offset;
+ /*
+ * At least one btrfs_chunk with one stripe must be
+ * present, exact stripe count check comes afterwards
+ */
+ len = btrfs_chunk_item_size(1);
+ if (cur_offset + len > array_size)
+ goto out_short_read;
+
+ num_stripes = btrfs_chunk_num_stripes(sb, chunk);
+ if (!num_stripes) {
+ printk(
+ "ERROR: invalid number of stripes %u in sys_array at offset %u\n",
+ num_stripes, cur_offset);
+ ret = -EIO;
+ break;
+ }
+
+ len = btrfs_chunk_item_size(num_stripes);
+ if (cur_offset + len > array_size)
+ goto out_short_read;
+
+ ret = read_one_chunk(fs_info, &key, sb, chunk, -1);
+ if (ret)
+ break;
+ } else {
+ printk(
+ "ERROR: unexpected item type %u in sys_array at offset %u\n",
+ (u32)key.type, cur_offset);
+ ret = -EIO;
+ break;
+ }
+ array_ptr += len;
+ sb_array_offset += len;
+ cur_offset += len;
+ }
+ free_extent_buffer(sb);
+ return ret;
+
+out_short_read:
+ printk("ERROR: sys_array too short to read %u bytes at offset %u\n",
+ len, cur_offset);
+ free_extent_buffer(sb);
+ return -EIO;
+}
+
+int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ struct btrfs_key key;
+ struct btrfs_key found_key;
+ struct btrfs_root *root = fs_info->chunk_root;
+ int ret;
+ int slot;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ /*
+ * Read all device items, and then all the chunk items. All
+ * device items are found before any chunk item (their object id
+ * is smaller than the lowest possible object id for a chunk
+ * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID).
+ */
+ key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+ key.offset = 0;
+ key.type = 0;
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto error;
+ while(1) {
+ leaf = path->nodes[0];
+ slot = path->slots[0];
+ if (slot >= btrfs_header_nritems(leaf)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret == 0)
+ continue;
+ if (ret < 0)
+ goto error;
+ break;
+ }
+ btrfs_item_key_to_cpu(leaf, &found_key, slot);
+ if (found_key.type == BTRFS_DEV_ITEM_KEY) {
+ struct btrfs_dev_item *dev_item;
+ dev_item = btrfs_item_ptr(leaf, slot,
+ struct btrfs_dev_item);
+ ret = read_one_dev(fs_info, leaf, dev_item);
+ if (ret < 0)
+ goto error;
+ } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
+ struct btrfs_chunk *chunk;
+ chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
+ ret = read_one_chunk(fs_info, &found_key, leaf, chunk,
+ slot);
+ if (ret < 0)
+ goto error;
+ }
+ path->slots[0]++;
+ }
+
+ ret = 0;
+error:
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * Get stripe length from chunk item and its stripe items
+ *
+ * Caller should only call this function after validating the chunk item
+ * by using btrfs_check_chunk_valid().
+ */
+u64 btrfs_stripe_length(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *leaf,
+ struct btrfs_chunk *chunk)
+{
+ u64 stripe_len;
+ u64 chunk_len;
+ u32 num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
+ u64 profile = btrfs_chunk_type(leaf, chunk) &
+ BTRFS_BLOCK_GROUP_PROFILE_MASK;
+
+ chunk_len = btrfs_chunk_length(leaf, chunk);
+
+ switch (profile) {
+ case 0: /* Single profile */
+ case BTRFS_BLOCK_GROUP_RAID1:
+ case BTRFS_BLOCK_GROUP_RAID1C3:
+ case BTRFS_BLOCK_GROUP_RAID1C4:
+ case BTRFS_BLOCK_GROUP_DUP:
+ stripe_len = chunk_len;
+ break;
+ case BTRFS_BLOCK_GROUP_RAID0:
+ stripe_len = chunk_len / num_stripes;
+ break;
+ case BTRFS_BLOCK_GROUP_RAID5:
+ stripe_len = chunk_len / (num_stripes - 1);
+ break;
+ case BTRFS_BLOCK_GROUP_RAID6:
+ stripe_len = chunk_len / (num_stripes - 2);
+ break;
+ case BTRFS_BLOCK_GROUP_RAID10:
+ stripe_len = chunk_len / (num_stripes /
+ btrfs_chunk_sub_stripes(leaf, chunk));
+ break;
+ default:
+ /* Invalid chunk profile found */
+ BUG_ON(1);
+ }
+ return stripe_len;
+}
+
+int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
+{
+ struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+ struct cache_extent *ce;
+ struct map_lookup *map;
+ int ret;
+
+ ce = search_cache_extent(&map_tree->cache_tree, logical);
+ if (!ce) {
+ fprintf(stderr, "No mapping for %llu-%llu\n",
+ (unsigned long long)logical,
+ (unsigned long long)logical+len);
+ return 1;
+ }
+ if (ce->start > logical || ce->start + ce->size < logical) {
+ fprintf(stderr, "Invalid mapping for %llu-%llu, got "
+ "%llu-%llu\n", (unsigned long long)logical,
+ (unsigned long long)logical+len,
+ (unsigned long long)ce->start,
+ (unsigned long long)ce->start + ce->size);
+ return 1;
+ }
+ map = container_of(ce, struct map_lookup, ce);
+
+ if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID1C3 | BTRFS_BLOCK_GROUP_RAID1C4))
+ ret = map->num_stripes;
+ else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
+ ret = map->sub_stripes;
+ else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
+ ret = 2;
+ else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
+ ret = 3;
+ else
+ ret = 1;
+ return ret;
+}
+
+int btrfs_next_bg(struct btrfs_fs_info *fs_info, u64 *logical,
+ u64 *size, u64 type)
+{
+ struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+ struct cache_extent *ce;
+ struct map_lookup *map;
+ u64 cur = *logical;
+
+ ce = search_cache_extent(&map_tree->cache_tree, cur);
+
+ while (ce) {
+ /*
+ * only jump to next bg if our cur is not 0
+ * As the initial logical for btrfs_next_bg() is 0, and
+ * if we jump to next bg, we skipped a valid bg.
+ */
+ if (cur) {
+ ce = next_cache_extent(ce);
+ if (!ce)
+ return -ENOENT;
+ }
+
+ cur = ce->start;
+ map = container_of(ce, struct map_lookup, ce);
+ if (map->type & type) {
+ *logical = ce->start;
+ *size = ce->size;
+ return 0;
+ }
+ if (!cur)
+ ce = next_cache_extent(ce);
+ }
+
+ return -ENOENT;
+}
+
+static inline int parity_smaller(u64 a, u64 b)
+{
+ return a > b;
+}
+
+/* Bubble-sort the stripe set to put the parity/syndrome stripes last */
+static void sort_parity_stripes(struct btrfs_multi_bio *bbio, u64 *raid_map)
+{
+ struct btrfs_bio_stripe s;
+ int i;
+ u64 l;
+ int again = 1;
+
+ while (again) {
+ again = 0;
+ for (i = 0; i < bbio->num_stripes - 1; i++) {
+ if (parity_smaller(raid_map[i], raid_map[i+1])) {
+ s = bbio->stripes[i];
+ l = raid_map[i];
+ bbio->stripes[i] = bbio->stripes[i+1];
+ raid_map[i] = raid_map[i+1];
+ bbio->stripes[i+1] = s;
+ raid_map[i+1] = l;
+ again = 1;
+ }
+ }
+ }
+}
+
+int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
+ u64 logical, u64 *length, u64 *type,
+ struct btrfs_multi_bio **multi_ret, int mirror_num,
+ u64 **raid_map_ret)
+{
+ struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
+ struct cache_extent *ce;
+ struct map_lookup *map;
+ u64 offset;
+ u64 stripe_offset;
+ u64 *raid_map = NULL;
+ int stripe_nr;
+ int stripes_allocated = 8;
+ int stripes_required = 1;
+ int stripe_index;
+ int i;
+ struct btrfs_multi_bio *multi = NULL;
+
+ if (multi_ret && rw == READ) {
+ stripes_allocated = 1;
+ }
+again:
+ ce = search_cache_extent(&map_tree->cache_tree, logical);
+ if (!ce) {
+ kfree(multi);
+ *length = (u64)-1;
+ return -ENOENT;
+ }
+ if (ce->start > logical) {
+ kfree(multi);
+ *length = ce->start - logical;
+ return -ENOENT;
+ }
+
+ if (multi_ret) {
+ multi = kzalloc(btrfs_multi_bio_size(stripes_allocated),
+ GFP_NOFS);
+ if (!multi)
+ return -ENOMEM;
+ }
+ map = container_of(ce, struct map_lookup, ce);
+ offset = logical - ce->start;
+
+ if (rw == WRITE) {
+ if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID1C3 |
+ BTRFS_BLOCK_GROUP_RAID1C4 |
+ BTRFS_BLOCK_GROUP_DUP)) {
+ stripes_required = map->num_stripes;
+ } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
+ stripes_required = map->sub_stripes;
+ }
+ }
+ if (map->type & (BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6)
+ && multi_ret && ((rw & WRITE) || mirror_num > 1) && raid_map_ret) {
+ /* RAID[56] write or recovery. Return all stripes */
+ stripes_required = map->num_stripes;
+
+ /* Only allocate the map if we've already got a large enough multi_ret */
+ if (stripes_allocated >= stripes_required) {
+ raid_map = kmalloc(sizeof(u64) * map->num_stripes, GFP_NOFS);
+ if (!raid_map) {
+ kfree(multi);
+ return -ENOMEM;
+ }
+ }
+ }
+
+ /* if our multi bio struct is too small, back off and try again */
+ if (multi_ret && stripes_allocated < stripes_required) {
+ stripes_allocated = stripes_required;
+ kfree(multi);
+ multi = NULL;
+ goto again;
+ }
+ stripe_nr = offset;
+ /*
+ * stripe_nr counts the total number of stripes we have to stride
+ * to get to this block
+ */
+ stripe_nr = stripe_nr / map->stripe_len;
+
+ stripe_offset = stripe_nr * map->stripe_len;
+ BUG_ON(offset < stripe_offset);
+
+ /* stripe_offset is the offset of this block in its stripe*/
+ stripe_offset = offset - stripe_offset;
+
+ if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID1C3 | BTRFS_BLOCK_GROUP_RAID1C4 |
+ BTRFS_BLOCK_GROUP_RAID5 | BTRFS_BLOCK_GROUP_RAID6 |
+ BTRFS_BLOCK_GROUP_RAID10 |
+ BTRFS_BLOCK_GROUP_DUP)) {
+ /* we limit the length of each bio to what fits in a stripe */
+ *length = min_t(u64, ce->size - offset,
+ map->stripe_len - stripe_offset);
+ } else {
+ *length = ce->size - offset;
+ }
+
+ if (!multi_ret)
+ goto out;
+
+ multi->num_stripes = 1;
+ stripe_index = 0;
+ if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID1C3 |
+ BTRFS_BLOCK_GROUP_RAID1C4)) {
+ if (rw == WRITE)
+ multi->num_stripes = map->num_stripes;
+ else if (mirror_num)
+ stripe_index = mirror_num - 1;
+ else
+ stripe_index = stripe_nr % map->num_stripes;
+ } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
+ int factor = map->num_stripes / map->sub_stripes;
+
+ stripe_index = stripe_nr % factor;
+ stripe_index *= map->sub_stripes;
+
+ if (rw == WRITE)
+ multi->num_stripes = map->sub_stripes;
+ else if (mirror_num)
+ stripe_index += mirror_num - 1;
+
+ stripe_nr = stripe_nr / factor;
+ } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
+ if (rw == WRITE)
+ multi->num_stripes = map->num_stripes;
+ else if (mirror_num)
+ stripe_index = mirror_num - 1;
+ } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
+ BTRFS_BLOCK_GROUP_RAID6)) {
+
+ if (raid_map) {
+ int rot;
+ u64 tmp;
+ u64 raid56_full_stripe_start;
+ u64 full_stripe_len = nr_data_stripes(map) * map->stripe_len;
+
+ /*
+ * align the start of our data stripe in the logical
+ * address space
+ */
+ raid56_full_stripe_start = offset / full_stripe_len;
+ raid56_full_stripe_start *= full_stripe_len;
+
+ /* get the data stripe number */
+ stripe_nr = raid56_full_stripe_start / map->stripe_len;
+ stripe_nr = stripe_nr / nr_data_stripes(map);
+
+ /* Work out the disk rotation on this stripe-set */
+ rot = stripe_nr % map->num_stripes;
+
+ /* Fill in the logical address of each stripe */
+ tmp = stripe_nr * nr_data_stripes(map);
+
+ for (i = 0; i < nr_data_stripes(map); i++)
+ raid_map[(i+rot) % map->num_stripes] =
+ ce->start + (tmp + i) * map->stripe_len;
+
+ raid_map[(i+rot) % map->num_stripes] = BTRFS_RAID5_P_STRIPE;
+ if (map->type & BTRFS_BLOCK_GROUP_RAID6)
+ raid_map[(i+rot+1) % map->num_stripes] = BTRFS_RAID6_Q_STRIPE;
+
+ *length = map->stripe_len;
+ stripe_index = 0;
+ stripe_offset = 0;
+ multi->num_stripes = map->num_stripes;
+ } else {
+ stripe_index = stripe_nr % nr_data_stripes(map);
+ stripe_nr = stripe_nr / nr_data_stripes(map);
+
+ /*
+ * Mirror #0 or #1 means the original data block.
+ * Mirror #2 is RAID5 parity block.
+ * Mirror #3 is RAID6 Q block.
+ */
+ if (mirror_num > 1)
+ stripe_index = nr_data_stripes(map) + mirror_num - 2;
+
+ /* We distribute the parity blocks across stripes */
+ stripe_index = (stripe_nr + stripe_index) % map->num_stripes;
+ }
+ } else {
+ /*
+ * after this do_div call, stripe_nr is the number of stripes
+ * on this device we have to walk to find the data, and
+ * stripe_index is the number of our device in the stripe array
+ */
+ stripe_index = stripe_nr % map->num_stripes;
+ stripe_nr = stripe_nr / map->num_stripes;
+ }
+ BUG_ON(stripe_index >= map->num_stripes);
+
+ for (i = 0; i < multi->num_stripes; i++) {
+ multi->stripes[i].physical =
+ map->stripes[stripe_index].physical + stripe_offset +
+ stripe_nr * map->stripe_len;
+ multi->stripes[i].dev = map->stripes[stripe_index].dev;
+ stripe_index++;
+ }
+ *multi_ret = multi;
+
+ if (type)
+ *type = map->type;
+
+ if (raid_map) {
+ sort_parity_stripes(multi, raid_map);
+ *raid_map_ret = raid_map;
+ }
+out:
+ return 0;
+}
+
+int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
+ u64 logical, u64 *length,
+ struct btrfs_multi_bio **multi_ret, int mirror_num,
+ u64 **raid_map_ret)
+{
+ return __btrfs_map_block(fs_info, rw, logical, length, NULL,
+ multi_ret, mirror_num, raid_map_ret);
+}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
new file mode 100644
index 0000000000..9d1a07ae78
--- /dev/null
+++ b/fs/btrfs/volumes.h
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#ifndef __BTRFS_VOLUMES_H__
+#define __BTRFS_VOLUMES_H__
+
+#include <fs_internal.h>
+#include "ctree.h"
+
+#define BTRFS_STRIPE_LEN SZ_64K
+
+struct btrfs_device {
+ struct list_head dev_list;
+ struct btrfs_root *dev_root;
+ struct btrfs_fs_devices *fs_devices;
+
+ struct blk_desc *desc;
+ struct disk_partition *part;
+
+ u64 total_devs;
+ u64 super_bytes_used;
+
+ u64 generation;
+
+ /* the internal btrfs device id */
+ u64 devid;
+
+ /* size of the device */
+ u64 total_bytes;
+
+ /* bytes used */
+ u64 bytes_used;
+
+ /* optimal io alignment for this device */
+ u32 io_align;
+
+ /* optimal io width for this device */
+ u32 io_width;
+
+ /* minimal io size for this device */
+ u32 sector_size;
+
+ /* type and info about this device */
+ u64 type;
+
+ /* physical drive uuid (or lvm uuid) */
+ u8 uuid[BTRFS_UUID_SIZE];
+};
+
+struct btrfs_fs_devices {
+ u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
+ u8 metadata_uuid[BTRFS_FSID_SIZE]; /* FS specific uuid */
+
+ u64 latest_devid;
+ u64 lowest_devid;
+ u64 latest_trans;
+
+ u64 total_rw_bytes;
+
+ struct list_head devices;
+ struct list_head list;
+
+ int seeding;
+ struct btrfs_fs_devices *seed;
+};
+
+struct btrfs_bio_stripe {
+ struct btrfs_device *dev;
+ u64 physical;
+};
+
+struct btrfs_multi_bio {
+ int error;
+ int num_stripes;
+ struct btrfs_bio_stripe stripes[];
+};
+
+struct map_lookup {
+ struct cache_extent ce;
+ u64 type;
+ int io_align;
+ int io_width;
+ int stripe_len;
+ int sector_size;
+ int num_stripes;
+ int sub_stripes;
+ struct btrfs_bio_stripe stripes[];
+};
+
+struct btrfs_raid_attr {
+ int sub_stripes; /* sub_stripes info for map */
+ int dev_stripes; /* stripes per dev */
+ int devs_max; /* max devs to use */
+ int devs_min; /* min devs needed */
+ int tolerated_failures; /* max tolerated fail devs */
+ int devs_increment; /* ndevs has to be a multiple of this */
+ int ncopies; /* how many copies to data has */
+ int nparity; /* number of stripes worth of bytes to store
+ * parity information */
+ const char raid_name[8]; /* name of the raid */
+ u64 bg_flag; /* block group flag of the raid */
+};
+
+extern const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES];
+
+static inline enum btrfs_raid_types btrfs_bg_flags_to_raid_index(u64 flags)
+{
+ if (flags & BTRFS_BLOCK_GROUP_RAID10)
+ return BTRFS_RAID_RAID10;
+ else if (flags & BTRFS_BLOCK_GROUP_RAID1)
+ return BTRFS_RAID_RAID1;
+ else if (flags & BTRFS_BLOCK_GROUP_RAID1C3)
+ return BTRFS_RAID_RAID1C3;
+ else if (flags & BTRFS_BLOCK_GROUP_RAID1C4)
+ return BTRFS_RAID_RAID1C4;
+ else if (flags & BTRFS_BLOCK_GROUP_DUP)
+ return BTRFS_RAID_DUP;
+ else if (flags & BTRFS_BLOCK_GROUP_RAID0)
+ return BTRFS_RAID_RAID0;
+ else if (flags & BTRFS_BLOCK_GROUP_RAID5)
+ return BTRFS_RAID_RAID5;
+ else if (flags & BTRFS_BLOCK_GROUP_RAID6)
+ return BTRFS_RAID_RAID6;
+
+ return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
+}
+
+#define btrfs_multi_bio_size(n) (sizeof(struct btrfs_multi_bio) + \
+ (sizeof(struct btrfs_bio_stripe) * (n)))
+#define btrfs_map_lookup_size(n) (sizeof(struct map_lookup) + \
+ (sizeof(struct btrfs_bio_stripe) * (n)))
+
+#define BTRFS_RAID5_P_STRIPE ((u64)-2)
+#define BTRFS_RAID6_Q_STRIPE ((u64)-1)
+
+static inline u64 calc_stripe_length(u64 type, u64 length, int num_stripes)
+{
+ u64 stripe_size;
+
+ if (type & BTRFS_BLOCK_GROUP_RAID0) {
+ stripe_size = length;
+ stripe_size /= num_stripes;
+ } else if (type & BTRFS_BLOCK_GROUP_RAID10) {
+ stripe_size = length * 2;
+ stripe_size /= num_stripes;
+ } else if (type & BTRFS_BLOCK_GROUP_RAID5) {
+ stripe_size = length;
+ stripe_size /= (num_stripes - 1);
+ } else if (type & BTRFS_BLOCK_GROUP_RAID6) {
+ stripe_size = length;
+ stripe_size /= (num_stripes - 2);
+ } else {
+ stripe_size = length;
+ }
+ return stripe_size;
+}
+
+#ifndef READ
+#define READ 0
+#define WRITE 1
+#define READA 2
+#endif
+
+int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
+ u64 logical, u64 *length, u64 *type,
+ struct btrfs_multi_bio **multi_ret, int mirror_num,
+ u64 **raid_map);
+int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
+ u64 logical, u64 *length,
+ struct btrfs_multi_bio **multi_ret, int mirror_num,
+ u64 **raid_map_ret);
+int btrfs_next_bg(struct btrfs_fs_info *map_tree, u64 *logical,
+ u64 *size, u64 type);
+static inline int btrfs_next_bg_metadata(struct btrfs_fs_info *fs_info,
+ u64 *logical, u64 *size)
+{
+ return btrfs_next_bg(fs_info, logical, size,
+ BTRFS_BLOCK_GROUP_METADATA);
+}
+static inline int btrfs_next_bg_system(struct btrfs_fs_info *fs_info,
+ u64 *logical, u64 *size)
+{
+ return btrfs_next_bg(fs_info, logical, size,
+ BTRFS_BLOCK_GROUP_SYSTEM);
+}
+int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
+int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
+int btrfs_open_devices(struct btrfs_fs_devices *fs_devices);
+int btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
+void btrfs_close_all_devices(void);
+int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
+int btrfs_scan_one_device(struct blk_desc *desc, struct disk_partition *part,
+ struct btrfs_fs_devices **fs_devices_ret,
+ u64 *total_devs);
+struct list_head *btrfs_scanned_uuids(void);
+struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid,
+ u8 *uuid, u8 *fsid);
+int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *leaf,
+ struct btrfs_chunk *chunk,
+ int slot, u64 logical);
+u64 btrfs_stripe_length(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *leaf,
+ struct btrfs_chunk *chunk);
+#endif