From 04c524613eca7ff64aa84e693f125687a90e28d8 Mon Sep 17 00:00:00 2001 From: Forza Date: Sun, 1 Dec 2024 10:31:38 +0100 Subject: [PATCH] Btrfs Allocator Hints: added patch to Linux 5.15 --- .../btrfs_allocator_hints-5.15.patch | 385 ++++++++++++++++++ 1 file changed, 385 insertions(+) create mode 100644 Btrfs/Allocator Hints/btrfs_allocator_hints-5.15.patch diff --git a/Btrfs/Allocator Hints/btrfs_allocator_hints-5.15.patch b/Btrfs/Allocator Hints/btrfs_allocator_hints-5.15.patch new file mode 100644 index 0000000..49945b9 --- /dev/null +++ b/Btrfs/Allocator Hints/btrfs_allocator_hints-5.15.patch @@ -0,0 +1,385 @@ +From 60b52539b055332086a7e7da9da9cc1f4909f55a Mon Sep 17 00:00:00 2001 +From: Goffredo Baroncelli +Date: Sun, 24 Oct 2021 17:31:04 +0200 +Subject: [PATCH 1/4] btrfs: add flags to give an hint to the chunk allocator + +Add the following flags to give an hint about which chunk should be +allocated in which a disk. +The following flags are created: + +- BTRFS_DEV_ALLOCATION_PREFERRED_DATA + preferred data chunk, but metadata chunk allowed +- BTRFS_DEV_ALLOCATION_PREFERRED_METADATA + preferred metadata chunk, but data chunk allowed +- BTRFS_DEV_ALLOCATION_METADATA_ONLY + only metadata chunk allowed +- BTRFS_DEV_ALLOCATION_DATA_ONLY + only data chunk allowed + +Signed-off-by: Goffredo Baroncelli +--- + include/uapi/linux/btrfs_tree.h | 14 ++++++++++++++ + 1 file changed, 14 insertions(+) + +diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h +index e1c4c732aabac2..a8e32ff44ab8b3 100644 +--- a/include/uapi/linux/btrfs_tree.h ++++ b/include/uapi/linux/btrfs_tree.h +@@ -384,6 +384,20 @@ struct btrfs_key { + __u64 offset; + } __attribute__ ((__packed__)); + ++/* dev_item.type */ ++ ++/* btrfs chunk allocation hints */ ++#define BTRFS_DEV_ALLOCATION_MASK_BIT_COUNT 3 ++/* preferred data chunk, but metadata chunk allowed */ ++#define BTRFS_DEV_ALLOCATION_PREFERRED_DATA (0ULL) ++/* preferred metadata chunk, but data chunk allowed */ 
++#define BTRFS_DEV_ALLOCATION_PREFERRED_METADATA (1ULL) ++/* only metadata chunks are allowed */ ++#define BTRFS_DEV_ALLOCATION_METADATA_ONLY (2ULL) ++/* only data chunk allowed */ ++#define BTRFS_DEV_ALLOCATION_DATA_ONLY (3ULL) ++/* 4..7 are unused values */ ++ + struct btrfs_dev_item { + /* the internal btrfs device id */ + __le64 devid; + +From ea05f0db64e8713b509a3ba18a47842080a7ed6a Mon Sep 17 00:00:00 2001 +From: Goffredo Baroncelli +Date: Sun, 24 Oct 2021 17:31:05 +0200 +Subject: [PATCH 2/4] btrfs: export dev_item.type in + /sys/fs/btrfs/<UUID>/devinfo/<devid>/type + +Signed-off-by: Goffredo Baroncelli +--- + fs/btrfs/sysfs.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c +index 25a6f587852be2..207675930c1158 100644 +--- a/fs/btrfs/sysfs.c ++++ b/fs/btrfs/sysfs.c +@@ -1570,6 +1570,16 @@ static ssize_t btrfs_devinfo_error_stats_show(struct kobject *kobj, + } + BTRFS_ATTR(devid, error_stats, btrfs_devinfo_error_stats_show); + ++static ssize_t btrfs_devinfo_type_show(struct kobject *kobj, ++ struct kobj_attribute *a, char *buf) ++{ ++ struct btrfs_device *device = container_of(kobj, struct btrfs_device, ++ devid_kobj); ++ ++ return scnprintf(buf, PAGE_SIZE, "0x%08llx\n", device->type); ++} ++BTRFS_ATTR(devid, type, btrfs_devinfo_type_show); ++ + /* + * Information about one device. 
+ * +@@ -1582,6 +1592,7 @@ static struct attribute *devid_attrs[] = { + BTRFS_ATTR_PTR(devid, replace_target), + BTRFS_ATTR_PTR(devid, scrub_speed_max), + BTRFS_ATTR_PTR(devid, writeable), ++ BTRFS_ATTR_PTR(devid, type), + NULL + }; + ATTRIBUTE_GROUPS(devid); + +From 5af82dfcafa8ed103e29315436adf1eb3fa6044f Mon Sep 17 00:00:00 2001 +From: Goffredo Baroncelli +Date: Sun, 24 Oct 2021 17:31:06 +0200 +Subject: [PATCH 3/4] btrfs: change the DEV_ITEM 'type' field via sysfs + +Signed-off-by: Kai Krakow +--- + fs/btrfs/sysfs.c | 56 +++++++++++++++++++++++++++++++++++++++++++++- + fs/btrfs/volumes.c | 2 +- + fs/btrfs/volumes.h | 3 ++- + 3 files changed, 58 insertions(+), 3 deletions(-) + +diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c +index 207675930c1158..722bf99cb0236a 100644 +--- a/fs/btrfs/sysfs.c ++++ b/fs/btrfs/sysfs.c +@@ -1578,7 +1578,61 @@ static ssize_t btrfs_devinfo_type_show(struct kobject *kobj, + + return scnprintf(buf, PAGE_SIZE, "0x%08llx\n", device->type); + } +-BTRFS_ATTR(devid, type, btrfs_devinfo_type_show); ++ ++static ssize_t btrfs_devinfo_type_store(struct kobject *kobj, ++ struct kobj_attribute *a, ++ const char *buf, size_t len) ++{ ++ struct btrfs_fs_info *fs_info; ++ struct btrfs_root *root; ++ struct btrfs_device *device; ++ int ret; ++ struct btrfs_trans_handle *trans; ++ ++ u64 type, prev_type; ++ ++ device = container_of(kobj, struct btrfs_device, devid_kobj); ++ fs_info = device->fs_info; ++ if (!fs_info) ++ return -EPERM; ++ ++ root = fs_info->chunk_root; ++ if (sb_rdonly(fs_info->sb)) ++ return -EROFS; ++ ++ ret = kstrtou64(buf, 0, &type); ++ if (ret < 0) ++ return -EINVAL; ++ ++ /* for now, allow to touch only the 'allocation hint' bits */ ++ if (type & ~((1 << BTRFS_DEV_ALLOCATION_MASK_BIT_COUNT) - 1)) ++ return -EINVAL; ++ ++ trans = btrfs_start_transaction(root, 1); ++ if (IS_ERR(trans)) ++ return PTR_ERR(trans); ++ ++ prev_type = device->type; ++ device->type = type; ++ ++ ret = btrfs_update_device(trans, device); ++ ++ if (ret < 0) 
{ ++ btrfs_abort_transaction(trans, ret); ++ btrfs_end_transaction(trans); ++ goto abort; ++ } ++ ++ ret = btrfs_commit_transaction(trans); ++ if (ret < 0) ++ goto abort; ++ ++ return len; ++abort: ++ device->type = prev_type; ++ return ret; ++} ++BTRFS_ATTR_RW(devid, type, btrfs_devinfo_type_show, btrfs_devinfo_type_store); + + /* + * Information about one device. +diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c +index 2ec3b8ac8fa357..f00cdedbbd11d8 100644 +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -2773,7 +2773,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path + return ret; + } + +-static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, ++noinline int btrfs_update_device(struct btrfs_trans_handle *trans, + struct btrfs_device *device) + { + int ret; +diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h +index 2183361db614da..5b6861ae468900 100644 +--- a/fs/btrfs/volumes.h ++++ b/fs/btrfs/volumes.h +@@ -581,5 +581,6 @@ int btrfs_bg_type_to_factor(u64 flags); + const char *btrfs_bg_type_to_raid_name(u64 flags); + int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info); + int btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical); +- ++int btrfs_update_device(struct btrfs_trans_handle *trans, ++ struct btrfs_device *device); + #endif + +From 14a694d039fd11e59dd90aa7cbca4af1df54c146 Mon Sep 17 00:00:00 2001 +From: Goffredo Baroncelli +Date: Sun, 24 Oct 2021 17:31:07 +0200 +Subject: [PATCH 4/4] btrfs: add allocator_hint mode + +When this mode is enabled, the chunk allocation policy is modified as +follow. 
+ +Each disk may have a different tag: +- BTRFS_DEV_ALLOCATION_PREFERRED_METADATA +- BTRFS_DEV_ALLOCATION_METADATA_ONLY +- BTRFS_DEV_ALLOCATION_DATA_ONLY +- BTRFS_DEV_ALLOCATION_PREFERRED_DATA (default) + +Where: +- ALLOCATION_PREFERRED_X means that it is preferred to use this disk for +the X chunk type (the other type may be allowed when the space is low) +- ALLOCATION_X_ONLY means that it is used *only* for the X chunk type. +This means also that it is a preferred choice. + +Each time the allocator allocates a chunk of type X, first it takes the +disks tagged as ALLOCATION_X_ONLY or ALLOCATION_PREFERRED_X; if the space +is not enough, it uses also the disks tagged as ALLOCATION_METADATA_ONLY; +if the space is not enough, it uses also the other disks, with the +exception of the one marked as ALLOCATION_PREFERRED_Y, where Y is the other +type of chunk (i.e. not X). + +Signed-off-by: Goffredo Baroncelli +--- + fs/btrfs/volumes.c | 97 +++++++++++++++++++++++++++++++++++++++++++++- + fs/btrfs/volumes.h | 1 + + 2 files changed, 97 insertions(+), 1 deletion(-) + +diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c +index f00cdedbbd11d8..e74f2126cdf992 100644 +--- a/fs/btrfs/volumes.c ++++ b/fs/btrfs/volumes.c +@@ -179,6 +179,19 @@ enum btrfs_raid_types __attribute_const__ btrfs_bg_flags_to_raid_index(u64 flags + return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */ + } + ++#define BTRFS_DEV_ALLOCATION_MASK ((1ULL << \ ++ BTRFS_DEV_ALLOCATION_MASK_BIT_COUNT) - 1) ++#define BTRFS_DEV_ALLOCATION_MASK_COUNT (1ULL << \ ++ BTRFS_DEV_ALLOCATION_MASK_BIT_COUNT) ++ ++static const char alloc_hint_map[BTRFS_DEV_ALLOCATION_MASK_COUNT] = { ++ [BTRFS_DEV_ALLOCATION_DATA_ONLY] = -1, ++ [BTRFS_DEV_ALLOCATION_PREFERRED_DATA] = 0, ++ [BTRFS_DEV_ALLOCATION_PREFERRED_METADATA] = 1, ++ [BTRFS_DEV_ALLOCATION_METADATA_ONLY] = 2, ++ /* the other values are set to 0 */ ++}; ++ + const char *btrfs_bg_type_to_raid_name(u64 flags) + { + const int index = btrfs_bg_flags_to_raid_index(flags); +@@ 
-4938,13 +4951,18 @@ static int btrfs_add_system_chunk(struct btrfs_fs_info *fs_info, + } + + /* +- * sort the devices in descending order by max_avail, total_avail ++ * sort the devices in descending order by alloc_hint, ++ * max_avail, total_avail + */ + static int btrfs_cmp_device_info(const void *a, const void *b) + { + const struct btrfs_device_info *di_a = a; + const struct btrfs_device_info *di_b = b; + ++ if (di_a->alloc_hint > di_b->alloc_hint) ++ return -1; ++ if (di_a->alloc_hint < di_b->alloc_hint) ++ return 1; + if (di_a->max_avail > di_b->max_avail) + return -1; + if (di_a->max_avail < di_b->max_avail) +@@ -5107,6 +5125,8 @@ static int gather_device_info(struct btrfs_fs_devices *fs_devices, + int ndevs = 0; + u64 max_avail; + u64 dev_offset; ++ int hint; ++ int i; + + /* + * in the first pass through the devices list, we gather information +@@ -5159,16 +5179,91 @@ static int gather_device_info(struct btrfs_fs_devices *fs_devices, + devices_info[ndevs].max_avail = max_avail; + devices_info[ndevs].total_avail = total_avail; + devices_info[ndevs].dev = device; ++ ++ if ((ctl->type & BTRFS_BLOCK_GROUP_DATA) && ++ (ctl->type & BTRFS_BLOCK_GROUP_METADATA)) { ++ /* ++ * if mixed bg set all the alloc_hint ++ * fields to the same value, so the sorting ++ * is not affected ++ */ ++ devices_info[ndevs].alloc_hint = 0; ++ } else if (ctl->type & BTRFS_BLOCK_GROUP_DATA) { ++ hint = device->type & BTRFS_DEV_ALLOCATION_MASK; ++ ++ /* ++ * skip BTRFS_DEV_ALLOCATION_METADATA_ONLY disks ++ */ ++ if (hint == BTRFS_DEV_ALLOCATION_METADATA_ONLY) ++ continue; ++ /* ++ * if a data chunk must be allocated, ++ * sort also by hint (data disk ++ * higher priority) ++ */ ++ devices_info[ndevs].alloc_hint = -alloc_hint_map[hint]; ++ } else { /* BTRFS_BLOCK_GROUP_METADATA */ ++ hint = device->type & BTRFS_DEV_ALLOCATION_MASK; ++ ++ /* ++ * skip BTRFS_DEV_ALLOCATION_DATA_ONLY disks ++ */ ++ if (hint == BTRFS_DEV_ALLOCATION_DATA_ONLY) ++ continue; ++ /* ++ * if a metadata chunk must be allocated, ++ * sort 
also by hint (metadata hint ++ * higher priority) ++ */ ++ devices_info[ndevs].alloc_hint = alloc_hint_map[hint]; ++ } ++ + ++ndevs; + } + ctl->ndevs = ndevs; + ++ /* ++ * no devices available ++ */ ++ if (!ndevs) ++ return 0; ++ + /* + * now sort the devices by hole size / available space + */ + sort(devices_info, ndevs, sizeof(struct btrfs_device_info), + btrfs_cmp_device_info, NULL); + ++ /* ++ * select the minimum set of disks grouped by hint that ++ * can host the chunk ++ */ ++ ndevs = 0; ++ while (ndevs < ctl->ndevs) { ++ hint = devices_info[ndevs++].alloc_hint; ++ while (ndevs < ctl->ndevs && ++ devices_info[ndevs].alloc_hint == hint) ++ ndevs++; ++ if (ndevs >= ctl->devs_min) ++ break; ++ } ++ ++ BUG_ON(ndevs > ctl->ndevs); ++ ctl->ndevs = ndevs; ++ ++ /* ++ * the next layers require the devices_info ordered by ++ * max_avail. If we are returning two (or more) different ++ * groups of alloc_hint, this is not always true. So sort ++ * them again. ++ */ ++ ++ for (i = 0 ; i < ndevs ; i++) ++ devices_info[i].alloc_hint = 0; ++ ++ sort(devices_info, ndevs, sizeof(struct btrfs_device_info), ++ btrfs_cmp_device_info, NULL); ++ + return 0; + } + +diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h +index 5b6861ae468900..1644d7c428a215 100644 +--- a/fs/btrfs/volumes.h ++++ b/fs/btrfs/volumes.h +@@ -369,6 +369,7 @@ struct btrfs_device_info { + u64 dev_offset; + u64 max_avail; + u64 total_avail; ++ int alloc_hint; + }; + + struct btrfs_raid_attr {