Btrfs Allocator Hints: added patch to Linux 5.15
This commit is contained in:
parent
fb9449e1ca
commit
04c524613e
385
Btrfs/Allocator Hints/btrfs_allocator_hints-5.15.patch
Normal file
385
Btrfs/Allocator Hints/btrfs_allocator_hints-5.15.patch
Normal file
@ -0,0 +1,385 @@
|
||||
From 60b52539b055332086a7e7da9da9cc1f4909f55a Mon Sep 17 00:00:00 2001
|
||||
From: Goffredo Baroncelli <kreijack@inwind.it>
|
||||
Date: Sun, 24 Oct 2021 17:31:04 +0200
|
||||
Subject: [PATCH 1/4] btrfs: add flags to give a hint to the chunk allocator
|
||||
|
||||
Add the following flags to give a hint about which chunk type should be
|
||||
allocated on which disk.
|
||||
The following flags are created:
|
||||
|
||||
- BTRFS_DEV_ALLOCATION_PREFERRED_DATA
|
||||
preferred data chunk, but metadata chunk allowed
|
||||
- BTRFS_DEV_ALLOCATION_PREFERRED_METADATA
|
||||
preferred metadata chunk, but data chunk allowed
|
||||
- BTRFS_DEV_ALLOCATION_METADATA_ONLY
|
||||
only metadata chunk allowed
|
||||
- BTRFS_DEV_ALLOCATION_DATA_ONLY
|
||||
only data chunk allowed
|
||||
|
||||
Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
|
||||
---
|
||||
include/uapi/linux/btrfs_tree.h | 14 ++++++++++++++
|
||||
1 file changed, 14 insertions(+)
|
||||
|
||||
diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
|
||||
index e1c4c732aabac2..a8e32ff44ab8b3 100644
|
||||
--- a/include/uapi/linux/btrfs_tree.h
|
||||
+++ b/include/uapi/linux/btrfs_tree.h
|
||||
@@ -384,6 +384,20 @@ struct btrfs_key {
|
||||
__u64 offset;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
+/* dev_item.type */
|
||||
+
|
||||
+/* btrfs chunk allocation hints */
|
||||
+#define BTRFS_DEV_ALLOCATION_MASK_BIT_COUNT 3
|
||||
+/* preferred data chunk, but metadata chunk allowed */
|
||||
+#define BTRFS_DEV_ALLOCATION_PREFERRED_DATA (0ULL)
|
||||
+/* preferred metadata chunk, but data chunk allowed */
|
||||
+#define BTRFS_DEV_ALLOCATION_PREFERRED_METADATA (1ULL)
|
||||
+/* only metadata chunk are allowed */
|
||||
+#define BTRFS_DEV_ALLOCATION_METADATA_ONLY (2ULL)
|
||||
+/* only data chunk allowed */
|
||||
+#define BTRFS_DEV_ALLOCATION_DATA_ONLY (3ULL)
|
||||
+/* 4..7 are unused values */
|
||||
+
|
||||
struct btrfs_dev_item {
|
||||
/* the internal btrfs device id */
|
||||
__le64 devid;
|
||||
|
||||
From ea05f0db64e8713b509a3ba18a47842080a7ed6a Mon Sep 17 00:00:00 2001
|
||||
From: Goffredo Baroncelli <kreijack@inwind.it>
|
||||
Date: Sun, 24 Oct 2021 17:31:05 +0200
|
||||
Subject: [PATCH 2/4] btrfs: export dev_item.type in
|
||||
/sys/fs/btrfs/<uuid>/devinfo/<devid>/type
|
||||
|
||||
Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
|
||||
---
|
||||
fs/btrfs/sysfs.c | 11 +++++++++++
|
||||
1 file changed, 11 insertions(+)
|
||||
|
||||
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
|
||||
index 25a6f587852be2..207675930c1158 100644
|
||||
--- a/fs/btrfs/sysfs.c
|
||||
+++ b/fs/btrfs/sysfs.c
|
||||
@@ -1570,6 +1570,16 @@ static ssize_t btrfs_devinfo_error_stats_show(struct kobject *kobj,
|
||||
}
|
||||
BTRFS_ATTR(devid, error_stats, btrfs_devinfo_error_stats_show);
|
||||
|
||||
+static ssize_t btrfs_devinfo_type_show(struct kobject *kobj,
|
||||
+ struct kobj_attribute *a, char *buf)
|
||||
+{
|
||||
+ struct btrfs_device *device = container_of(kobj, struct btrfs_device,
|
||||
+ devid_kobj);
|
||||
+
|
||||
+ return scnprintf(buf, PAGE_SIZE, "0x%08llx\n", device->type);
|
||||
+}
|
||||
+BTRFS_ATTR(devid, type, btrfs_devinfo_type_show);
|
||||
+
|
||||
/*
|
||||
* Information about one device.
|
||||
*
|
||||
@@ -1582,6 +1592,7 @@ static struct attribute *devid_attrs[] = {
|
||||
BTRFS_ATTR_PTR(devid, replace_target),
|
||||
BTRFS_ATTR_PTR(devid, scrub_speed_max),
|
||||
BTRFS_ATTR_PTR(devid, writeable),
|
||||
+ BTRFS_ATTR_PTR(devid, type),
|
||||
NULL
|
||||
};
|
||||
ATTRIBUTE_GROUPS(devid);
|
||||
|
||||
From 5af82dfcafa8ed103e29315436adf1eb3fa6044f Mon Sep 17 00:00:00 2001
|
||||
From: Goffredo Baroncelli <kreijack@inwind.it>
|
||||
Date: Sun, 24 Oct 2021 17:31:06 +0200
|
||||
Subject: [PATCH 3/4] btrfs: change the DEV_ITEM 'type' field via sysfs
|
||||
|
||||
Signed-off-by: Kai Krakow <kai@kaishome.de>
|
||||
---
|
||||
fs/btrfs/sysfs.c | 56 +++++++++++++++++++++++++++++++++++++++++++++-
|
||||
fs/btrfs/volumes.c | 2 +-
|
||||
fs/btrfs/volumes.h | 3 ++-
|
||||
3 files changed, 58 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
|
||||
index 207675930c1158..722bf99cb0236a 100644
|
||||
--- a/fs/btrfs/sysfs.c
|
||||
+++ b/fs/btrfs/sysfs.c
|
||||
@@ -1578,7 +1578,61 @@ static ssize_t btrfs_devinfo_type_show(struct kobject *kobj,
|
||||
|
||||
return scnprintf(buf, PAGE_SIZE, "0x%08llx\n", device->type);
|
||||
}
|
||||
-BTRFS_ATTR(devid, type, btrfs_devinfo_type_show);
|
||||
+
|
||||
+static ssize_t btrfs_devinfo_type_store(struct kobject *kobj,
|
||||
+ struct kobj_attribute *a,
|
||||
+ const char *buf, size_t len)
|
||||
+{
|
||||
+ struct btrfs_fs_info *fs_info;
|
||||
+ struct btrfs_root *root;
|
||||
+ struct btrfs_device *device;
|
||||
+ int ret;
|
||||
+ struct btrfs_trans_handle *trans;
|
||||
+
|
||||
+ u64 type, prev_type;
|
||||
+
|
||||
+ device = container_of(kobj, struct btrfs_device, devid_kobj);
|
||||
+ fs_info = device->fs_info;
|
||||
+ if (!fs_info)
|
||||
+ return -EPERM;
|
||||
+
|
||||
+ root = fs_info->chunk_root;
|
||||
+ if (sb_rdonly(fs_info->sb))
|
||||
+ return -EROFS;
|
||||
+
|
||||
+ ret = kstrtou64(buf, 0, &type);
|
||||
+ if (ret < 0)
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ /* for now, allow to touch only the 'allocation hint' bits */
|
||||
+ if (type & ~((1 << BTRFS_DEV_ALLOCATION_MASK_BIT_COUNT) - 1))
|
||||
+ return -EINVAL;
|
||||
+
|
||||
+ trans = btrfs_start_transaction(root, 1);
|
||||
+ if (IS_ERR(trans))
|
||||
+ return PTR_ERR(trans);
|
||||
+
|
||||
+ prev_type = device->type;
|
||||
+ device->type = type;
|
||||
+
|
||||
+ ret = btrfs_update_device(trans, device);
|
||||
+
|
||||
+ if (ret < 0) {
|
||||
+ btrfs_abort_transaction(trans, ret);
|
||||
+ btrfs_end_transaction(trans);
|
||||
+ goto abort;
|
||||
+ }
|
||||
+
|
||||
+ ret = btrfs_commit_transaction(trans);
|
||||
+ if (ret < 0)
|
||||
+ goto abort;
|
||||
+
|
||||
+ return len;
|
||||
+abort:
|
||||
+ device->type = prev_type;
|
||||
+ return ret;
|
||||
+}
|
||||
+BTRFS_ATTR_RW(devid, type, btrfs_devinfo_type_show, btrfs_devinfo_type_store);
|
||||
|
||||
/*
|
||||
* Information about one device.
|
||||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
|
||||
index 2ec3b8ac8fa357..f00cdedbbd11d8 100644
|
||||
--- a/fs/btrfs/volumes.c
|
||||
+++ b/fs/btrfs/volumes.c
|
||||
@@ -2773,7 +2773,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
|
||||
return ret;
|
||||
}
|
||||
|
||||
-static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
|
||||
+noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_device *device)
|
||||
{
|
||||
int ret;
|
||||
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
|
||||
index 2183361db614da..5b6861ae468900 100644
|
||||
--- a/fs/btrfs/volumes.h
|
||||
+++ b/fs/btrfs/volumes.h
|
||||
@@ -581,5 +581,6 @@ int btrfs_bg_type_to_factor(u64 flags);
|
||||
const char *btrfs_bg_type_to_raid_name(u64 flags);
|
||||
int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical);
|
||||
-
|
||||
+int btrfs_update_device(struct btrfs_trans_handle *trans,
|
||||
+ struct btrfs_device *device);
|
||||
#endif
|
||||
|
||||
From 14a694d039fd11e59dd90aa7cbca4af1df54c146 Mon Sep 17 00:00:00 2001
|
||||
From: Goffredo Baroncelli <kreijack@inwind.it>
|
||||
Date: Sun, 24 Oct 2021 17:31:07 +0200
|
||||
Subject: [PATCH 4/4] btrfs: add allocator_hint mode
|
||||
|
||||
When this mode is enabled, the chunk allocation policy is modified as
|
||||
follows.
|
||||
|
||||
Each disk may have a different tag:
|
||||
- BTRFS_DEV_ALLOCATION_PREFERRED_METADATA
|
||||
- BTRFS_DEV_ALLOCATION_METADATA_ONLY
|
||||
- BTRFS_DEV_ALLOCATION_DATA_ONLY
|
||||
- BTRFS_DEV_ALLOCATION_PREFERRED_DATA (default)
|
||||
|
||||
Where:
|
||||
- ALLOCATION_PREFERRED_X means that it is preferred to use this disk for
|
||||
the X chunk type (the other type may be allowed when the space is low)
|
||||
- ALLOCATION_X_ONLY means that it is used *only* for the X chunk type.
|
||||
This means also that it is a preferred choice.
|
||||
|
||||
Each time the allocator allocates a chunk of type X , first it takes the
|
||||
disks tagged as ALLOCATION_X_ONLY or ALLOCATION_PREFERRED_X; if the space
|
||||
is not enough, it uses also the disks tagged as ALLOCATION_METADATA_ONLY;
|
||||
if the space is not enough, it uses also the other disks, with the
|
||||
exception of the one marked as ALLOCATION_PREFERRED_Y, where Y is the other
|
||||
type of chunk (i.e. not X).
|
||||
|
||||
Signed-off-by: Goffredo Baroncelli <kreijack@inwind.it>
|
||||
---
|
||||
fs/btrfs/volumes.c | 97 +++++++++++++++++++++++++++++++++++++++++++++-
|
||||
fs/btrfs/volumes.h | 1 +
|
||||
2 files changed, 97 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
|
||||
index f00cdedbbd11d8..e74f2126cdf992 100644
|
||||
--- a/fs/btrfs/volumes.c
|
||||
+++ b/fs/btrfs/volumes.c
|
||||
@@ -179,6 +179,19 @@ enum btrfs_raid_types __attribute_const__ btrfs_bg_flags_to_raid_index(u64 flags
|
||||
return BTRFS_RAID_SINGLE; /* BTRFS_BLOCK_GROUP_SINGLE */
|
||||
}
|
||||
|
||||
+#define BTRFS_DEV_ALLOCATION_MASK ((1ULL << \
|
||||
+ BTRFS_DEV_ALLOCATION_MASK_BIT_COUNT) - 1)
|
||||
+#define BTRFS_DEV_ALLOCATION_MASK_COUNT (1ULL << \
|
||||
+ BTRFS_DEV_ALLOCATION_MASK_BIT_COUNT)
|
||||
+
|
||||
+static const signed char alloc_hint_map[BTRFS_DEV_ALLOCATION_MASK_COUNT] = {
|
||||
+ [BTRFS_DEV_ALLOCATION_DATA_ONLY] = -1,
|
||||
+ [BTRFS_DEV_ALLOCATION_PREFERRED_DATA] = 0,
|
||||
+ [BTRFS_DEV_ALLOCATION_PREFERRED_METADATA] = 1,
|
||||
+ [BTRFS_DEV_ALLOCATION_METADATA_ONLY] = 2,
|
||||
+ /* unlisted values default to 0; the element type must be signed for -1 */
|
||||
+};
|
||||
+
|
||||
const char *btrfs_bg_type_to_raid_name(u64 flags)
|
||||
{
|
||||
const int index = btrfs_bg_flags_to_raid_index(flags);
|
||||
@@ -4938,13 +4951,18 @@ static int btrfs_add_system_chunk(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
|
||||
/*
|
||||
- * sort the devices in descending order by max_avail, total_avail
|
||||
+ * sort the devices in descending order by alloc_hint,
|
||||
+ * max_avail, total_avail
|
||||
*/
|
||||
static int btrfs_cmp_device_info(const void *a, const void *b)
|
||||
{
|
||||
const struct btrfs_device_info *di_a = a;
|
||||
const struct btrfs_device_info *di_b = b;
|
||||
|
||||
+ if (di_a->alloc_hint > di_b->alloc_hint)
|
||||
+ return -1;
|
||||
+ if (di_a->alloc_hint < di_b->alloc_hint)
|
||||
+ return 1;
|
||||
if (di_a->max_avail > di_b->max_avail)
|
||||
return -1;
|
||||
if (di_a->max_avail < di_b->max_avail)
|
||||
@@ -5107,6 +5125,8 @@ static int gather_device_info(struct btrfs_fs_devices *fs_devices,
|
||||
int ndevs = 0;
|
||||
u64 max_avail;
|
||||
u64 dev_offset;
|
||||
+ int hint;
|
||||
+ int i;
|
||||
|
||||
/*
|
||||
* in the first pass through the devices list, we gather information
|
||||
@@ -5159,16 +5179,91 @@ static int gather_device_info(struct btrfs_fs_devices *fs_devices,
|
||||
devices_info[ndevs].max_avail = max_avail;
|
||||
devices_info[ndevs].total_avail = total_avail;
|
||||
devices_info[ndevs].dev = device;
|
||||
+
|
||||
+ if ((ctl->type & BTRFS_BLOCK_GROUP_DATA) &&
|
||||
+ (ctl->type & BTRFS_BLOCK_GROUP_METADATA)) {
|
||||
+ /*
|
||||
+ * if mixed bg set all the alloc_hint
|
||||
+ * fields to the same value, so the sorting
|
||||
+ * is not affected
|
||||
+ */
|
||||
+ devices_info[ndevs].alloc_hint = 0;
|
||||
+ } else if (ctl->type & BTRFS_BLOCK_GROUP_DATA) {
|
||||
+ hint = device->type & BTRFS_DEV_ALLOCATION_MASK;
|
||||
+
|
||||
+ /*
|
||||
+ * skip BTRFS_DEV_ALLOCATION_METADATA_ONLY disks
|
||||
+ */
|
||||
+ if (hint == BTRFS_DEV_ALLOCATION_METADATA_ONLY)
|
||||
+ continue;
|
||||
+ /*
|
||||
+ * if a data chunk must be allocated,
|
||||
+ * sort also by hint (data disk
|
||||
+ * higher priority)
|
||||
+ */
|
||||
+ devices_info[ndevs].alloc_hint = -alloc_hint_map[hint];
|
||||
+ } else { /* BTRFS_BLOCK_GROUP_METADATA */
|
||||
+ hint = device->type & BTRFS_DEV_ALLOCATION_MASK;
|
||||
+
|
||||
+ /*
|
||||
+ * skip BTRFS_DEV_ALLOCATION_DATA_ONLY disks
|
||||
+ */
|
||||
+ if (hint == BTRFS_DEV_ALLOCATION_DATA_ONLY)
|
||||
+ continue;
|
||||
+ /*
|
||||
+ * if a metadata chunk must be allocated,
|
||||
+ * sort also by hint (metadata hint
|
||||
+ * higher priority)
|
||||
+ */
|
||||
+ devices_info[ndevs].alloc_hint = alloc_hint_map[hint];
|
||||
+ }
|
||||
+
|
||||
++ndevs;
|
||||
}
|
||||
ctl->ndevs = ndevs;
|
||||
|
||||
+ /*
|
||||
+ * no devices available
|
||||
+ */
|
||||
+ if (!ndevs)
|
||||
+ return 0;
|
||||
+
|
||||
/*
|
||||
* now sort the devices by hole size / available space
|
||||
*/
|
||||
sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
|
||||
btrfs_cmp_device_info, NULL);
|
||||
|
||||
+ /*
|
||||
+ * select the minimum set of disks grouped by hint that
|
||||
+ * can host the chunk
|
||||
+ */
|
||||
+ ndevs = 0;
|
||||
+ while (ndevs < ctl->ndevs) {
|
||||
+ hint = devices_info[ndevs++].alloc_hint;
|
||||
+ while (ndevs < ctl->ndevs &&
|
||||
+ devices_info[ndevs].alloc_hint == hint)
|
||||
+ ndevs++;
|
||||
+ if (ndevs >= ctl->devs_min)
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ BUG_ON(ndevs > ctl->ndevs);
|
||||
+ ctl->ndevs = ndevs;
|
||||
+
|
||||
+ /*
|
||||
+ * the next layers require the devices_info ordered by
|
||||
+ * max_avail. If we are returning two (or more) different
|
||||
+ * groups of alloc_hint, this is not always true. So sort
|
||||
+ * these again.
|
||||
+ */
|
||||
+
|
||||
+ for (i = 0 ; i < ndevs ; i++)
|
||||
+ devices_info[i].alloc_hint = 0;
|
||||
+
|
||||
+ sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
|
||||
+ btrfs_cmp_device_info, NULL);
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
|
||||
index 5b6861ae468900..1644d7c428a215 100644
|
||||
--- a/fs/btrfs/volumes.h
|
||||
+++ b/fs/btrfs/volumes.h
|
||||
@@ -369,6 +369,7 @@ struct btrfs_device_info {
|
||||
u64 dev_offset;
|
||||
u64 max_avail;
|
||||
u64 total_avail;
|
||||
+ int alloc_hint;
|
||||
};
|
||||
|
||||
struct btrfs_raid_attr {
|
Loading…
x
Reference in New Issue
Block a user