linux/block/blk-integrity.c

390 lines
9.8 KiB
C
Raw Permalink Normal View History

// SPDX-License-Identifier: GPL-2.0
/*
* blk-integrity.c - Block layer data integrity extensions
*
* Copyright (C) 2007, 2008 Oracle Corporation
* Written by: Martin K. Petersen <martin.petersen@oracle.com>
*/
#include <linux/blk-integrity.h>
#include <linux/backing-dev.h>
#include <linux/mempool.h>
#include <linux/bio.h>
#include <linux/scatterlist.h>
#include <linux/export.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 17:04:11 +09:00
#include <linux/slab.h>
fs: add ioctl to query metadata and protection info capabilities Add a new ioctl, FS_IOC_GETLBMD_CAP, to query metadata and protection info (PI) capabilities. This ioctl returns information about the files integrity profile. This is useful for userspace applications to understand a files end-to-end data protection support and configure the I/O accordingly. For now this interface is only supported by block devices. However the design and placement of this ioctl in generic FS ioctl space allows us to extend it to work over files as well. This maybe useful when filesystems start supporting PI-aware layouts. A new structure struct logical_block_metadata_cap is introduced, which contains the following fields: 1. lbmd_flags: bitmask of logical block metadata capability flags 2. lbmd_interval: the amount of data described by each unit of logical block metadata 3. lbmd_size: size in bytes of the logical block metadata associated with each interval 4. lbmd_opaque_size: size in bytes of the opaque block tag associated with each interval 5. lbmd_opaque_offset: offset in bytes of the opaque block tag within the logical block metadata 6. lbmd_pi_size: size in bytes of the T10 PI tuple associated with each interval 7. lbmd_pi_offset: offset in bytes of T10 PI tuple within the logical block metadata 8. lbmd_pi_guard_tag_type: T10 PI guard tag type 9. lbmd_pi_app_tag_size: size in bytes of the T10 PI application tag 10. lbmd_pi_ref_tag_size: size in bytes of the T10 PI reference tag 11. lbmd_pi_storage_tag_size: size in bytes of the T10 PI storage tag The internal logic to fetch the capability is encapsulated in a helper function blk_get_meta_cap(), which uses the blk_integrity profile associated with the device. The ioctl returns -EOPNOTSUPP, if CONFIG_BLK_DEV_INTEGRITY is not enabled. Suggested-by: Martin K. Petersen <martin.petersen@oracle.com> Signed-off-by: Anuj Gupta <anuj20.g@samsung.com> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com> Link: https://lore.kernel.org/20250630090548.3317-5-anuj20.g@samsung.com Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-06-30 14:35:48 +05:30
#include <linux/t10-pi.h>
#include "blk.h"
/**
* blk_rq_count_integrity_sg - Count number of integrity scatterlist elements
* @q: request queue
* @bio: bio with integrity metadata attached
*
* Description: Returns the number of elements required in a
* scatterlist corresponding to the integrity metadata in a bio.
*/
int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio)
{
struct bio_vec iv, ivprv = { NULL };
unsigned int segments = 0;
unsigned int seg_size = 0;
struct bvec_iter iter;
int prev = 0;
bio_for_each_integrity_vec(iv, bio, iter) {
if (prev) {
if (!biovec_phys_mergeable(q, &ivprv, &iv))
goto new_segment;
if (seg_size + iv.bv_len > queue_max_segment_size(q))
goto new_segment;
seg_size += iv.bv_len;
} else {
new_segment:
segments++;
seg_size = iv.bv_len;
}
prev = 1;
ivprv = iv;
}
return segments;
}
fs: add ioctl to query metadata and protection info capabilities Add a new ioctl, FS_IOC_GETLBMD_CAP, to query metadata and protection info (PI) capabilities. This ioctl returns information about the files integrity profile. This is useful for userspace applications to understand a files end-to-end data protection support and configure the I/O accordingly. For now this interface is only supported by block devices. However the design and placement of this ioctl in generic FS ioctl space allows us to extend it to work over files as well. This maybe useful when filesystems start supporting PI-aware layouts. A new structure struct logical_block_metadata_cap is introduced, which contains the following fields: 1. lbmd_flags: bitmask of logical block metadata capability flags 2. lbmd_interval: the amount of data described by each unit of logical block metadata 3. lbmd_size: size in bytes of the logical block metadata associated with each interval 4. lbmd_opaque_size: size in bytes of the opaque block tag associated with each interval 5. lbmd_opaque_offset: offset in bytes of the opaque block tag within the logical block metadata 6. lbmd_pi_size: size in bytes of the T10 PI tuple associated with each interval 7. lbmd_pi_offset: offset in bytes of T10 PI tuple within the logical block metadata 8. lbmd_pi_guard_tag_type: T10 PI guard tag type 9. lbmd_pi_app_tag_size: size in bytes of the T10 PI application tag 10. lbmd_pi_ref_tag_size: size in bytes of the T10 PI reference tag 11. lbmd_pi_storage_tag_size: size in bytes of the T10 PI storage tag The internal logic to fetch the capability is encapsulated in a helper function blk_get_meta_cap(), which uses the blk_integrity profile associated with the device. The ioctl returns -EOPNOTSUPP, if CONFIG_BLK_DEV_INTEGRITY is not enabled. Suggested-by: Martin K. Petersen <martin.petersen@oracle.com> Signed-off-by: Anuj Gupta <anuj20.g@samsung.com> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com> Link: https://lore.kernel.org/20250630090548.3317-5-anuj20.g@samsung.com Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-06-30 14:35:48 +05:30
int blk_get_meta_cap(struct block_device *bdev, unsigned int cmd,
struct logical_block_metadata_cap __user *argp)
{
struct blk_integrity *bi = blk_get_integrity(bdev->bd_disk);
struct logical_block_metadata_cap meta_cap = {};
size_t usize = _IOC_SIZE(cmd);
if (_IOC_DIR(cmd) != _IOC_DIR(FS_IOC_GETLBMD_CAP) ||
_IOC_TYPE(cmd) != _IOC_TYPE(FS_IOC_GETLBMD_CAP) ||
_IOC_NR(cmd) != _IOC_NR(FS_IOC_GETLBMD_CAP) ||
_IOC_SIZE(cmd) < LBMD_SIZE_VER0)
return -ENOIOCTLCMD;
fs: add ioctl to query metadata and protection info capabilities Add a new ioctl, FS_IOC_GETLBMD_CAP, to query metadata and protection info (PI) capabilities. This ioctl returns information about the files integrity profile. This is useful for userspace applications to understand a files end-to-end data protection support and configure the I/O accordingly. For now this interface is only supported by block devices. However the design and placement of this ioctl in generic FS ioctl space allows us to extend it to work over files as well. This maybe useful when filesystems start supporting PI-aware layouts. A new structure struct logical_block_metadata_cap is introduced, which contains the following fields: 1. lbmd_flags: bitmask of logical block metadata capability flags 2. lbmd_interval: the amount of data described by each unit of logical block metadata 3. lbmd_size: size in bytes of the logical block metadata associated with each interval 4. lbmd_opaque_size: size in bytes of the opaque block tag associated with each interval 5. lbmd_opaque_offset: offset in bytes of the opaque block tag within the logical block metadata 6. lbmd_pi_size: size in bytes of the T10 PI tuple associated with each interval 7. lbmd_pi_offset: offset in bytes of T10 PI tuple within the logical block metadata 8. lbmd_pi_guard_tag_type: T10 PI guard tag type 9. lbmd_pi_app_tag_size: size in bytes of the T10 PI application tag 10. lbmd_pi_ref_tag_size: size in bytes of the T10 PI reference tag 11. lbmd_pi_storage_tag_size: size in bytes of the T10 PI storage tag The internal logic to fetch the capability is encapsulated in a helper function blk_get_meta_cap(), which uses the blk_integrity profile associated with the device. The ioctl returns -EOPNOTSUPP, if CONFIG_BLK_DEV_INTEGRITY is not enabled. Suggested-by: Martin K. Petersen <martin.petersen@oracle.com> Signed-off-by: Anuj Gupta <anuj20.g@samsung.com> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com> Link: https://lore.kernel.org/20250630090548.3317-5-anuj20.g@samsung.com Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-06-30 14:35:48 +05:30
if (!bi)
goto out;
if (bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE)
meta_cap.lbmd_flags |= LBMD_PI_CAP_INTEGRITY;
if (bi->flags & BLK_INTEGRITY_REF_TAG)
meta_cap.lbmd_flags |= LBMD_PI_CAP_REFTAG;
meta_cap.lbmd_interval = 1 << bi->interval_exp;
meta_cap.lbmd_size = bi->metadata_size;
meta_cap.lbmd_pi_size = bi->pi_tuple_size;
meta_cap.lbmd_pi_offset = bi->pi_offset;
meta_cap.lbmd_opaque_size = bi->metadata_size - bi->pi_tuple_size;
if (meta_cap.lbmd_opaque_size && !bi->pi_offset)
meta_cap.lbmd_opaque_offset = bi->pi_tuple_size;
switch (bi->csum_type) {
case BLK_INTEGRITY_CSUM_NONE:
meta_cap.lbmd_guard_tag_type = LBMD_PI_CSUM_NONE;
break;
case BLK_INTEGRITY_CSUM_IP:
meta_cap.lbmd_guard_tag_type = LBMD_PI_CSUM_IP;
break;
case BLK_INTEGRITY_CSUM_CRC:
meta_cap.lbmd_guard_tag_type = LBMD_PI_CSUM_CRC16_T10DIF;
break;
case BLK_INTEGRITY_CSUM_CRC64:
meta_cap.lbmd_guard_tag_type = LBMD_PI_CSUM_CRC64_NVME;
break;
}
fs: add ioctl to query metadata and protection info capabilities Add a new ioctl, FS_IOC_GETLBMD_CAP, to query metadata and protection info (PI) capabilities. This ioctl returns information about the files integrity profile. This is useful for userspace applications to understand a files end-to-end data protection support and configure the I/O accordingly. For now this interface is only supported by block devices. However the design and placement of this ioctl in generic FS ioctl space allows us to extend it to work over files as well. This maybe useful when filesystems start supporting PI-aware layouts. A new structure struct logical_block_metadata_cap is introduced, which contains the following fields: 1. lbmd_flags: bitmask of logical block metadata capability flags 2. lbmd_interval: the amount of data described by each unit of logical block metadata 3. lbmd_size: size in bytes of the logical block metadata associated with each interval 4. lbmd_opaque_size: size in bytes of the opaque block tag associated with each interval 5. lbmd_opaque_offset: offset in bytes of the opaque block tag within the logical block metadata 6. lbmd_pi_size: size in bytes of the T10 PI tuple associated with each interval 7. lbmd_pi_offset: offset in bytes of T10 PI tuple within the logical block metadata 8. lbmd_pi_guard_tag_type: T10 PI guard tag type 9. lbmd_pi_app_tag_size: size in bytes of the T10 PI application tag 10. lbmd_pi_ref_tag_size: size in bytes of the T10 PI reference tag 11. lbmd_pi_storage_tag_size: size in bytes of the T10 PI storage tag The internal logic to fetch the capability is encapsulated in a helper function blk_get_meta_cap(), which uses the blk_integrity profile associated with the device. The ioctl returns -EOPNOTSUPP, if CONFIG_BLK_DEV_INTEGRITY is not enabled. Suggested-by: Martin K. Petersen <martin.petersen@oracle.com> Signed-off-by: Anuj Gupta <anuj20.g@samsung.com> Signed-off-by: Kanchan Joshi <joshi.k@samsung.com> Link: https://lore.kernel.org/20250630090548.3317-5-anuj20.g@samsung.com Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Christian Brauner <brauner@kernel.org>
2025-06-30 14:35:48 +05:30
if (bi->csum_type != BLK_INTEGRITY_CSUM_NONE)
meta_cap.lbmd_app_tag_size = 2;
if (bi->flags & BLK_INTEGRITY_REF_TAG) {
switch (bi->csum_type) {
case BLK_INTEGRITY_CSUM_CRC64:
meta_cap.lbmd_ref_tag_size =
sizeof_field(struct crc64_pi_tuple, ref_tag);
break;
case BLK_INTEGRITY_CSUM_CRC:
case BLK_INTEGRITY_CSUM_IP:
meta_cap.lbmd_ref_tag_size =
sizeof_field(struct t10_pi_tuple, ref_tag);
break;
default:
break;
}
}
out:
return copy_struct_to_user(argp, usize, &meta_cap, sizeof(meta_cap),
NULL);
}
/**
* blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist
* @rq: request to map
* @sglist: target scatterlist
*
* Description: Map the integrity vectors in request into a
* scatterlist. The scatterlist must be big enough to hold all
* elements. I.e. sized using blk_rq_count_integrity_sg() or
* rq->nr_integrity_segments.
*/
int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist)
{
struct bio_vec iv, ivprv = { NULL };
struct request_queue *q = rq->q;
struct scatterlist *sg = NULL;
struct bio *bio = rq->bio;
unsigned int segments = 0;
struct bvec_iter iter;
int prev = 0;
bio_for_each_integrity_vec(iv, bio, iter) {
if (prev) {
if (!biovec_phys_mergeable(q, &ivprv, &iv))
goto new_segment;
if (sg->length + iv.bv_len > queue_max_segment_size(q))
goto new_segment;
sg->length += iv.bv_len;
} else {
new_segment:
if (!sg)
sg = sglist;
else {
sg_unmark_end(sg);
sg = sg_next(sg);
}
sg_set_page(sg, iv.bv_page, iv.bv_len, iv.bv_offset);
segments++;
}
prev = 1;
ivprv = iv;
}
if (sg)
sg_mark_end(sg);
/*
* Something must have been wrong if the figured number of segment
* is bigger than number of req's physical integrity segments
*/
BUG_ON(segments > rq->nr_integrity_segments);
BUG_ON(segments > queue_max_integrity_segments(q));
return segments;
}
EXPORT_SYMBOL(blk_rq_map_integrity_sg);
int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf,
ssize_t bytes)
{
int ret;
struct iov_iter iter;
iov_iter_ubuf(&iter, rq_data_dir(rq), ubuf, bytes);
ret = bio_integrity_map_user(rq->bio, &iter);
if (ret)
return ret;
rq->nr_integrity_segments = blk_rq_count_integrity_sg(rq->q, rq->bio);
rq->cmd_flags |= REQ_INTEGRITY;
return 0;
}
EXPORT_SYMBOL_GPL(blk_rq_integrity_map_user);
bool blk_integrity_merge_rq(struct request_queue *q, struct request *req,
struct request *next)
{
if (blk_integrity_rq(req) == 0 && blk_integrity_rq(next) == 0)
return true;
if (blk_integrity_rq(req) == 0 || blk_integrity_rq(next) == 0)
return false;
if (bio_integrity(req->bio)->bip_flags !=
bio_integrity(next->bio)->bip_flags)
return false;
if (req->nr_integrity_segments + next->nr_integrity_segments >
q->limits.max_integrity_segments)
return false;
if (integrity_req_gap_back_merge(req, next->bio))
return false;
return true;
}
bool blk_integrity_merge_bio(struct request_queue *q, struct request *req,
struct bio *bio)
{
int nr_integrity_segs;
if (blk_integrity_rq(req) == 0 && bio_integrity(bio) == NULL)
return true;
if (blk_integrity_rq(req) == 0 || bio_integrity(bio) == NULL)
return false;
if (bio_integrity(req->bio)->bip_flags != bio_integrity(bio)->bip_flags)
return false;
nr_integrity_segs = blk_rq_count_integrity_sg(q, bio);
if (req->nr_integrity_segments + nr_integrity_segs >
q->limits.max_integrity_segments)
return false;
return true;
}
static inline struct blk_integrity *dev_to_bi(struct device *dev)
{
return &dev_to_disk(dev)->queue->limits.integrity;
}
const char *blk_integrity_profile_name(struct blk_integrity *bi)
{
switch (bi->csum_type) {
case BLK_INTEGRITY_CSUM_IP:
if (bi->flags & BLK_INTEGRITY_REF_TAG)
return "T10-DIF-TYPE1-IP";
return "T10-DIF-TYPE3-IP";
case BLK_INTEGRITY_CSUM_CRC:
if (bi->flags & BLK_INTEGRITY_REF_TAG)
return "T10-DIF-TYPE1-CRC";
return "T10-DIF-TYPE3-CRC";
case BLK_INTEGRITY_CSUM_CRC64:
if (bi->flags & BLK_INTEGRITY_REF_TAG)
return "EXT-DIF-TYPE1-CRC64";
return "EXT-DIF-TYPE3-CRC64";
case BLK_INTEGRITY_CSUM_NONE:
break;
}
return "nop";
}
EXPORT_SYMBOL_GPL(blk_integrity_profile_name);
static ssize_t flag_store(struct device *dev, const char *page, size_t count,
unsigned char flag)
{
struct request_queue *q = dev_to_disk(dev)->queue;
struct queue_limits lim;
unsigned long val;
int err;
err = kstrtoul(page, 10, &val);
if (err)
return err;
/* note that the flags are inverted vs the values in the sysfs files */
lim = queue_limits_start_update(q);
if (val)
lim.integrity.flags &= ~flag;
else
lim.integrity.flags |= flag;
err = queue_limits_commit_update_frozen(q, &lim);
if (err)
return err;
return count;
}
static ssize_t flag_show(struct device *dev, char *page, unsigned char flag)
{
struct blk_integrity *bi = dev_to_bi(dev);
return sysfs_emit(page, "%d\n", !(bi->flags & flag));
}
static ssize_t format_show(struct device *dev, struct device_attribute *attr,
char *page)
{
struct blk_integrity *bi = dev_to_bi(dev);
if (!bi->metadata_size)
return sysfs_emit(page, "none\n");
return sysfs_emit(page, "%s\n", blk_integrity_profile_name(bi));
}
static ssize_t tag_size_show(struct device *dev, struct device_attribute *attr,
char *page)
{
struct blk_integrity *bi = dev_to_bi(dev);
return sysfs_emit(page, "%u\n", bi->tag_size);
}
static ssize_t protection_interval_bytes_show(struct device *dev,
struct device_attribute *attr,
char *page)
{
struct blk_integrity *bi = dev_to_bi(dev);
return sysfs_emit(page, "%u\n",
bi->interval_exp ? 1 << bi->interval_exp : 0);
}
static ssize_t read_verify_store(struct device *dev,
struct device_attribute *attr,
const char *page, size_t count)
{
return flag_store(dev, page, count, BLK_INTEGRITY_NOVERIFY);
}
static ssize_t read_verify_show(struct device *dev,
struct device_attribute *attr, char *page)
{
return flag_show(dev, page, BLK_INTEGRITY_NOVERIFY);
}
static ssize_t write_generate_store(struct device *dev,
struct device_attribute *attr,
const char *page, size_t count)
{
return flag_store(dev, page, count, BLK_INTEGRITY_NOGENERATE);
}
static ssize_t write_generate_show(struct device *dev,
struct device_attribute *attr, char *page)
{
return flag_show(dev, page, BLK_INTEGRITY_NOGENERATE);
}
static ssize_t device_is_integrity_capable_show(struct device *dev,
struct device_attribute *attr,
char *page)
{
struct blk_integrity *bi = dev_to_bi(dev);
return sysfs_emit(page, "%u\n",
!!(bi->flags & BLK_INTEGRITY_DEVICE_CAPABLE));
}
static DEVICE_ATTR_RO(format);
static DEVICE_ATTR_RO(tag_size);
static DEVICE_ATTR_RO(protection_interval_bytes);
static DEVICE_ATTR_RW(read_verify);
static DEVICE_ATTR_RW(write_generate);
static DEVICE_ATTR_RO(device_is_integrity_capable);
static struct attribute *integrity_attrs[] = {
&dev_attr_format.attr,
&dev_attr_tag_size.attr,
&dev_attr_protection_interval_bytes.attr,
&dev_attr_read_verify.attr,
&dev_attr_write_generate.attr,
&dev_attr_device_is_integrity_capable.attr,
NULL
};
const struct attribute_group blk_integrity_attr_group = {
.name = "integrity",
.attrs = integrity_attrs,
};