mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-18 22:14:16 +00:00
- Fix dm-raid transient device failure processing and other smaller
tweaks. - Add journal support to the DM raid target to close the 'write hole' on raid 4/5/6. - Fix dm-cache corruption, due to rounding bug, when cache exceeds 2TB. - Add 'metadata2' feature to dm-cache to separate the dirty bitset out from other cache metadata. This improves speed of shutting down a large cache device (which implies writing out dirty bits). - Fix a memory leak during dm-stats data structure destruction. - Fix a DM multipath round-robin path selector performance regression that was caused by less precise balancing across all paths. - Lastly, introduce a DM core fix for a long-standing DM snapshot deadlock that is rooted in the complexity of the device stack used in conjunction with block core maintaining bios on current->bio_list to manage recursion in generic_make_request(). A more comprehensive fix to block core (and its hook in the cpu scheduler) would be wonderful but this DM-specific fix is pragmatic considering how difficult it has been to make progress on a generic fix. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1 iQEcBAABAgAGBQJYrJJeAAoJEMUj8QotnQNaDskIAIJeMX3Dc8Skt00tZ6vEj3p6 9juDpOrBKH3RYdqPmrYy9lVhhpFs6OoDfTQZaW/SmjDjHboJ3skKMjO+/NWav4nN 39LoDfxLbDi06fC7Y4H7FHUPjb5sKSzw4W5IttFEKmHOwkz+iwVFL1R0dihBqv7G Lq0Ta6xffW8jHrzpmmSDY1I6FSmZ9LlHPCL00qQ5Z7WkMS5oDk0GzZoLFasdNfvm fP9N13+uel2/R7hclpxE6J+IZPN5ARG3HAQ5POS+2gMlIzaH4AlMh7yf5q0sSGwq uQsmdps8c+LOtAakOzVScykEZvwBh+ci8VqE1X1zol+fl8ijeWqgWtz4XXYECC0= =saD8 -----END PGP SIGNATURE----- Merge tag 'dm-4.11-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm Pull device mapper updates from Mike Snitzer: - Fix dm-raid transient device failure processing and other smaller tweaks. - Add journal support to the DM raid target to close the 'write hole' on raid 4/5/6. - Fix dm-cache corruption, due to rounding bug, when cache exceeds 2TB. - Add 'metadata2' feature to dm-cache to separate the dirty bitset out from other cache metadata. 
This improves speed of shutting down a large cache device (which implies writing out dirty bits). - Fix a memory leak during dm-stats data structure destruction. - Fix a DM multipath round-robin path selector performance regression that was caused by less precise balancing across all paths. - Lastly, introduce a DM core fix for a long-standing DM snapshot deadlock that is rooted in the complexity of the device stack used in conjunction with block core maintaining bios on current->bio_list to manage recursion in generic_make_request(). A more comprehensive fix to block core (and its hook in the cpu scheduler) would be wonderful but this DM-specific fix is pragmatic considering how difficult it has been to make progress on a generic fix. * tag 'dm-4.11-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (22 commits) dm: flush queued bios when process blocks to avoid deadlock dm round robin: revert "use percpu 'repeat_count' and 'current_path'" dm stats: fix a leaked s->histogram_boundaries array dm space map metadata: constify dm_space_map structures dm cache metadata: use cursor api in blocks_are_clean_separate_dirty() dm persistent data: add cursor skip functions to the cursor APIs dm cache metadata: use dm_bitset_new() to create the dirty bitset in format 2 dm bitset: add dm_bitset_new() dm cache metadata: name the cache block that couldn't be loaded dm cache metadata: add "metadata2" feature dm cache metadata: use bitset cursor api to load discard bitset dm bitset: introduce cursor api dm btree: use GFP_NOFS in dm_btree_del() dm space map common: memcpy the disk root to ensure it's arch aligned dm block manager: add unlikely() annotations on dm_bufio error paths dm cache: fix corruption seen when using cache > 2TB dm raid: cleanup awkward branching in raid_message() option processing dm raid: use mddev rather than rdev->mddev dm raid: use read_disk_sb() throughout dm raid: add raid4/5/6 journaling support ...
This commit is contained in:
commit
7a771ceac7
18 changed files with 875 additions and 227 deletions
|
@ -207,6 +207,10 @@ Optional feature arguments are:
|
|||
block, then the cache block is invalidated.
|
||||
To enable passthrough mode the cache must be clean.
|
||||
|
||||
metadata2 : use version 2 of the metadata. This stores the dirty bits
|
||||
in a separate btree, which improves speed of shutting
|
||||
down the cache.
|
||||
|
||||
A policy called 'default' is always registered. This is an alias for
|
||||
the policy we currently think is giving the best all-round performance.
|
||||
|
||||
|
|
|
@ -161,6 +161,15 @@ The target is named "raid" and it accepts the following parameters:
|
|||
the RAID type (i.e. the allocation algorithm) as well, e.g.
|
||||
changing from raid5_ls to raid5_n.
|
||||
|
||||
[journal_dev <dev>]
|
||||
This option adds a journal device to raid4/5/6 raid sets and
|
||||
uses it to close the 'write hole' caused by the non-atomic updates
|
||||
to the component devices which can cause data loss during recovery.
|
||||
The journal device is used in writethrough mode, thus causing writes to
|
||||
be throttled versus non-journaled raid4/5/6 sets.
|
||||
Takeover/reshape is not possible with a raid4/5/6 journal device;
|
||||
it has to be deconfigured before requesting these.
|
||||
|
||||
<#raid_devs>: The number of devices composing the array.
|
||||
Each device consists of two entries. The first is the device
|
||||
containing the metadata (if any); the second is the one containing the
|
||||
|
@ -245,6 +254,9 @@ recovery. Here is a fuller description of the individual fields:
|
|||
<data_offset> The current data offset to the start of the user data on
|
||||
each component device of a raid set (see the respective
|
||||
raid parameter to support out-of-place reshaping).
|
||||
<journal_char> 'A' - active raid4/5/6 journal device.
|
||||
'D' - dead journal device.
|
||||
'-' - no journal device.
|
||||
|
||||
|
||||
Message Interface
|
||||
|
@ -314,3 +326,8 @@ Version History
|
|||
1.9.0 Add support for RAID level takeover/reshape/region size
|
||||
and set size reduction.
|
||||
1.9.1 Fix activation of existing RAID 4/10 mapped devices
|
||||
1.9.2 Don't emit '- -' on the status table line in case the constructor
|
||||
fails reading a superblock. Correctly emit 'maj:min1 maj:min2' and
|
||||
'D' on the status line. If '- -' is passed into the constructor, emit
|
||||
'- -' on the table line and '-' as the status line health character.
|
||||
1.10.0 Add support for raid4/5/6 journal device
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
* defines a range of metadata versions that this module can handle.
|
||||
*/
|
||||
#define MIN_CACHE_VERSION 1
|
||||
#define MAX_CACHE_VERSION 1
|
||||
#define MAX_CACHE_VERSION 2
|
||||
|
||||
#define CACHE_METADATA_CACHE_SIZE 64
|
||||
|
||||
|
@ -55,6 +55,7 @@ enum mapping_bits {
|
|||
|
||||
/*
|
||||
* The data on the cache is different from that on the origin.
|
||||
* This flag is only used by metadata format 1.
|
||||
*/
|
||||
M_DIRTY = 2
|
||||
};
|
||||
|
@ -93,12 +94,18 @@ struct cache_disk_superblock {
|
|||
__le32 write_misses;
|
||||
|
||||
__le32 policy_version[CACHE_POLICY_VERSION_SIZE];
|
||||
|
||||
/*
|
||||
* Metadata format 2 fields.
|
||||
*/
|
||||
__le64 dirty_root;
|
||||
} __packed;
|
||||
|
||||
struct dm_cache_metadata {
|
||||
atomic_t ref_count;
|
||||
struct list_head list;
|
||||
|
||||
unsigned version;
|
||||
struct block_device *bdev;
|
||||
struct dm_block_manager *bm;
|
||||
struct dm_space_map *metadata_sm;
|
||||
|
@ -141,12 +148,19 @@ struct dm_cache_metadata {
|
|||
*/
|
||||
bool fail_io:1;
|
||||
|
||||
/*
|
||||
* Metadata format 2 fields.
|
||||
*/
|
||||
dm_block_t dirty_root;
|
||||
struct dm_disk_bitset dirty_info;
|
||||
|
||||
/*
|
||||
* These structures are used when loading metadata. They're too
|
||||
* big to put on the stack.
|
||||
*/
|
||||
struct dm_array_cursor mapping_cursor;
|
||||
struct dm_array_cursor hint_cursor;
|
||||
struct dm_bitset_cursor dirty_cursor;
|
||||
};
|
||||
|
||||
/*-------------------------------------------------------------------
|
||||
|
@ -170,6 +184,7 @@ static void sb_prepare_for_write(struct dm_block_validator *v,
|
|||
static int check_metadata_version(struct cache_disk_superblock *disk_super)
|
||||
{
|
||||
uint32_t metadata_version = le32_to_cpu(disk_super->version);
|
||||
|
||||
if (metadata_version < MIN_CACHE_VERSION || metadata_version > MAX_CACHE_VERSION) {
|
||||
DMERR("Cache metadata version %u found, but only versions between %u and %u supported.",
|
||||
metadata_version, MIN_CACHE_VERSION, MAX_CACHE_VERSION);
|
||||
|
@ -310,6 +325,11 @@ static void __copy_sm_root(struct dm_cache_metadata *cmd,
|
|||
sizeof(cmd->metadata_space_map_root));
|
||||
}
|
||||
|
||||
static bool separate_dirty_bits(struct dm_cache_metadata *cmd)
|
||||
{
|
||||
return cmd->version >= 2;
|
||||
}
|
||||
|
||||
static int __write_initial_superblock(struct dm_cache_metadata *cmd)
|
||||
{
|
||||
int r;
|
||||
|
@ -341,7 +361,7 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
|
|||
disk_super->flags = 0;
|
||||
memset(disk_super->uuid, 0, sizeof(disk_super->uuid));
|
||||
disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC);
|
||||
disk_super->version = cpu_to_le32(MAX_CACHE_VERSION);
|
||||
disk_super->version = cpu_to_le32(cmd->version);
|
||||
memset(disk_super->policy_name, 0, sizeof(disk_super->policy_name));
|
||||
memset(disk_super->policy_version, 0, sizeof(disk_super->policy_version));
|
||||
disk_super->policy_hint_size = 0;
|
||||
|
@ -362,6 +382,9 @@ static int __write_initial_superblock(struct dm_cache_metadata *cmd)
|
|||
disk_super->write_hits = cpu_to_le32(0);
|
||||
disk_super->write_misses = cpu_to_le32(0);
|
||||
|
||||
if (separate_dirty_bits(cmd))
|
||||
disk_super->dirty_root = cpu_to_le64(cmd->dirty_root);
|
||||
|
||||
return dm_tm_commit(cmd->tm, sblock);
|
||||
}
|
||||
|
||||
|
@ -382,6 +405,13 @@ static int __format_metadata(struct dm_cache_metadata *cmd)
|
|||
if (r < 0)
|
||||
goto bad;
|
||||
|
||||
if (separate_dirty_bits(cmd)) {
|
||||
dm_disk_bitset_init(cmd->tm, &cmd->dirty_info);
|
||||
r = dm_bitset_empty(&cmd->dirty_info, &cmd->dirty_root);
|
||||
if (r < 0)
|
||||
goto bad;
|
||||
}
|
||||
|
||||
dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
|
||||
r = dm_bitset_empty(&cmd->discard_info, &cmd->discard_root);
|
||||
if (r < 0)
|
||||
|
@ -407,9 +437,10 @@ bad:
|
|||
static int __check_incompat_features(struct cache_disk_superblock *disk_super,
|
||||
struct dm_cache_metadata *cmd)
|
||||
{
|
||||
uint32_t features;
|
||||
uint32_t incompat_flags, features;
|
||||
|
||||
features = le32_to_cpu(disk_super->incompat_flags) & ~DM_CACHE_FEATURE_INCOMPAT_SUPP;
|
||||
incompat_flags = le32_to_cpu(disk_super->incompat_flags);
|
||||
features = incompat_flags & ~DM_CACHE_FEATURE_INCOMPAT_SUPP;
|
||||
if (features) {
|
||||
DMERR("could not access metadata due to unsupported optional features (%lx).",
|
||||
(unsigned long)features);
|
||||
|
@ -470,6 +501,7 @@ static int __open_metadata(struct dm_cache_metadata *cmd)
|
|||
}
|
||||
|
||||
__setup_mapping_info(cmd);
|
||||
dm_disk_bitset_init(cmd->tm, &cmd->dirty_info);
|
||||
dm_disk_bitset_init(cmd->tm, &cmd->discard_info);
|
||||
sb_flags = le32_to_cpu(disk_super->flags);
|
||||
cmd->clean_when_opened = test_bit(CLEAN_SHUTDOWN, &sb_flags);
|
||||
|
@ -548,6 +580,7 @@ static unsigned long clear_clean_shutdown(unsigned long flags)
|
|||
static void read_superblock_fields(struct dm_cache_metadata *cmd,
|
||||
struct cache_disk_superblock *disk_super)
|
||||
{
|
||||
cmd->version = le32_to_cpu(disk_super->version);
|
||||
cmd->flags = le32_to_cpu(disk_super->flags);
|
||||
cmd->root = le64_to_cpu(disk_super->mapping_root);
|
||||
cmd->hint_root = le64_to_cpu(disk_super->hint_root);
|
||||
|
@ -567,6 +600,9 @@ static void read_superblock_fields(struct dm_cache_metadata *cmd,
|
|||
cmd->stats.write_hits = le32_to_cpu(disk_super->write_hits);
|
||||
cmd->stats.write_misses = le32_to_cpu(disk_super->write_misses);
|
||||
|
||||
if (separate_dirty_bits(cmd))
|
||||
cmd->dirty_root = le64_to_cpu(disk_super->dirty_root);
|
||||
|
||||
cmd->changed = false;
|
||||
}
|
||||
|
||||
|
@ -625,6 +661,13 @@ static int __commit_transaction(struct dm_cache_metadata *cmd,
|
|||
*/
|
||||
BUILD_BUG_ON(sizeof(struct cache_disk_superblock) > 512);
|
||||
|
||||
if (separate_dirty_bits(cmd)) {
|
||||
r = dm_bitset_flush(&cmd->dirty_info, cmd->dirty_root,
|
||||
&cmd->dirty_root);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root,
|
||||
&cmd->discard_root);
|
||||
if (r)
|
||||
|
@ -649,6 +692,8 @@ static int __commit_transaction(struct dm_cache_metadata *cmd,
|
|||
update_flags(disk_super, mutator);
|
||||
|
||||
disk_super->mapping_root = cpu_to_le64(cmd->root);
|
||||
if (separate_dirty_bits(cmd))
|
||||
disk_super->dirty_root = cpu_to_le64(cmd->dirty_root);
|
||||
disk_super->hint_root = cpu_to_le64(cmd->hint_root);
|
||||
disk_super->discard_root = cpu_to_le64(cmd->discard_root);
|
||||
disk_super->discard_block_size = cpu_to_le64(cmd->discard_block_size);
|
||||
|
@ -698,7 +743,8 @@ static void unpack_value(__le64 value_le, dm_oblock_t *block, unsigned *flags)
|
|||
static struct dm_cache_metadata *metadata_open(struct block_device *bdev,
|
||||
sector_t data_block_size,
|
||||
bool may_format_device,
|
||||
size_t policy_hint_size)
|
||||
size_t policy_hint_size,
|
||||
unsigned metadata_version)
|
||||
{
|
||||
int r;
|
||||
struct dm_cache_metadata *cmd;
|
||||
|
@ -709,6 +755,7 @@ static struct dm_cache_metadata *metadata_open(struct block_device *bdev,
|
|||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
|
||||
cmd->version = metadata_version;
|
||||
atomic_set(&cmd->ref_count, 1);
|
||||
init_rwsem(&cmd->root_lock);
|
||||
cmd->bdev = bdev;
|
||||
|
@ -757,7 +804,8 @@ static struct dm_cache_metadata *lookup(struct block_device *bdev)
|
|||
static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev,
|
||||
sector_t data_block_size,
|
||||
bool may_format_device,
|
||||
size_t policy_hint_size)
|
||||
size_t policy_hint_size,
|
||||
unsigned metadata_version)
|
||||
{
|
||||
struct dm_cache_metadata *cmd, *cmd2;
|
||||
|
||||
|
@ -768,7 +816,8 @@ static struct dm_cache_metadata *lookup_or_open(struct block_device *bdev,
|
|||
if (cmd)
|
||||
return cmd;
|
||||
|
||||
cmd = metadata_open(bdev, data_block_size, may_format_device, policy_hint_size);
|
||||
cmd = metadata_open(bdev, data_block_size, may_format_device,
|
||||
policy_hint_size, metadata_version);
|
||||
if (!IS_ERR(cmd)) {
|
||||
mutex_lock(&table_lock);
|
||||
cmd2 = lookup(bdev);
|
||||
|
@ -800,10 +849,11 @@ static bool same_params(struct dm_cache_metadata *cmd, sector_t data_block_size)
|
|||
struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
|
||||
sector_t data_block_size,
|
||||
bool may_format_device,
|
||||
size_t policy_hint_size)
|
||||
size_t policy_hint_size,
|
||||
unsigned metadata_version)
|
||||
{
|
||||
struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size,
|
||||
may_format_device, policy_hint_size);
|
||||
struct dm_cache_metadata *cmd = lookup_or_open(bdev, data_block_size, may_format_device,
|
||||
policy_hint_size, metadata_version);
|
||||
|
||||
if (!IS_ERR(cmd) && !same_params(cmd, data_block_size)) {
|
||||
dm_cache_metadata_close(cmd);
|
||||
|
@ -829,8 +879,8 @@ void dm_cache_metadata_close(struct dm_cache_metadata *cmd)
|
|||
/*
|
||||
* Checks that the given cache block is either unmapped or clean.
|
||||
*/
|
||||
static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b,
|
||||
bool *result)
|
||||
static int block_clean_combined_dirty(struct dm_cache_metadata *cmd, dm_cblock_t b,
|
||||
bool *result)
|
||||
{
|
||||
int r;
|
||||
__le64 value;
|
||||
|
@ -838,10 +888,8 @@ static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b,
|
|||
unsigned flags;
|
||||
|
||||
r = dm_array_get_value(&cmd->info, cmd->root, from_cblock(b), &value);
|
||||
if (r) {
|
||||
DMERR("block_unmapped_or_clean failed");
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
unpack_value(value, &ob, &flags);
|
||||
*result = !((flags & M_VALID) && (flags & M_DIRTY));
|
||||
|
@ -849,17 +897,19 @@ static int block_unmapped_or_clean(struct dm_cache_metadata *cmd, dm_cblock_t b,
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd,
|
||||
dm_cblock_t begin, dm_cblock_t end,
|
||||
bool *result)
|
||||
static int blocks_are_clean_combined_dirty(struct dm_cache_metadata *cmd,
|
||||
dm_cblock_t begin, dm_cblock_t end,
|
||||
bool *result)
|
||||
{
|
||||
int r;
|
||||
*result = true;
|
||||
|
||||
while (begin != end) {
|
||||
r = block_unmapped_or_clean(cmd, begin, result);
|
||||
if (r)
|
||||
r = block_clean_combined_dirty(cmd, begin, result);
|
||||
if (r) {
|
||||
DMERR("block_clean_combined_dirty failed");
|
||||
return r;
|
||||
}
|
||||
|
||||
if (!*result) {
|
||||
DMERR("cache block %llu is dirty",
|
||||
|
@ -873,6 +923,67 @@ static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd,
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Checks the separate dirty bitset (metadata format 2) for the cache
 * blocks in the half-open range [begin, end).
 *
 * Returns 0 on success, with *result set to true when none of the blocks
 * in the range has its dirty bit set and to false as soon as a dirty block
 * is found.  Returns the bitset cursor's error code on failure.
 */
static int blocks_are_clean_separate_dirty(struct dm_cache_metadata *cmd,
					   dm_cblock_t begin, dm_cblock_t end,
					   bool *result)
{
	int r;
	bool dirty_flag;
	*result = true;

	/*
	 * A cache without blocks has no dirty bits to inspect.
	 * NOTE(review): dm_bitset_cursor_begin() is expected to reject a
	 * zero-entry bitset, hence the early exit - confirm.
	 */
	if (from_cblock(cmd->cache_blocks) == 0)
		/* nothing to do */
		return 0;

	/*
	 * The cursor must be opened over the whole bitset (one bit per
	 * cache block); passing 'begin' as the entry count would leave no
	 * entries to walk once we have skipped forward to 'begin'.
	 */
	r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root,
				   from_cblock(cmd->cache_blocks), &cmd->dirty_cursor);
	if (r) {
		DMERR("%s: dm_bitset_cursor_begin for dirty failed", __func__);
		return r;
	}

	r = dm_bitset_cursor_skip(&cmd->dirty_cursor, from_cblock(begin));
	if (r) {
		DMERR("%s: dm_bitset_cursor_skip for dirty failed", __func__);
		dm_bitset_cursor_end(&cmd->dirty_cursor);
		return r;
	}

	while (begin != end) {
		/*
		 * We assume that unmapped blocks have their dirty bit
		 * cleared.
		 */
		dirty_flag = dm_bitset_cursor_get_value(&cmd->dirty_cursor);
		if (dirty_flag) {
			DMERR("%s: cache block %llu is dirty", __func__,
			      (unsigned long long) from_cblock(begin));
			dm_bitset_cursor_end(&cmd->dirty_cursor);
			*result = false;
			return 0;
		}

		/*
		 * Only advance the cursor if another block remains to be
		 * checked; stepping past the final bit would report an
		 * error even though every requested block was clean.
		 */
		begin = to_cblock(from_cblock(begin) + 1);
		if (begin == end)
			break;

		r = dm_bitset_cursor_next(&cmd->dirty_cursor);
		if (r) {
			DMERR("%s: dm_bitset_cursor_next for dirty failed", __func__);
			dm_bitset_cursor_end(&cmd->dirty_cursor);
			return r;
		}
	}

	dm_bitset_cursor_end(&cmd->dirty_cursor);

	return 0;
}
|
||||
|
||||
/*
 * Checks whether the cache blocks from 'begin' up to 'end' are clean,
 * delegating to the helper that matches where this metadata keeps its
 * dirty bits.
 */
static int blocks_are_unmapped_or_clean(struct dm_cache_metadata *cmd,
					dm_cblock_t begin, dm_cblock_t end,
					bool *result)
{
	if (!separate_dirty_bits(cmd))
		return blocks_are_clean_combined_dirty(cmd, begin, end, result);

	return blocks_are_clean_separate_dirty(cmd, begin, end, result);
}
|
||||
|
||||
static bool cmd_write_lock(struct dm_cache_metadata *cmd)
|
||||
{
|
||||
down_write(&cmd->root_lock);
|
||||
|
@ -950,8 +1061,18 @@ int dm_cache_resize(struct dm_cache_metadata *cmd, dm_cblock_t new_cache_size)
|
|||
r = dm_array_resize(&cmd->info, cmd->root, from_cblock(cmd->cache_blocks),
|
||||
from_cblock(new_cache_size),
|
||||
&null_mapping, &cmd->root);
|
||||
if (!r)
|
||||
cmd->cache_blocks = new_cache_size;
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
if (separate_dirty_bits(cmd)) {
|
||||
r = dm_bitset_resize(&cmd->dirty_info, cmd->dirty_root,
|
||||
from_cblock(cmd->cache_blocks), from_cblock(new_cache_size),
|
||||
false, &cmd->dirty_root);
|
||||
if (r)
|
||||
goto out;
|
||||
}
|
||||
|
||||
cmd->cache_blocks = new_cache_size;
|
||||
cmd->changed = true;
|
||||
|
||||
out:
|
||||
|
@ -995,14 +1116,6 @@ static int __clear_discard(struct dm_cache_metadata *cmd, dm_dblock_t b)
|
|||
from_dblock(b), &cmd->discard_root);
|
||||
}
|
||||
|
||||
static int __is_discarded(struct dm_cache_metadata *cmd, dm_dblock_t b,
|
||||
bool *is_discarded)
|
||||
{
|
||||
return dm_bitset_test_bit(&cmd->discard_info, cmd->discard_root,
|
||||
from_dblock(b), &cmd->discard_root,
|
||||
is_discarded);
|
||||
}
|
||||
|
||||
static int __discard(struct dm_cache_metadata *cmd,
|
||||
dm_dblock_t dblock, bool discard)
|
||||
{
|
||||
|
@ -1032,22 +1145,38 @@ static int __load_discards(struct dm_cache_metadata *cmd,
|
|||
load_discard_fn fn, void *context)
|
||||
{
|
||||
int r = 0;
|
||||
dm_block_t b;
|
||||
bool discard;
|
||||
uint32_t b;
|
||||
struct dm_bitset_cursor c;
|
||||
|
||||
for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) {
|
||||
dm_dblock_t dblock = to_dblock(b);
|
||||
if (from_dblock(cmd->discard_nr_blocks) == 0)
|
||||
/* nothing to do */
|
||||
return 0;
|
||||
|
||||
if (cmd->clean_when_opened) {
|
||||
r = __is_discarded(cmd, dblock, &discard);
|
||||
if (cmd->clean_when_opened) {
|
||||
r = dm_bitset_flush(&cmd->discard_info, cmd->discard_root, &cmd->discard_root);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
r = dm_bitset_cursor_begin(&cmd->discard_info, cmd->discard_root,
|
||||
from_dblock(cmd->discard_nr_blocks), &c);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) {
|
||||
r = fn(context, cmd->discard_block_size, to_dblock(b),
|
||||
dm_bitset_cursor_get_value(&c));
|
||||
if (r)
|
||||
break;
|
||||
}
|
||||
|
||||
dm_bitset_cursor_end(&c);
|
||||
|
||||
} else {
|
||||
for (b = 0; b < from_dblock(cmd->discard_nr_blocks); b++) {
|
||||
r = fn(context, cmd->discard_block_size, to_dblock(b), false);
|
||||
if (r)
|
||||
return r;
|
||||
} else
|
||||
discard = false;
|
||||
|
||||
r = fn(context, cmd->discard_block_size, dblock, discard);
|
||||
if (r)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return r;
|
||||
|
@ -1177,11 +1306,11 @@ static bool hints_array_available(struct dm_cache_metadata *cmd,
|
|||
hints_array_initialized(cmd);
|
||||
}
|
||||
|
||||
static int __load_mapping(struct dm_cache_metadata *cmd,
|
||||
uint64_t cb, bool hints_valid,
|
||||
struct dm_array_cursor *mapping_cursor,
|
||||
struct dm_array_cursor *hint_cursor,
|
||||
load_mapping_fn fn, void *context)
|
||||
static int __load_mapping_v1(struct dm_cache_metadata *cmd,
|
||||
uint64_t cb, bool hints_valid,
|
||||
struct dm_array_cursor *mapping_cursor,
|
||||
struct dm_array_cursor *hint_cursor,
|
||||
load_mapping_fn fn, void *context)
|
||||
{
|
||||
int r = 0;
|
||||
|
||||
|
@ -1206,8 +1335,51 @@ static int __load_mapping(struct dm_cache_metadata *cmd,
|
|||
|
||||
r = fn(context, oblock, to_cblock(cb), flags & M_DIRTY,
|
||||
le32_to_cpu(hint), hints_valid);
|
||||
if (r)
|
||||
DMERR("policy couldn't load cblock");
|
||||
if (r) {
|
||||
DMERR("policy couldn't load cache block %llu",
|
||||
(unsigned long long) from_cblock(to_cblock(cb)));
|
||||
}
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static int __load_mapping_v2(struct dm_cache_metadata *cmd,
|
||||
uint64_t cb, bool hints_valid,
|
||||
struct dm_array_cursor *mapping_cursor,
|
||||
struct dm_array_cursor *hint_cursor,
|
||||
struct dm_bitset_cursor *dirty_cursor,
|
||||
load_mapping_fn fn, void *context)
|
||||
{
|
||||
int r = 0;
|
||||
|
||||
__le64 mapping;
|
||||
__le32 hint = 0;
|
||||
|
||||
__le64 *mapping_value_le;
|
||||
__le32 *hint_value_le;
|
||||
|
||||
dm_oblock_t oblock;
|
||||
unsigned flags;
|
||||
bool dirty;
|
||||
|
||||
dm_array_cursor_get_value(mapping_cursor, (void **) &mapping_value_le);
|
||||
memcpy(&mapping, mapping_value_le, sizeof(mapping));
|
||||
unpack_value(mapping, &oblock, &flags);
|
||||
|
||||
if (flags & M_VALID) {
|
||||
if (hints_valid) {
|
||||
dm_array_cursor_get_value(hint_cursor, (void **) &hint_value_le);
|
||||
memcpy(&hint, hint_value_le, sizeof(hint));
|
||||
}
|
||||
|
||||
dirty = dm_bitset_cursor_get_value(dirty_cursor);
|
||||
r = fn(context, oblock, to_cblock(cb), dirty,
|
||||
le32_to_cpu(hint), hints_valid);
|
||||
if (r) {
|
||||
DMERR("policy couldn't load cache block %llu",
|
||||
(unsigned long long) from_cblock(to_cblock(cb)));
|
||||
}
|
||||
}
|
||||
|
||||
return r;
|
||||
|
@ -1238,10 +1410,28 @@ static int __load_mappings(struct dm_cache_metadata *cmd,
|
|||
}
|
||||
}
|
||||
|
||||
if (separate_dirty_bits(cmd)) {
|
||||
r = dm_bitset_cursor_begin(&cmd->dirty_info, cmd->dirty_root,
|
||||
from_cblock(cmd->cache_blocks),
|
||||
&cmd->dirty_cursor);
|
||||
if (r) {
|
||||
dm_array_cursor_end(&cmd->hint_cursor);
|
||||
dm_array_cursor_end(&cmd->mapping_cursor);
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
for (cb = 0; ; cb++) {
|
||||
r = __load_mapping(cmd, cb, hints_valid,
|
||||
&cmd->mapping_cursor, &cmd->hint_cursor,
|
||||
fn, context);
|
||||
if (separate_dirty_bits(cmd))
|
||||
r = __load_mapping_v2(cmd, cb, hints_valid,
|
||||
&cmd->mapping_cursor,
|
||||
&cmd->hint_cursor,
|
||||
&cmd->dirty_cursor,
|
||||
fn, context);
|
||||
else
|
||||
r = __load_mapping_v1(cmd, cb, hints_valid,
|
||||
&cmd->mapping_cursor, &cmd->hint_cursor,
|
||||
fn, context);
|
||||
if (r)
|
||||
goto out;
|
||||
|
||||
|
@ -1264,12 +1454,23 @@ static int __load_mappings(struct dm_cache_metadata *cmd,
|
|||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (separate_dirty_bits(cmd)) {
|
||||
r = dm_bitset_cursor_next(&cmd->dirty_cursor);
|
||||
if (r) {
|
||||
DMERR("dm_bitset_cursor_next for dirty failed");
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
out:
|
||||
dm_array_cursor_end(&cmd->mapping_cursor);
|
||||
if (hints_valid)
|
||||
dm_array_cursor_end(&cmd->hint_cursor);
|
||||
|
||||
if (separate_dirty_bits(cmd))
|
||||
dm_bitset_cursor_end(&cmd->dirty_cursor);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
|
@ -1352,13 +1553,55 @@ static int __dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty
|
|||
|
||||
}
|
||||
|
||||
int dm_cache_set_dirty(struct dm_cache_metadata *cmd,
|
||||
dm_cblock_t cblock, bool dirty)
|
||||
/*
 * Applies the first 'nr_bits' bits of 'bits' one cache block at a time
 * through __dirty(), stopping at and returning the first error.
 * Returns 0 once every bit has been applied.
 */
static int __set_dirty_bits_v1(struct dm_cache_metadata *cmd, unsigned nr_bits, unsigned long *bits)
{
	unsigned cb = 0;

	while (cb < nr_bits) {
		int err = __dirty(cmd, to_cblock(cb), test_bit(cb, bits));

		if (err)
			return err;

		cb++;
	}

	return 0;
}
|
||||
|
||||
/*
 * Value callback handed to dm_bitset_new(): stores in *value whether bit
 * 'index' is set in the bitmap passed as 'context'.  Always returns 0.
 */
static int is_dirty_callback(uint32_t index, bool *value, void *context)
{
	*value = test_bit(index, (unsigned long *) context);

	return 0;
}
|
||||
|
||||
static int __set_dirty_bits_v2(struct dm_cache_metadata *cmd, unsigned nr_bits, unsigned long *bits)
|
||||
{
|
||||
int r = 0;
|
||||
|
||||
/* nr_bits is really just a sanity check */
|
||||
if (nr_bits != from_cblock(cmd->cache_blocks)) {
|
||||
DMERR("dirty bitset is wrong size");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
r = dm_bitset_del(&cmd->dirty_info, cmd->dirty_root);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
cmd->changed = true;
|
||||
return dm_bitset_new(&cmd->dirty_info, &cmd->dirty_root, nr_bits, is_dirty_callback, bits);
|
||||
}
|
||||
|
||||
int dm_cache_set_dirty_bits(struct dm_cache_metadata *cmd,
|
||||
unsigned nr_bits,
|
||||
unsigned long *bits)
|
||||
{
|
||||
int r;
|
||||
|
||||
WRITE_LOCK(cmd);
|
||||
r = __dirty(cmd, cblock, dirty);
|
||||
if (separate_dirty_bits(cmd))
|
||||
r = __set_dirty_bits_v2(cmd, nr_bits, bits);
|
||||
else
|
||||
r = __set_dirty_bits_v1(cmd, nr_bits, bits);
|
||||
WRITE_UNLOCK(cmd);
|
||||
|
||||
return r;
|
||||
|
|
|
@ -45,18 +45,20 @@
|
|||
* As these various flags are defined they should be added to the
|
||||
* following masks.
|
||||
*/
|
||||
|
||||
#define DM_CACHE_FEATURE_COMPAT_SUPP 0UL
|
||||
#define DM_CACHE_FEATURE_COMPAT_RO_SUPP 0UL
|
||||
#define DM_CACHE_FEATURE_INCOMPAT_SUPP 0UL
|
||||
|
||||
/*
|
||||
* Reopens or creates a new, empty metadata volume.
|
||||
* Returns an ERR_PTR on failure.
|
||||
* Reopens or creates a new, empty metadata volume. Returns an ERR_PTR on
|
||||
* failure. If reopening then features must match.
|
||||
*/
|
||||
struct dm_cache_metadata *dm_cache_metadata_open(struct block_device *bdev,
|
||||
sector_t data_block_size,
|
||||
bool may_format_device,
|
||||
size_t policy_hint_size);
|
||||
size_t policy_hint_size,
|
||||
unsigned metadata_version);
|
||||
|
||||
void dm_cache_metadata_close(struct dm_cache_metadata *cmd);
|
||||
|
||||
|
@ -91,7 +93,8 @@ int dm_cache_load_mappings(struct dm_cache_metadata *cmd,
|
|||
load_mapping_fn fn,
|
||||
void *context);
|
||||
|
||||
int dm_cache_set_dirty(struct dm_cache_metadata *cmd, dm_cblock_t cblock, bool dirty);
|
||||
int dm_cache_set_dirty_bits(struct dm_cache_metadata *cmd,
|
||||
unsigned nr_bits, unsigned long *bits);
|
||||
|
||||
struct dm_cache_statistics {
|
||||
uint32_t read_hits;
|
||||
|
|
|
@ -179,6 +179,7 @@ enum cache_io_mode {
|
|||
struct cache_features {
|
||||
enum cache_metadata_mode mode;
|
||||
enum cache_io_mode io_mode;
|
||||
unsigned metadata_version;
|
||||
};
|
||||
|
||||
struct cache_stats {
|
||||
|
@ -248,7 +249,7 @@ struct cache {
|
|||
/*
|
||||
* Fields for converting from sectors to blocks.
|
||||
*/
|
||||
uint32_t sectors_per_block;
|
||||
sector_t sectors_per_block;
|
||||
int sectors_per_block_shift;
|
||||
|
||||
spinlock_t lock;
|
||||
|
@ -2534,13 +2535,14 @@ static void init_features(struct cache_features *cf)
|
|||
{
|
||||
cf->mode = CM_WRITE;
|
||||
cf->io_mode = CM_IO_WRITEBACK;
|
||||
cf->metadata_version = 1;
|
||||
}
|
||||
|
||||
static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
|
||||
char **error)
|
||||
{
|
||||
static struct dm_arg _args[] = {
|
||||
{0, 1, "Invalid number of cache feature arguments"},
|
||||
{0, 2, "Invalid number of cache feature arguments"},
|
||||
};
|
||||
|
||||
int r;
|
||||
|
@ -2566,6 +2568,9 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
|
|||
else if (!strcasecmp(arg, "passthrough"))
|
||||
cf->io_mode = CM_IO_PASSTHROUGH;
|
||||
|
||||
else if (!strcasecmp(arg, "metadata2"))
|
||||
cf->metadata_version = 2;
|
||||
|
||||
else {
|
||||
*error = "Unrecognised cache feature requested";
|
||||
return -EINVAL;
|
||||
|
@ -2820,7 +2825,8 @@ static int cache_create(struct cache_args *ca, struct cache **result)
|
|||
|
||||
cmd = dm_cache_metadata_open(cache->metadata_dev->bdev,
|
||||
ca->block_size, may_format,
|
||||
dm_cache_policy_get_hint_size(cache->policy));
|
||||
dm_cache_policy_get_hint_size(cache->policy),
|
||||
ca->features.metadata_version);
|
||||
if (IS_ERR(cmd)) {
|
||||
*error = "Error creating metadata object";
|
||||
r = PTR_ERR(cmd);
|
||||
|
@ -3165,21 +3171,16 @@ static int cache_end_io(struct dm_target *ti, struct bio *bio, int error)
|
|||
|
||||
static int write_dirty_bitset(struct cache *cache)
|
||||
{
|
||||
unsigned i, r;
|
||||
int r;
|
||||
|
||||
if (get_cache_mode(cache) >= CM_READ_ONLY)
|
||||
return -EINVAL;
|
||||
|
||||
for (i = 0; i < from_cblock(cache->cache_size); i++) {
|
||||
r = dm_cache_set_dirty(cache->cmd, to_cblock(i),
|
||||
is_dirty(cache, to_cblock(i)));
|
||||
if (r) {
|
||||
metadata_operation_failed(cache, "dm_cache_set_dirty", r);
|
||||
return r;
|
||||
}
|
||||
}
|
||||
r = dm_cache_set_dirty_bits(cache->cmd, from_cblock(cache->cache_size), cache->dirty_bitset);
|
||||
if (r)
|
||||
metadata_operation_failed(cache, "dm_cache_set_dirty_bits", r);
|
||||
|
||||
return 0;
|
||||
return r;
|
||||
}
|
||||
|
||||
static int write_discard_bitset(struct cache *cache)
|
||||
|
@ -3540,11 +3541,11 @@ static void cache_status(struct dm_target *ti, status_type_t type,
|
|||
|
||||
residency = policy_residency(cache->policy);
|
||||
|
||||
DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ",
|
||||
DMEMIT("%u %llu/%llu %llu %llu/%llu %u %u %u %u %u %u %lu ",
|
||||
(unsigned)DM_CACHE_METADATA_BLOCK_SIZE,
|
||||
(unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata),
|
||||
(unsigned long long)nr_blocks_metadata,
|
||||
cache->sectors_per_block,
|
||||
(unsigned long long)cache->sectors_per_block,
|
||||
(unsigned long long) from_cblock(residency),
|
||||
(unsigned long long) from_cblock(cache->cache_size),
|
||||
(unsigned) atomic_read(&cache->stats.read_hit),
|
||||
|
@ -3555,14 +3556,19 @@ static void cache_status(struct dm_target *ti, status_type_t type,
|
|||
(unsigned) atomic_read(&cache->stats.promotion),
|
||||
(unsigned long) atomic_read(&cache->nr_dirty));
|
||||
|
||||
if (cache->features.metadata_version == 2)
|
||||
DMEMIT("2 metadata2 ");
|
||||
else
|
||||
DMEMIT("1 ");
|
||||
|
||||
if (writethrough_mode(&cache->features))
|
||||
DMEMIT("1 writethrough ");
|
||||
DMEMIT("writethrough ");
|
||||
|
||||
else if (passthrough_mode(&cache->features))
|
||||
DMEMIT("1 passthrough ");
|
||||
DMEMIT("passthrough ");
|
||||
|
||||
else if (writeback_mode(&cache->features))
|
||||
DMEMIT("1 writeback ");
|
||||
DMEMIT("writeback ");
|
||||
|
||||
else {
|
||||
DMERR("%s: internal error: unknown io mode: %d",
|
||||
|
@ -3810,7 +3816,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
|
|||
|
||||
static struct target_type cache_target = {
|
||||
.name = "cache",
|
||||
.version = {1, 9, 0},
|
||||
.version = {1, 10, 0},
|
||||
.module = THIS_MODULE,
|
||||
.ctr = cache_ctr,
|
||||
.dtr = cache_dtr,
|
||||
|
|
|
@ -24,6 +24,11 @@
|
|||
*/
|
||||
#define MIN_FREE_RESHAPE_SPACE to_sector(4*4096)
|
||||
|
||||
/*
|
||||
* Minimum journal space 4 MiB in sectors.
|
||||
*/
|
||||
#define MIN_RAID456_JOURNAL_SPACE (4*2048)
|
||||
|
||||
static bool devices_handle_discard_safely = false;
|
||||
|
||||
/*
|
||||
|
@ -73,6 +78,9 @@ struct raid_dev {
|
|||
#define __CTR_FLAG_DATA_OFFSET 13 /* 2 */ /* Only with reshapable raid4/5/6/10! */
|
||||
#define __CTR_FLAG_RAID10_USE_NEAR_SETS 14 /* 2 */ /* Only with raid10! */
|
||||
|
||||
/* New for v1.10.0 */
|
||||
#define __CTR_FLAG_JOURNAL_DEV 15 /* 2 */ /* Only with raid4/5/6! */
|
||||
|
||||
/*
|
||||
* Flags for rs->ctr_flags field.
|
||||
*/
|
||||
|
@ -91,6 +99,7 @@ struct raid_dev {
|
|||
#define CTR_FLAG_DELTA_DISKS (1 << __CTR_FLAG_DELTA_DISKS)
|
||||
#define CTR_FLAG_DATA_OFFSET (1 << __CTR_FLAG_DATA_OFFSET)
|
||||
#define CTR_FLAG_RAID10_USE_NEAR_SETS (1 << __CTR_FLAG_RAID10_USE_NEAR_SETS)
|
||||
#define CTR_FLAG_JOURNAL_DEV (1 << __CTR_FLAG_JOURNAL_DEV)
|
||||
|
||||
/*
|
||||
* Definitions of various constructor flags to
|
||||
|
@ -163,7 +172,8 @@ struct raid_dev {
|
|||
CTR_FLAG_STRIPE_CACHE | \
|
||||
CTR_FLAG_REGION_SIZE | \
|
||||
CTR_FLAG_DELTA_DISKS | \
|
||||
CTR_FLAG_DATA_OFFSET)
|
||||
CTR_FLAG_DATA_OFFSET | \
|
||||
CTR_FLAG_JOURNAL_DEV)
|
||||
|
||||
#define RAID6_VALID_FLAGS (CTR_FLAG_SYNC | \
|
||||
CTR_FLAG_REBUILD | \
|
||||
|
@ -173,7 +183,8 @@ struct raid_dev {
|
|||
CTR_FLAG_STRIPE_CACHE | \
|
||||
CTR_FLAG_REGION_SIZE | \
|
||||
CTR_FLAG_DELTA_DISKS | \
|
||||
CTR_FLAG_DATA_OFFSET)
|
||||
CTR_FLAG_DATA_OFFSET | \
|
||||
CTR_FLAG_JOURNAL_DEV)
|
||||
/* ...valid options definitions per raid level */
|
||||
|
||||
/*
|
||||
|
@ -222,6 +233,12 @@ struct raid_set {
|
|||
struct raid_type *raid_type;
|
||||
struct dm_target_callbacks callbacks;
|
||||
|
||||
/* Optional raid4/5/6 journal device */
|
||||
struct journal_dev {
|
||||
struct dm_dev *dev;
|
||||
struct md_rdev rdev;
|
||||
} journal_dev;
|
||||
|
||||
struct raid_dev dev[0];
|
||||
};
|
||||
|
||||
|
@ -306,6 +323,7 @@ static struct arg_name_flag {
|
|||
{ CTR_FLAG_DATA_OFFSET, "data_offset"},
|
||||
{ CTR_FLAG_DELTA_DISKS, "delta_disks"},
|
||||
{ CTR_FLAG_RAID10_USE_NEAR_SETS, "raid10_use_near_sets"},
|
||||
{ CTR_FLAG_JOURNAL_DEV, "journal_dev" },
|
||||
};
|
||||
|
||||
/* Return argument name string for given @flag */
|
||||
|
@ -370,7 +388,7 @@ static bool rs_is_reshapable(struct raid_set *rs)
|
|||
/* Return true, if raid set in @rs is recovering */
|
||||
static bool rs_is_recovering(struct raid_set *rs)
|
||||
{
|
||||
return rs->md.recovery_cp < rs->dev[0].rdev.sectors;
|
||||
return rs->md.recovery_cp < rs->md.dev_sectors;
|
||||
}
|
||||
|
||||
/* Return true, if raid set in @rs is reshaping */
|
||||
|
@ -627,7 +645,8 @@ static void rs_set_capacity(struct raid_set *rs)
|
|||
* is unintended in case of out-of-place reshaping
|
||||
*/
|
||||
rdev_for_each(rdev, mddev)
|
||||
rdev->sectors = mddev->dev_sectors;
|
||||
if (!test_bit(Journal, &rdev->flags))
|
||||
rdev->sectors = mddev->dev_sectors;
|
||||
|
||||
set_capacity(gendisk, mddev->array_sectors);
|
||||
revalidate_disk(gendisk);
|
||||
|
@ -713,6 +732,11 @@ static void raid_set_free(struct raid_set *rs)
|
|||
{
|
||||
int i;
|
||||
|
||||
if (rs->journal_dev.dev) {
|
||||
md_rdev_clear(&rs->journal_dev.rdev);
|
||||
dm_put_device(rs->ti, rs->journal_dev.dev);
|
||||
}
|
||||
|
||||
for (i = 0; i < rs->raid_disks; i++) {
|
||||
if (rs->dev[i].meta_dev)
|
||||
dm_put_device(rs->ti, rs->dev[i].meta_dev);
|
||||
|
@ -760,10 +784,11 @@ static int parse_dev_params(struct raid_set *rs, struct dm_arg_set *as)
|
|||
rs->dev[i].data_dev = NULL;
|
||||
|
||||
/*
|
||||
* There are no offsets, since there is a separate device
|
||||
* for data and metadata.
|
||||
* There are no offsets initially.
|
||||
* Out of place reshape will set them accordingly.
|
||||
*/
|
||||
rs->dev[i].rdev.data_offset = 0;
|
||||
rs->dev[i].rdev.new_data_offset = 0;
|
||||
rs->dev[i].rdev.mddev = &rs->md;
|
||||
|
||||
arg = dm_shift_arg(as);
|
||||
|
@ -821,6 +846,9 @@ static int parse_dev_params(struct raid_set *rs, struct dm_arg_set *as)
|
|||
rebuild++;
|
||||
}
|
||||
|
||||
if (rs->journal_dev.dev)
|
||||
list_add_tail(&rs->journal_dev.rdev.same_set, &rs->md.disks);
|
||||
|
||||
if (metadata_available) {
|
||||
rs->md.external = 0;
|
||||
rs->md.persistent = 1;
|
||||
|
@ -1026,6 +1054,8 @@ too_many:
|
|||
* [max_write_behind <sectors>] See '-write-behind=' (man mdadm)
|
||||
* [stripe_cache <sectors>] Stripe cache size for higher RAIDs
|
||||
* [region_size <sectors>] Defines granularity of bitmap
|
||||
* [journal_dev <dev>] raid4/5/6 journaling device
|
||||
* (i.e. write hole closing log)
|
||||
*
|
||||
* RAID10-only options:
|
||||
* [raid10_copies <# copies>] Number of copies. (Default: 2)
|
||||
|
@ -1133,7 +1163,7 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
|
|||
/*
|
||||
* Parameters that take a string value are checked here.
|
||||
*/
|
||||
|
||||
/* "raid10_format {near|offset|far} */
|
||||
if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_RAID10_FORMAT))) {
|
||||
if (test_and_set_bit(__CTR_FLAG_RAID10_FORMAT, &rs->ctr_flags)) {
|
||||
rs->ti->error = "Only one 'raid10_format' argument pair allowed";
|
||||
|
@ -1151,6 +1181,41 @@ static int parse_raid_params(struct raid_set *rs, struct dm_arg_set *as,
|
|||
continue;
|
||||
}
|
||||
|
||||
/* "journal_dev dev" */
|
||||
if (!strcasecmp(key, dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_DEV))) {
|
||||
int r;
|
||||
struct md_rdev *jdev;
|
||||
|
||||
if (test_and_set_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags)) {
|
||||
rs->ti->error = "Only one raid4/5/6 set journaling device allowed";
|
||||
return -EINVAL;
|
||||
}
|
||||
if (!rt_is_raid456(rt)) {
|
||||
rs->ti->error = "'journal_dev' is an invalid parameter for this RAID type";
|
||||
return -EINVAL;
|
||||
}
|
||||
r = dm_get_device(rs->ti, arg, dm_table_get_mode(rs->ti->table),
|
||||
&rs->journal_dev.dev);
|
||||
if (r) {
|
||||
rs->ti->error = "raid4/5/6 journal device lookup failure";
|
||||
return r;
|
||||
}
|
||||
jdev = &rs->journal_dev.rdev;
|
||||
md_rdev_init(jdev);
|
||||
jdev->mddev = &rs->md;
|
||||
jdev->bdev = rs->journal_dev.dev->bdev;
|
||||
jdev->sectors = to_sector(i_size_read(jdev->bdev->bd_inode));
|
||||
if (jdev->sectors < MIN_RAID456_JOURNAL_SPACE) {
|
||||
rs->ti->error = "No space for raid4/5/6 journal";
|
||||
return -ENOSPC;
|
||||
}
|
||||
set_bit(Journal, &jdev->flags);
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Parameters with number values from here on.
|
||||
*/
|
||||
if (kstrtoint(arg, 10, &value) < 0) {
|
||||
rs->ti->error = "Bad numerical argument given in raid params";
|
||||
return -EINVAL;
|
||||
|
@ -1425,6 +1490,25 @@ static unsigned int rs_data_stripes(struct raid_set *rs)
|
|||
return rs->raid_disks - rs->raid_type->parity_devs;
|
||||
}
|
||||
|
||||
/*
|
||||
* Retrieve rdev->sectors from any valid raid device of @rs
|
||||
* to allow userpace to pass in arbitray "- -" device tupples.
|
||||
*/
|
||||
static sector_t __rdev_sectors(struct raid_set *rs)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < rs->md.raid_disks; i++) {
|
||||
struct md_rdev *rdev = &rs->dev[i].rdev;
|
||||
|
||||
if (!test_bit(Journal, &rdev->flags) &&
|
||||
rdev->bdev && rdev->sectors)
|
||||
return rdev->sectors;
|
||||
}
|
||||
|
||||
BUG(); /* Constructor ensures we got some. */
|
||||
}
|
||||
|
||||
/* Calculate the sectors per device and per array used for @rs */
|
||||
static int rs_set_dev_and_array_sectors(struct raid_set *rs, bool use_mddev)
|
||||
{
|
||||
|
@ -1468,7 +1552,8 @@ static int rs_set_dev_and_array_sectors(struct raid_set *rs, bool use_mddev)
|
|||
array_sectors = (data_stripes + delta_disks) * dev_sectors;
|
||||
|
||||
rdev_for_each(rdev, mddev)
|
||||
rdev->sectors = dev_sectors;
|
||||
if (!test_bit(Journal, &rdev->flags))
|
||||
rdev->sectors = dev_sectors;
|
||||
|
||||
mddev->array_sectors = array_sectors;
|
||||
mddev->dev_sectors = dev_sectors;
|
||||
|
@ -1510,9 +1595,9 @@ static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors)
|
|||
else if (dev_sectors == MaxSector)
|
||||
/* Prevent recovery */
|
||||
__rs_setup_recovery(rs, MaxSector);
|
||||
else if (rs->dev[0].rdev.sectors < dev_sectors)
|
||||
else if (__rdev_sectors(rs) < dev_sectors)
|
||||
/* Grown raid set */
|
||||
__rs_setup_recovery(rs, rs->dev[0].rdev.sectors);
|
||||
__rs_setup_recovery(rs, __rdev_sectors(rs));
|
||||
else
|
||||
__rs_setup_recovery(rs, MaxSector);
|
||||
}
|
||||
|
@ -1851,18 +1936,21 @@ static int rs_check_reshape(struct raid_set *rs)
|
|||
return -EPERM;
|
||||
}
|
||||
|
||||
static int read_disk_sb(struct md_rdev *rdev, int size)
|
||||
static int read_disk_sb(struct md_rdev *rdev, int size, bool force_reload)
|
||||
{
|
||||
BUG_ON(!rdev->sb_page);
|
||||
|
||||
if (rdev->sb_loaded)
|
||||
if (rdev->sb_loaded && !force_reload)
|
||||
return 0;
|
||||
|
||||
rdev->sb_loaded = 0;
|
||||
|
||||
if (!sync_page_io(rdev, 0, size, rdev->sb_page, REQ_OP_READ, 0, true)) {
|
||||
DMERR("Failed to read superblock of device at position %d",
|
||||
rdev->raid_disk);
|
||||
md_error(rdev->mddev, rdev);
|
||||
return -EINVAL;
|
||||
set_bit(Faulty, &rdev->flags);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
rdev->sb_loaded = 1;
|
||||
|
@ -1990,7 +2078,7 @@ static int super_load(struct md_rdev *rdev, struct md_rdev *refdev)
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
r = read_disk_sb(rdev, rdev->sb_size);
|
||||
r = read_disk_sb(rdev, rdev->sb_size, false);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
|
@ -2146,6 +2234,9 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
|
|||
*/
|
||||
d = 0;
|
||||
rdev_for_each(r, mddev) {
|
||||
if (test_bit(Journal, &rdev->flags))
|
||||
continue;
|
||||
|
||||
if (test_bit(FirstUse, &r->flags))
|
||||
new_devs++;
|
||||
|
||||
|
@ -2201,7 +2292,8 @@ static int super_init_validation(struct raid_set *rs, struct md_rdev *rdev)
|
|||
*/
|
||||
sb_retrieve_failed_devices(sb, failed_devices);
|
||||
rdev_for_each(r, mddev) {
|
||||
if (!r->sb_page)
|
||||
if (test_bit(Journal, &rdev->flags) ||
|
||||
!r->sb_page)
|
||||
continue;
|
||||
sb2 = page_address(r->sb_page);
|
||||
sb2->failed_devices = 0;
|
||||
|
@ -2253,7 +2345,7 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
|
|||
struct mddev *mddev = &rs->md;
|
||||
struct dm_raid_superblock *sb;
|
||||
|
||||
if (rs_is_raid0(rs) || !rdev->sb_page)
|
||||
if (rs_is_raid0(rs) || !rdev->sb_page || rdev->raid_disk < 0)
|
||||
return 0;
|
||||
|
||||
sb = page_address(rdev->sb_page);
|
||||
|
@ -2278,7 +2370,7 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
|
|||
|
||||
/* Enable bitmap creation for RAID levels != 0 */
|
||||
mddev->bitmap_info.offset = rt_is_raid0(rs->raid_type) ? 0 : to_sector(4096);
|
||||
rdev->mddev->bitmap_info.default_offset = mddev->bitmap_info.offset;
|
||||
mddev->bitmap_info.default_offset = mddev->bitmap_info.offset;
|
||||
|
||||
if (!test_and_clear_bit(FirstUse, &rdev->flags)) {
|
||||
/* Retrieve device size stored in superblock to be prepared for shrink */
|
||||
|
@ -2316,21 +2408,22 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev)
|
|||
static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
|
||||
{
|
||||
int r;
|
||||
struct raid_dev *dev;
|
||||
struct md_rdev *rdev, *tmp, *freshest;
|
||||
struct md_rdev *rdev, *freshest;
|
||||
struct mddev *mddev = &rs->md;
|
||||
|
||||
freshest = NULL;
|
||||
rdev_for_each_safe(rdev, tmp, mddev) {
|
||||
rdev_for_each(rdev, mddev) {
|
||||
if (test_bit(Journal, &rdev->flags))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Skipping super_load due to CTR_FLAG_SYNC will cause
|
||||
* the array to undergo initialization again as
|
||||
* though it were new. This is the intended effect
|
||||
* of the "sync" directive.
|
||||
*
|
||||
* When reshaping capability is added, we must ensure
|
||||
* that the "sync" directive is disallowed during the
|
||||
* reshape.
|
||||
* With reshaping capability added, we must ensure
|
||||
* that the "sync" directive is disallowed during the reshape.
|
||||
*/
|
||||
if (test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags))
|
||||
continue;
|
||||
|
@ -2347,6 +2440,7 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
|
|||
case 0:
|
||||
break;
|
||||
default:
|
||||
/* This is a failure to read the superblock from the metadata device. */
|
||||
/*
|
||||
* We have to keep any raid0 data/metadata device pairs or
|
||||
* the MD raid0 personality will fail to start the array.
|
||||
|
@ -2354,33 +2448,16 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
|
|||
if (rs_is_raid0(rs))
|
||||
continue;
|
||||
|
||||
dev = container_of(rdev, struct raid_dev, rdev);
|
||||
if (dev->meta_dev)
|
||||
dm_put_device(ti, dev->meta_dev);
|
||||
|
||||
dev->meta_dev = NULL;
|
||||
rdev->meta_bdev = NULL;
|
||||
|
||||
if (rdev->sb_page)
|
||||
put_page(rdev->sb_page);
|
||||
|
||||
rdev->sb_page = NULL;
|
||||
|
||||
rdev->sb_loaded = 0;
|
||||
|
||||
/*
|
||||
* We might be able to salvage the data device
|
||||
* even though the meta device has failed. For
|
||||
* now, we behave as though '- -' had been
|
||||
* set for this device in the table.
|
||||
* We keep the dm_devs to be able to emit the device tuple
|
||||
* properly on the table line in raid_status() (rather than
|
||||
* mistakenly acting as if '- -' got passed into the constructor).
|
||||
*
|
||||
* The rdev has to stay on the same_set list to allow for
|
||||
* the attempt to restore faulty devices on second resume.
|
||||
*/
|
||||
if (dev->data_dev)
|
||||
dm_put_device(ti, dev->data_dev);
|
||||
|
||||
dev->data_dev = NULL;
|
||||
rdev->bdev = NULL;
|
||||
|
||||
list_del(&rdev->same_set);
|
||||
rdev->raid_disk = rdev->saved_raid_disk = -1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2401,7 +2478,9 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs)
|
|||
return -EINVAL;
|
||||
|
||||
rdev_for_each(rdev, mddev)
|
||||
if ((rdev != freshest) && super_validate(rs, rdev))
|
||||
if (!test_bit(Journal, &rdev->flags) &&
|
||||
rdev != freshest &&
|
||||
super_validate(rs, rdev))
|
||||
return -EINVAL;
|
||||
return 0;
|
||||
}
|
||||
|
@ -2488,10 +2567,12 @@ static int rs_adjust_data_offsets(struct raid_set *rs)
|
|||
return -ENOSPC;
|
||||
}
|
||||
out:
|
||||
/* Adjust data offsets on all rdevs */
|
||||
/* Adjust data offsets on all rdevs but on any raid4/5/6 journal device */
|
||||
rdev_for_each(rdev, &rs->md) {
|
||||
rdev->data_offset = data_offset;
|
||||
rdev->new_data_offset = new_data_offset;
|
||||
if (!test_bit(Journal, &rdev->flags)) {
|
||||
rdev->data_offset = data_offset;
|
||||
rdev->new_data_offset = new_data_offset;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -2504,8 +2585,10 @@ static void __reorder_raid_disk_indexes(struct raid_set *rs)
|
|||
struct md_rdev *rdev;
|
||||
|
||||
rdev_for_each(rdev, &rs->md) {
|
||||
rdev->raid_disk = i++;
|
||||
rdev->saved_raid_disk = rdev->new_raid_disk = -1;
|
||||
if (!test_bit(Journal, &rdev->flags)) {
|
||||
rdev->raid_disk = i++;
|
||||
rdev->saved_raid_disk = rdev->new_raid_disk = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2845,7 +2928,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
|||
if (r)
|
||||
goto bad;
|
||||
|
||||
calculated_dev_sectors = rs->dev[0].rdev.sectors;
|
||||
calculated_dev_sectors = rs->md.dev_sectors;
|
||||
|
||||
/*
|
||||
* Backup any new raid set level, layout, ...
|
||||
|
@ -2858,7 +2941,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
|||
if (r)
|
||||
goto bad;
|
||||
|
||||
resize = calculated_dev_sectors != rs->dev[0].rdev.sectors;
|
||||
resize = calculated_dev_sectors != __rdev_sectors(rs);
|
||||
|
||||
INIT_WORK(&rs->md.event_work, do_table_event);
|
||||
ti->private = rs;
|
||||
|
@ -2902,6 +2985,13 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
|||
goto bad;
|
||||
}
|
||||
|
||||
/* We can't takeover a journaled raid4/5/6 */
|
||||
if (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags)) {
|
||||
ti->error = "Can't takeover a journaled raid4/5/6 set";
|
||||
r = -EPERM;
|
||||
goto bad;
|
||||
}
|
||||
|
||||
/*
|
||||
* If a takeover is needed, userspace sets any additional
|
||||
* devices to rebuild and we can check for a valid request here.
|
||||
|
@ -2923,6 +3013,18 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
|
|||
rs_setup_recovery(rs, MaxSector);
|
||||
rs_set_new(rs);
|
||||
} else if (rs_reshape_requested(rs)) {
|
||||
/*
|
||||
* No need to check for 'ongoing' takeover here, because takeover
|
||||
* is an instant operation as opposed to an ongoing reshape.
|
||||
*/
|
||||
|
||||
/* We can't reshape a journaled raid4/5/6 */
|
||||
if (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags)) {
|
||||
ti->error = "Can't reshape a journaled raid4/5/6 set";
|
||||
r = -EPERM;
|
||||
goto bad;
|
||||
}
|
||||
|
||||
/*
|
||||
* We can only prepare for a reshape here, because the
|
||||
* raid set needs to run to provide the respective reshape
|
||||
|
@ -3071,18 +3173,23 @@ static const char *decipher_sync_action(struct mddev *mddev)
|
|||
}
|
||||
|
||||
/*
|
||||
* Return status string @rdev
|
||||
* Return status string for @rdev
|
||||
*
|
||||
* Status characters:
|
||||
*
|
||||
* 'D' = Dead/Failed device
|
||||
* 'D' = Dead/Failed raid set component or raid4/5/6 journal device
|
||||
* 'a' = Alive but not in-sync
|
||||
* 'A' = Alive and in-sync
|
||||
* 'A' = Alive and in-sync raid set component or alive raid4/5/6 journal device
|
||||
* '-' = Non-existing device (i.e. uspace passed '- -' into the ctr)
|
||||
*/
|
||||
static const char *__raid_dev_status(struct md_rdev *rdev, bool array_in_sync)
|
||||
{
|
||||
if (test_bit(Faulty, &rdev->flags))
|
||||
if (!rdev->bdev)
|
||||
return "-";
|
||||
else if (test_bit(Faulty, &rdev->flags))
|
||||
return "D";
|
||||
else if (test_bit(Journal, &rdev->flags))
|
||||
return "A";
|
||||
else if (!array_in_sync || !test_bit(In_sync, &rdev->flags))
|
||||
return "a";
|
||||
else
|
||||
|
@ -3151,7 +3258,8 @@ static sector_t rs_get_progress(struct raid_set *rs,
|
|||
* being initialized.
|
||||
*/
|
||||
rdev_for_each(rdev, mddev)
|
||||
if (!test_bit(In_sync, &rdev->flags))
|
||||
if (!test_bit(Journal, &rdev->flags) &&
|
||||
!test_bit(In_sync, &rdev->flags))
|
||||
*array_in_sync = true;
|
||||
#if 0
|
||||
r = 0; /* HM FIXME: TESTME: https://bugzilla.redhat.com/show_bug.cgi?id=1210637 ? */
|
||||
|
@ -3183,7 +3291,6 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
|||
sector_t progress, resync_max_sectors, resync_mismatches;
|
||||
const char *sync_action;
|
||||
struct raid_type *rt;
|
||||
struct md_rdev *rdev;
|
||||
|
||||
switch (type) {
|
||||
case STATUSTYPE_INFO:
|
||||
|
@ -3204,9 +3311,9 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
|||
atomic64_read(&mddev->resync_mismatches) : 0;
|
||||
sync_action = decipher_sync_action(&rs->md);
|
||||
|
||||
/* HM FIXME: do we want another state char for raid0? It shows 'D' or 'A' now */
|
||||
rdev_for_each(rdev, mddev)
|
||||
DMEMIT(__raid_dev_status(rdev, array_in_sync));
|
||||
/* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */
|
||||
for (i = 0; i < rs->raid_disks; i++)
|
||||
DMEMIT(__raid_dev_status(&rs->dev[i].rdev, array_in_sync));
|
||||
|
||||
/*
|
||||
* In-sync/Reshape ratio:
|
||||
|
@ -3252,6 +3359,12 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
|||
* so retrieving it from the first raid disk is sufficient.
|
||||
*/
|
||||
DMEMIT(" %llu", (unsigned long long) rs->dev[0].rdev.data_offset);
|
||||
|
||||
/*
|
||||
* v1.10.0+:
|
||||
*/
|
||||
DMEMIT(" %s", test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags) ?
|
||||
__raid_dev_status(&rs->journal_dev.rdev, 0) : "-");
|
||||
break;
|
||||
|
||||
case STATUSTYPE_TABLE:
|
||||
|
@ -3265,7 +3378,8 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
|||
raid_param_cnt += rebuild_disks * 2 +
|
||||
write_mostly_params +
|
||||
hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_NO_ARGS) +
|
||||
hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_ONE_ARG) * 2;
|
||||
hweight32(rs->ctr_flags & CTR_FLAG_OPTIONS_ONE_ARG) * 2 +
|
||||
(test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags) ? 2 : 0);
|
||||
/* Emit table line */
|
||||
DMEMIT("%s %u %u", rs->raid_type->name, raid_param_cnt, mddev->new_chunk_sectors);
|
||||
if (test_bit(__CTR_FLAG_RAID10_FORMAT, &rs->ctr_flags))
|
||||
|
@ -3312,6 +3426,9 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
|||
if (test_bit(__CTR_FLAG_MIN_RECOVERY_RATE, &rs->ctr_flags))
|
||||
DMEMIT(" %s %d", dm_raid_arg_name_by_flag(CTR_FLAG_MIN_RECOVERY_RATE),
|
||||
mddev->sync_speed_min);
|
||||
if (test_bit(__CTR_FLAG_JOURNAL_DEV, &rs->ctr_flags))
|
||||
DMEMIT(" %s %s", dm_raid_arg_name_by_flag(CTR_FLAG_JOURNAL_DEV),
|
||||
__get_dev_name(rs->journal_dev.dev));
|
||||
DMEMIT(" %d", rs->raid_disks);
|
||||
for (i = 0; i < rs->raid_disks; i++)
|
||||
DMEMIT(" %s %s", __get_dev_name(rs->dev[i].meta_dev),
|
||||
|
@ -3347,10 +3464,11 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv)
|
|||
else {
|
||||
if (!strcasecmp(argv[0], "check"))
|
||||
set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
|
||||
else if (!!strcasecmp(argv[0], "repair"))
|
||||
else if (!strcasecmp(argv[0], "repair")) {
|
||||
set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
|
||||
set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
|
||||
} else
|
||||
return -EINVAL;
|
||||
set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
|
||||
set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
|
||||
}
|
||||
if (mddev->ro == 2) {
|
||||
/* A write to sync_action is enough to justify
|
||||
|
@ -3427,11 +3545,14 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
|
|||
|
||||
memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices));
|
||||
|
||||
for (i = 0; i < rs->md.raid_disks; i++) {
|
||||
for (i = 0; i < mddev->raid_disks; i++) {
|
||||
r = &rs->dev[i].rdev;
|
||||
if (test_bit(Faulty, &r->flags) && r->sb_page &&
|
||||
sync_page_io(r, 0, r->sb_size, r->sb_page,
|
||||
REQ_OP_READ, 0, true)) {
|
||||
/* HM FIXME: enhance journal device recovery processing */
|
||||
if (test_bit(Journal, &r->flags))
|
||||
continue;
|
||||
|
||||
if (test_bit(Faulty, &r->flags) &&
|
||||
r->meta_bdev && !read_disk_sb(r, r->sb_size, true)) {
|
||||
DMINFO("Faulty %s device #%d has readable super block."
|
||||
" Attempting to revive it.",
|
||||
rs->raid_type->name, i);
|
||||
|
@ -3445,22 +3566,26 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
|
|||
* '>= 0' - meaning we must call this function
|
||||
* ourselves.
|
||||
*/
|
||||
if ((r->raid_disk >= 0) &&
|
||||
(mddev->pers->hot_remove_disk(mddev, r) != 0))
|
||||
/* Failed to revive this device, try next */
|
||||
continue;
|
||||
|
||||
r->raid_disk = i;
|
||||
r->saved_raid_disk = i;
|
||||
flags = r->flags;
|
||||
clear_bit(In_sync, &r->flags); /* Mandatory for hot remove. */
|
||||
if (r->raid_disk >= 0) {
|
||||
if (mddev->pers->hot_remove_disk(mddev, r)) {
|
||||
/* Failed to revive this device, try next */
|
||||
r->flags = flags;
|
||||
continue;
|
||||
}
|
||||
} else
|
||||
r->raid_disk = r->saved_raid_disk = i;
|
||||
|
||||
clear_bit(Faulty, &r->flags);
|
||||
clear_bit(WriteErrorSeen, &r->flags);
|
||||
clear_bit(In_sync, &r->flags);
|
||||
|
||||
if (mddev->pers->hot_add_disk(mddev, r)) {
|
||||
r->raid_disk = -1;
|
||||
r->saved_raid_disk = -1;
|
||||
/* Failed to revive this device, try next */
|
||||
r->raid_disk = r->saved_raid_disk = -1;
|
||||
r->flags = flags;
|
||||
} else {
|
||||
clear_bit(In_sync, &r->flags);
|
||||
r->recovery_offset = 0;
|
||||
set_bit(i, (void *) cleared_failed_devices);
|
||||
cleared = true;
|
||||
|
@ -3473,6 +3598,9 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
|
|||
uint64_t failed_devices[DISKS_ARRAY_ELEMS];
|
||||
|
||||
rdev_for_each(r, &rs->md) {
|
||||
if (test_bit(Journal, &r->flags))
|
||||
continue;
|
||||
|
||||
sb = page_address(r->sb_page);
|
||||
sb_retrieve_failed_devices(sb, failed_devices);
|
||||
|
||||
|
@ -3651,7 +3779,7 @@ static void raid_resume(struct dm_target *ti)
|
|||
|
||||
static struct target_type raid_target = {
|
||||
.name = "raid",
|
||||
.version = {1, 9, 1},
|
||||
.version = {1, 10, 0},
|
||||
.module = THIS_MODULE,
|
||||
.ctr = raid_ctr,
|
||||
.dtr = raid_dtr,
|
||||
|
|
|
@ -17,8 +17,8 @@
|
|||
#include <linux/module.h>
|
||||
|
||||
#define DM_MSG_PREFIX "multipath round-robin"
|
||||
#define RR_MIN_IO 1000
|
||||
#define RR_VERSION "1.1.0"
|
||||
#define RR_MIN_IO 1
|
||||
#define RR_VERSION "1.2.0"
|
||||
|
||||
/*-----------------------------------------------------------------
|
||||
* Path-handling code, paths are held in lists
|
||||
|
@ -47,44 +47,19 @@ struct selector {
|
|||
struct list_head valid_paths;
|
||||
struct list_head invalid_paths;
|
||||
spinlock_t lock;
|
||||
struct dm_path * __percpu *current_path;
|
||||
struct percpu_counter repeat_count;
|
||||
};
|
||||
|
||||
static void set_percpu_current_path(struct selector *s, struct dm_path *path)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
*per_cpu_ptr(s->current_path, cpu) = path;
|
||||
}
|
||||
|
||||
static struct selector *alloc_selector(void)
|
||||
{
|
||||
struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL);
|
||||
|
||||
if (!s)
|
||||
return NULL;
|
||||
|
||||
INIT_LIST_HEAD(&s->valid_paths);
|
||||
INIT_LIST_HEAD(&s->invalid_paths);
|
||||
spin_lock_init(&s->lock);
|
||||
|
||||
s->current_path = alloc_percpu(struct dm_path *);
|
||||
if (!s->current_path)
|
||||
goto out_current_path;
|
||||
set_percpu_current_path(s, NULL);
|
||||
|
||||
if (percpu_counter_init(&s->repeat_count, 0, GFP_KERNEL))
|
||||
goto out_repeat_count;
|
||||
if (s) {
|
||||
INIT_LIST_HEAD(&s->valid_paths);
|
||||
INIT_LIST_HEAD(&s->invalid_paths);
|
||||
spin_lock_init(&s->lock);
|
||||
}
|
||||
|
||||
return s;
|
||||
|
||||
out_repeat_count:
|
||||
free_percpu(s->current_path);
|
||||
out_current_path:
|
||||
kfree(s);
|
||||
return NULL;;
|
||||
}
|
||||
|
||||
static int rr_create(struct path_selector *ps, unsigned argc, char **argv)
|
||||
|
@ -105,8 +80,6 @@ static void rr_destroy(struct path_selector *ps)
|
|||
|
||||
free_paths(&s->valid_paths);
|
||||
free_paths(&s->invalid_paths);
|
||||
free_percpu(s->current_path);
|
||||
percpu_counter_destroy(&s->repeat_count);
|
||||
kfree(s);
|
||||
ps->context = NULL;
|
||||
}
|
||||
|
@ -157,6 +130,11 @@ static int rr_add_path(struct path_selector *ps, struct dm_path *path,
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (repeat_count > 1) {
|
||||
DMWARN_LIMIT("repeat_count > 1 is deprecated, using 1 instead");
|
||||
repeat_count = 1;
|
||||
}
|
||||
|
||||
/* allocate the path */
|
||||
pi = kmalloc(sizeof(*pi), GFP_KERNEL);
|
||||
if (!pi) {
|
||||
|
@ -183,9 +161,6 @@ static void rr_fail_path(struct path_selector *ps, struct dm_path *p)
|
|||
struct path_info *pi = p->pscontext;
|
||||
|
||||
spin_lock_irqsave(&s->lock, flags);
|
||||
if (p == *this_cpu_ptr(s->current_path))
|
||||
set_percpu_current_path(s, NULL);
|
||||
|
||||
list_move(&pi->list, &s->invalid_paths);
|
||||
spin_unlock_irqrestore(&s->lock, flags);
|
||||
}
|
||||
|
@ -208,29 +183,15 @@ static struct dm_path *rr_select_path(struct path_selector *ps, size_t nr_bytes)
|
|||
unsigned long flags;
|
||||
struct selector *s = ps->context;
|
||||
struct path_info *pi = NULL;
|
||||
struct dm_path *current_path = NULL;
|
||||
|
||||
local_irq_save(flags);
|
||||
current_path = *this_cpu_ptr(s->current_path);
|
||||
if (current_path) {
|
||||
percpu_counter_dec(&s->repeat_count);
|
||||
if (percpu_counter_read_positive(&s->repeat_count) > 0) {
|
||||
local_irq_restore(flags);
|
||||
return current_path;
|
||||
}
|
||||
}
|
||||
|
||||
spin_lock(&s->lock);
|
||||
spin_lock_irqsave(&s->lock, flags);
|
||||
if (!list_empty(&s->valid_paths)) {
|
||||
pi = list_entry(s->valid_paths.next, struct path_info, list);
|
||||
list_move_tail(&pi->list, &s->valid_paths);
|
||||
percpu_counter_set(&s->repeat_count, pi->repeat_count);
|
||||
set_percpu_current_path(s, pi->path);
|
||||
current_path = pi->path;
|
||||
}
|
||||
spin_unlock_irqrestore(&s->lock, flags);
|
||||
|
||||
return current_path;
|
||||
return pi ? pi->path : NULL;
|
||||
}
|
||||
|
||||
static struct path_selector_type rr_ps = {
|
||||
|
|
|
@ -175,6 +175,7 @@ static void dm_stat_free(struct rcu_head *head)
|
|||
int cpu;
|
||||
struct dm_stat *s = container_of(head, struct dm_stat, rcu_head);
|
||||
|
||||
kfree(s->histogram_boundaries);
|
||||
kfree(s->program_id);
|
||||
kfree(s->aux_data);
|
||||
for_each_possible_cpu(cpu) {
|
||||
|
|
|
@ -974,10 +974,61 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
|
||||
|
||||
/*
|
||||
* Flush current->bio_list when the target map method blocks.
|
||||
* This fixes deadlocks in snapshot and possibly in other targets.
|
||||
*/
|
||||
struct dm_offload {
|
||||
struct blk_plug plug;
|
||||
struct blk_plug_cb cb;
|
||||
};
|
||||
|
||||
static void flush_current_bio_list(struct blk_plug_cb *cb, bool from_schedule)
|
||||
{
|
||||
struct dm_offload *o = container_of(cb, struct dm_offload, cb);
|
||||
struct bio_list list;
|
||||
struct bio *bio;
|
||||
|
||||
INIT_LIST_HEAD(&o->cb.list);
|
||||
|
||||
if (unlikely(!current->bio_list))
|
||||
return;
|
||||
|
||||
list = *current->bio_list;
|
||||
bio_list_init(current->bio_list);
|
||||
|
||||
while ((bio = bio_list_pop(&list))) {
|
||||
struct bio_set *bs = bio->bi_pool;
|
||||
if (unlikely(!bs) || bs == fs_bio_set) {
|
||||
bio_list_add(current->bio_list, bio);
|
||||
continue;
|
||||
}
|
||||
|
||||
spin_lock(&bs->rescue_lock);
|
||||
bio_list_add(&bs->rescue_list, bio);
|
||||
queue_work(bs->rescue_workqueue, &bs->rescue_work);
|
||||
spin_unlock(&bs->rescue_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static void dm_offload_start(struct dm_offload *o)
|
||||
{
|
||||
blk_start_plug(&o->plug);
|
||||
o->cb.callback = flush_current_bio_list;
|
||||
list_add(&o->cb.list, ¤t->plug->cb_list);
|
||||
}
|
||||
|
||||
static void dm_offload_end(struct dm_offload *o)
|
||||
{
|
||||
list_del(&o->cb.list);
|
||||
blk_finish_plug(&o->plug);
|
||||
}
|
||||
|
||||
static void __map_bio(struct dm_target_io *tio)
|
||||
{
|
||||
int r;
|
||||
sector_t sector;
|
||||
struct dm_offload o;
|
||||
struct bio *clone = &tio->clone;
|
||||
struct dm_target *ti = tio->ti;
|
||||
|
||||
|
@ -990,7 +1041,11 @@ static void __map_bio(struct dm_target_io *tio)
|
|||
*/
|
||||
atomic_inc(&tio->io->io_count);
|
||||
sector = clone->bi_iter.bi_sector;
|
||||
|
||||
dm_offload_start(&o);
|
||||
r = ti->type->map(ti, clone);
|
||||
dm_offload_end(&o);
|
||||
|
||||
if (r == DM_MAPIO_REMAPPED) {
|
||||
/* the bio has been remapped so dispatch it */
|
||||
|
||||
|
|
|
@ -976,6 +976,27 @@ int dm_array_cursor_next(struct dm_array_cursor *c)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(dm_array_cursor_next);
|
||||
|
||||
int dm_array_cursor_skip(struct dm_array_cursor *c, uint32_t count)
|
||||
{
|
||||
int r;
|
||||
|
||||
do {
|
||||
uint32_t remaining = le32_to_cpu(c->ab->nr_entries) - c->index;
|
||||
|
||||
if (count < remaining) {
|
||||
c->index += count;
|
||||
return 0;
|
||||
}
|
||||
|
||||
count -= remaining;
|
||||
r = dm_array_cursor_next(c);
|
||||
|
||||
} while (!r);
|
||||
|
||||
return r;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_array_cursor_skip);
|
||||
|
||||
void dm_array_cursor_get_value(struct dm_array_cursor *c, void **value_le)
|
||||
{
|
||||
*value_le = element_at(c->info, c->ab, c->index);
|
||||
|
|
|
@ -207,6 +207,7 @@ void dm_array_cursor_end(struct dm_array_cursor *c);
|
|||
|
||||
uint32_t dm_array_cursor_index(struct dm_array_cursor *c);
|
||||
int dm_array_cursor_next(struct dm_array_cursor *c);
|
||||
int dm_array_cursor_skip(struct dm_array_cursor *c, uint32_t count);
|
||||
|
||||
/*
|
||||
* value_le is only valid while the cursor points at the current value.
|
||||
|
|
|
@ -39,6 +39,48 @@ int dm_bitset_empty(struct dm_disk_bitset *info, dm_block_t *root)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(dm_bitset_empty);
|
||||
|
||||
struct packer_context {
|
||||
bit_value_fn fn;
|
||||
unsigned nr_bits;
|
||||
void *context;
|
||||
};
|
||||
|
||||
static int pack_bits(uint32_t index, void *value, void *context)
|
||||
{
|
||||
int r;
|
||||
struct packer_context *p = context;
|
||||
unsigned bit, nr = min(64u, p->nr_bits - (index * 64));
|
||||
uint64_t word = 0;
|
||||
bool bv;
|
||||
|
||||
for (bit = 0; bit < nr; bit++) {
|
||||
r = p->fn(index * 64 + bit, &bv, p->context);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (bv)
|
||||
set_bit(bit, (unsigned long *) &word);
|
||||
else
|
||||
clear_bit(bit, (unsigned long *) &word);
|
||||
}
|
||||
|
||||
*((__le64 *) value) = cpu_to_le64(word);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Create a bitset of @size bits whose values are supplied by @fn.
 *
 * The bits are packed 64 to a word by pack_bits() and stored through
 * dm_array_new(); the return value is whatever dm_array_new() returns.
 */
int dm_bitset_new(struct dm_disk_bitset *info, dm_block_t *root,
		  uint32_t size, bit_value_fn fn, void *context)
{
	struct packer_context p = {
		.fn = fn,
		.nr_bits = size,
		.context = context,
	};

	return dm_array_new(&info->array_info, root, dm_div_up(size, 64),
			    pack_bits, &p);
}
EXPORT_SYMBOL_GPL(dm_bitset_new);
|
||||
|
||||
int dm_bitset_resize(struct dm_disk_bitset *info, dm_block_t root,
|
||||
uint32_t old_nr_entries, uint32_t new_nr_entries,
|
||||
bool default_value, dm_block_t *new_root)
|
||||
|
@ -168,4 +210,108 @@ int dm_bitset_test_bit(struct dm_disk_bitset *info, dm_block_t root,
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(dm_bitset_test_bit);
|
||||
|
||||
static int cursor_next_array_entry(struct dm_bitset_cursor *c)
|
||||
{
|
||||
int r;
|
||||
__le64 *value;
|
||||
|
||||
r = dm_array_cursor_next(&c->cursor);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
dm_array_cursor_get_value(&c->cursor, (void **) &value);
|
||||
c->array_index++;
|
||||
c->bit_index = 0;
|
||||
c->current_bits = le64_to_cpu(*value);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int dm_bitset_cursor_begin(struct dm_disk_bitset *info,
|
||||
dm_block_t root, uint32_t nr_entries,
|
||||
struct dm_bitset_cursor *c)
|
||||
{
|
||||
int r;
|
||||
__le64 *value;
|
||||
|
||||
if (!nr_entries)
|
||||
return -ENODATA;
|
||||
|
||||
c->info = info;
|
||||
c->entries_remaining = nr_entries;
|
||||
|
||||
r = dm_array_cursor_begin(&info->array_info, root, &c->cursor);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
dm_array_cursor_get_value(&c->cursor, (void **) &value);
|
||||
c->array_index = 0;
|
||||
c->bit_index = 0;
|
||||
c->current_bits = le64_to_cpu(*value);
|
||||
|
||||
return r;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_bitset_cursor_begin);
|
||||
|
||||
void dm_bitset_cursor_end(struct dm_bitset_cursor *c)
|
||||
{
|
||||
return dm_array_cursor_end(&c->cursor);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_bitset_cursor_end);
|
||||
|
||||
int dm_bitset_cursor_next(struct dm_bitset_cursor *c)
|
||||
{
|
||||
int r = 0;
|
||||
|
||||
if (!c->entries_remaining)
|
||||
return -ENODATA;
|
||||
|
||||
c->entries_remaining--;
|
||||
if (++c->bit_index > 63)
|
||||
r = cursor_next_array_entry(c);
|
||||
|
||||
return r;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_bitset_cursor_next);
|
||||
|
||||
int dm_bitset_cursor_skip(struct dm_bitset_cursor *c, uint32_t count)
|
||||
{
|
||||
int r;
|
||||
__le64 *value;
|
||||
uint32_t nr_array_skip;
|
||||
uint32_t remaining_in_word = 64 - c->bit_index;
|
||||
|
||||
if (c->entries_remaining < count)
|
||||
return -ENODATA;
|
||||
|
||||
if (count < remaining_in_word) {
|
||||
c->bit_index += count;
|
||||
c->entries_remaining -= count;
|
||||
return 0;
|
||||
|
||||
} else {
|
||||
c->entries_remaining -= remaining_in_word;
|
||||
count -= remaining_in_word;
|
||||
}
|
||||
|
||||
nr_array_skip = (count / 64) + 1;
|
||||
r = dm_array_cursor_skip(&c->cursor, nr_array_skip);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
dm_array_cursor_get_value(&c->cursor, (void **) &value);
|
||||
c->entries_remaining -= count;
|
||||
c->array_index += nr_array_skip;
|
||||
c->bit_index = count & 63;
|
||||
c->current_bits = le64_to_cpu(*value);
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_bitset_cursor_skip);
|
||||
|
||||
bool dm_bitset_cursor_get_value(struct dm_bitset_cursor *c)
|
||||
{
|
||||
return test_bit(c->bit_index, (unsigned long *) &c->current_bits);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(dm_bitset_cursor_get_value);
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
|
|
|
@ -92,6 +92,22 @@ void dm_disk_bitset_init(struct dm_transaction_manager *tm,
|
|||
*/
|
||||
int dm_bitset_empty(struct dm_disk_bitset *info, dm_block_t *new_root);
|
||||
|
||||
/*
|
||||
* Creates a new bitset populated with values provided by a callback
|
||||
* function. This is more efficient than creating an empty bitset,
|
||||
* resizing, and then setting values since that process incurs a lot of
|
||||
* copying.
|
||||
*
|
||||
* info - describes the array
|
||||
* root - the root block of the array on disk
|
||||
 * size - the number of bits in the bitset
|
||||
* fn - the callback
|
||||
* context - passed to the callback
|
||||
*/
|
||||
typedef int (*bit_value_fn)(uint32_t index, bool *value, void *context);
|
||||
int dm_bitset_new(struct dm_disk_bitset *info, dm_block_t *root,
|
||||
uint32_t size, bit_value_fn fn, void *context);
|
||||
|
||||
/*
|
||||
* Resize the bitset.
|
||||
*
|
||||
|
@ -161,6 +177,29 @@ int dm_bitset_test_bit(struct dm_disk_bitset *info, dm_block_t root,
|
|||
int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root,
|
||||
dm_block_t *new_root);
|
||||
|
||||
struct dm_bitset_cursor {
|
||||
struct dm_disk_bitset *info;
|
||||
struct dm_array_cursor cursor;
|
||||
|
||||
uint32_t entries_remaining;
|
||||
uint32_t array_index;
|
||||
uint32_t bit_index;
|
||||
uint64_t current_bits;
|
||||
};
|
||||
|
||||
/*
|
||||
 * Make sure you've flushed any dm_disk_bitset and updated the root before
|
||||
* using this.
|
||||
*/
|
||||
int dm_bitset_cursor_begin(struct dm_disk_bitset *info,
|
||||
dm_block_t root, uint32_t nr_entries,
|
||||
struct dm_bitset_cursor *c);
|
||||
void dm_bitset_cursor_end(struct dm_bitset_cursor *c);
|
||||
|
||||
int dm_bitset_cursor_next(struct dm_bitset_cursor *c);
|
||||
int dm_bitset_cursor_skip(struct dm_bitset_cursor *c, uint32_t count);
|
||||
bool dm_bitset_cursor_get_value(struct dm_bitset_cursor *c);
|
||||
|
||||
/*----------------------------------------------------------------*/
|
||||
|
||||
#endif /* _LINUX_DM_BITSET_H */
|
||||
|
|
|
@ -462,7 +462,7 @@ int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
|
|||
int r;
|
||||
|
||||
p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
|
||||
if (IS_ERR(p))
|
||||
if (unlikely(IS_ERR(p)))
|
||||
return PTR_ERR(p);
|
||||
|
||||
aux = dm_bufio_get_aux_data(to_buffer(*result));
|
||||
|
@ -498,7 +498,7 @@ int dm_bm_write_lock(struct dm_block_manager *bm,
|
|||
return -EPERM;
|
||||
|
||||
p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
|
||||
if (IS_ERR(p))
|
||||
if (unlikely(IS_ERR(p)))
|
||||
return PTR_ERR(p);
|
||||
|
||||
aux = dm_bufio_get_aux_data(to_buffer(*result));
|
||||
|
@ -531,7 +531,7 @@ int dm_bm_read_try_lock(struct dm_block_manager *bm,
|
|||
int r;
|
||||
|
||||
p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result);
|
||||
if (IS_ERR(p))
|
||||
if (unlikely(IS_ERR(p)))
|
||||
return PTR_ERR(p);
|
||||
if (unlikely(!p))
|
||||
return -EWOULDBLOCK;
|
||||
|
@ -567,7 +567,7 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm,
|
|||
return -EPERM;
|
||||
|
||||
p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result);
|
||||
if (IS_ERR(p))
|
||||
if (unlikely(IS_ERR(p)))
|
||||
return PTR_ERR(p);
|
||||
|
||||
memset(p, 0, dm_bm_block_size(bm));
|
||||
|
|
|
@ -272,7 +272,12 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
|
|||
int r;
|
||||
struct del_stack *s;
|
||||
|
||||
s = kmalloc(sizeof(*s), GFP_NOIO);
|
||||
/*
|
||||
* dm_btree_del() is called via an ioctl, as such should be
|
||||
* considered an FS op. We can't recurse back into the FS, so we
|
||||
* allocate GFP_NOFS.
|
||||
*/
|
||||
s = kmalloc(sizeof(*s), GFP_NOFS);
|
||||
if (!s)
|
||||
return -ENOMEM;
|
||||
s->info = info;
|
||||
|
@ -1139,6 +1144,17 @@ int dm_btree_cursor_next(struct dm_btree_cursor *c)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(dm_btree_cursor_next);
|
||||
|
||||
/*
 * Advance a btree cursor by @count entries, one dm_btree_cursor_next()
 * call at a time.
 *
 * Stops at the first failing step and returns its error; returns 0 when
 * every step succeeded (including when @count is 0).
 */
int dm_btree_cursor_skip(struct dm_btree_cursor *c, uint32_t count)
{
	int r = 0;

	for (; count && !r; count--)
		r = dm_btree_cursor_next(c);

	return r;
}
EXPORT_SYMBOL_GPL(dm_btree_cursor_skip);
|
||||
|
||||
int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le)
|
||||
{
|
||||
if (c->depth) {
|
||||
|
|
|
@ -209,6 +209,7 @@ int dm_btree_cursor_begin(struct dm_btree_info *info, dm_block_t root,
|
|||
bool prefetch_leaves, struct dm_btree_cursor *c);
|
||||
void dm_btree_cursor_end(struct dm_btree_cursor *c);
|
||||
int dm_btree_cursor_next(struct dm_btree_cursor *c);
|
||||
int dm_btree_cursor_skip(struct dm_btree_cursor *c, uint32_t count);
|
||||
int dm_btree_cursor_get_value(struct dm_btree_cursor *c, uint64_t *key, void *value_le);
|
||||
|
||||
#endif /* _LINUX_DM_BTREE_H */
|
||||
|
|
|
@ -626,13 +626,19 @@ int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm,
|
|||
void *root_le, size_t len)
|
||||
{
|
||||
int r;
|
||||
struct disk_sm_root *smr = root_le;
|
||||
struct disk_sm_root smr;
|
||||
|
||||
if (len < sizeof(struct disk_sm_root)) {
|
||||
DMERR("sm_metadata root too small");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
/*
|
||||
* We don't know the alignment of the root_le buffer, so need to
|
||||
* copy into a new structure.
|
||||
*/
|
||||
memcpy(&smr, root_le, sizeof(smr));
|
||||
|
||||
r = sm_ll_init(ll, tm);
|
||||
if (r < 0)
|
||||
return r;
|
||||
|
@ -644,10 +650,10 @@ int sm_ll_open_metadata(struct ll_disk *ll, struct dm_transaction_manager *tm,
|
|||
ll->max_entries = metadata_ll_max_entries;
|
||||
ll->commit = metadata_ll_commit;
|
||||
|
||||
ll->nr_blocks = le64_to_cpu(smr->nr_blocks);
|
||||
ll->nr_allocated = le64_to_cpu(smr->nr_allocated);
|
||||
ll->bitmap_root = le64_to_cpu(smr->bitmap_root);
|
||||
ll->ref_count_root = le64_to_cpu(smr->ref_count_root);
|
||||
ll->nr_blocks = le64_to_cpu(smr.nr_blocks);
|
||||
ll->nr_allocated = le64_to_cpu(smr.nr_allocated);
|
||||
ll->bitmap_root = le64_to_cpu(smr.bitmap_root);
|
||||
ll->ref_count_root = le64_to_cpu(smr.ref_count_root);
|
||||
|
||||
return ll->open_index(ll);
|
||||
}
|
||||
|
|
|
@ -544,7 +544,7 @@ static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t
|
|||
|
||||
static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks);
|
||||
|
||||
static struct dm_space_map ops = {
|
||||
static const struct dm_space_map ops = {
|
||||
.destroy = sm_metadata_destroy,
|
||||
.extend = sm_metadata_extend,
|
||||
.get_nr_blocks = sm_metadata_get_nr_blocks,
|
||||
|
@ -671,7 +671,7 @@ static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where,
|
|||
return -EINVAL;
|
||||
}
|
||||
|
||||
static struct dm_space_map bootstrap_ops = {
|
||||
static const struct dm_space_map bootstrap_ops = {
|
||||
.destroy = sm_bootstrap_destroy,
|
||||
.extend = sm_bootstrap_extend,
|
||||
.get_nr_blocks = sm_bootstrap_get_nr_blocks,
|
||||
|
|
Loading…
Add table
Reference in a new issue