// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright 2023 Red Hat
 */

#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/device-mapper.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>

#include "admin-state.h"
#include "block-map.h"
#include "completion.h"
#include "constants.h"
#include "data-vio.h"
#include "dedupe.h"
#include "dump.h"
#include "encodings.h"
#include "errors.h"
#include "flush.h"
#include "io-submitter.h"
#include "logger.h"
#include "memory-alloc.h"
#include "message-stats.h"
#include "recovery-journal.h"
#include "repair.h"
#include "slab-depot.h"
#include "status-codes.h"
#include "string-utils.h"
#include "thread-device.h"
#include "thread-registry.h"
#include "thread-utils.h"
#include "types.h"
#include "vdo.h"
#include "vio.h"

enum admin_phases {
	GROW_LOGICAL_PHASE_START,
	GROW_LOGICAL_PHASE_GROW_BLOCK_MAP,
	GROW_LOGICAL_PHASE_END,
	GROW_LOGICAL_PHASE_ERROR,
	GROW_PHYSICAL_PHASE_START,
	GROW_PHYSICAL_PHASE_COPY_SUMMARY,
	GROW_PHYSICAL_PHASE_UPDATE_COMPONENTS,
	GROW_PHYSICAL_PHASE_USE_NEW_SLABS,
	GROW_PHYSICAL_PHASE_END,
	GROW_PHYSICAL_PHASE_ERROR,
	LOAD_PHASE_START,
	LOAD_PHASE_LOAD_DEPOT,
	LOAD_PHASE_MAKE_DIRTY,
	LOAD_PHASE_PREPARE_TO_ALLOCATE,
	LOAD_PHASE_SCRUB_SLABS,
	LOAD_PHASE_DATA_REDUCTION,
	LOAD_PHASE_FINISHED,
	LOAD_PHASE_DRAIN_JOURNAL,
	LOAD_PHASE_WAIT_FOR_READ_ONLY,
	PRE_LOAD_PHASE_START,
	PRE_LOAD_PHASE_LOAD_COMPONENTS,
	PRE_LOAD_PHASE_END,
	PREPARE_GROW_PHYSICAL_PHASE_START,
	RESUME_PHASE_START,
	RESUME_PHASE_ALLOW_READ_ONLY_MODE,
	RESUME_PHASE_DEDUPE,
	RESUME_PHASE_DEPOT,
	RESUME_PHASE_JOURNAL,
	RESUME_PHASE_BLOCK_MAP,
	RESUME_PHASE_LOGICAL_ZONES,
	RESUME_PHASE_PACKER,
	RESUME_PHASE_FLUSHER,
	RESUME_PHASE_DATA_VIOS,
	RESUME_PHASE_END,
	SUSPEND_PHASE_START,
	SUSPEND_PHASE_PACKER,
	SUSPEND_PHASE_DATA_VIOS,
	SUSPEND_PHASE_DEDUPE,
	SUSPEND_PHASE_FLUSHES,
	SUSPEND_PHASE_LOGICAL_ZONES,
	SUSPEND_PHASE_BLOCK_MAP,
	SUSPEND_PHASE_JOURNAL,
	SUSPEND_PHASE_DEPOT,
	SUSPEND_PHASE_READ_ONLY_WAIT,
	SUSPEND_PHASE_WRITE_SUPER_BLOCK,
	SUSPEND_PHASE_END,
};

static const char * const ADMIN_PHASE_NAMES[] = {
	"GROW_LOGICAL_PHASE_START",
	"GROW_LOGICAL_PHASE_GROW_BLOCK_MAP",
	"GROW_LOGICAL_PHASE_END",
	"GROW_LOGICAL_PHASE_ERROR",
	"GROW_PHYSICAL_PHASE_START",
	"GROW_PHYSICAL_PHASE_COPY_SUMMARY",
	"GROW_PHYSICAL_PHASE_UPDATE_COMPONENTS",
	"GROW_PHYSICAL_PHASE_USE_NEW_SLABS",
	"GROW_PHYSICAL_PHASE_END",
	"GROW_PHYSICAL_PHASE_ERROR",
	"LOAD_PHASE_START",
	"LOAD_PHASE_LOAD_DEPOT",
	"LOAD_PHASE_MAKE_DIRTY",
	"LOAD_PHASE_PREPARE_TO_ALLOCATE",
	"LOAD_PHASE_SCRUB_SLABS",
	"LOAD_PHASE_DATA_REDUCTION",
	"LOAD_PHASE_FINISHED",
	"LOAD_PHASE_DRAIN_JOURNAL",
	"LOAD_PHASE_WAIT_FOR_READ_ONLY",
	"PRE_LOAD_PHASE_START",
	"PRE_LOAD_PHASE_LOAD_COMPONENTS",
	"PRE_LOAD_PHASE_END",
	"PREPARE_GROW_PHYSICAL_PHASE_START",
	"RESUME_PHASE_START",
	"RESUME_PHASE_ALLOW_READ_ONLY_MODE",
	"RESUME_PHASE_DEDUPE",
	"RESUME_PHASE_DEPOT",
	"RESUME_PHASE_JOURNAL",
	"RESUME_PHASE_BLOCK_MAP",
	"RESUME_PHASE_LOGICAL_ZONES",
	"RESUME_PHASE_PACKER",
	"RESUME_PHASE_FLUSHER",
	"RESUME_PHASE_DATA_VIOS",
	"RESUME_PHASE_END",
	"SUSPEND_PHASE_START",
	"SUSPEND_PHASE_PACKER",
	"SUSPEND_PHASE_DATA_VIOS",
	"SUSPEND_PHASE_DEDUPE",
	"SUSPEND_PHASE_FLUSHES",
	"SUSPEND_PHASE_LOGICAL_ZONES",
	"SUSPEND_PHASE_BLOCK_MAP",
	"SUSPEND_PHASE_JOURNAL",
	"SUSPEND_PHASE_DEPOT",
	"SUSPEND_PHASE_READ_ONLY_WAIT",
	"SUSPEND_PHASE_WRITE_SUPER_BLOCK",
	"SUSPEND_PHASE_END",
};

/* If we bump this, update the arrays below */
#define TABLE_VERSION 4

/* arrays for handling different table versions */
static const u8 REQUIRED_ARGC[] = { 10, 12, 9, 7, 6 };
/* pool name no longer used. only here for verification of older versions */
static const u8 POOL_NAME_ARG_INDEX[] = { 8, 10, 8 };

/*
 * Track in-use instance numbers using a flat bit array.
 *
 * O(n) run time isn't ideal, but if we have 1000 VDO devices in use simultaneously we still only
 * need to scan 16 words, so it's not likely to be a big deal compared to other resource usage.
 */

/*
 * This minimum size for the bit array creates a numbering space of 0-999, which allows
 * successive starts of the same volume to have different instance numbers in any
 * reasonably-sized test. Changing instances on restart allows vdoMonReport to detect that
 * the ephemeral stats have reset to zero.
 */
#define BIT_COUNT_MINIMUM 1000
/* Grow the bit array by this many bits when needed */
#define BIT_COUNT_INCREMENT 100
2023-11-16 21:24:06 -05:00
struct instance_tracker {
unsigned int bit_count ;
unsigned long * words ;
unsigned int count ;
unsigned int next ;
} ;
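
/*
 * A note on the fields, derived from their use below: bit_count is the size of the
 * allocated bitmap, words is the bitmap itself, count is the number of instance
 * numbers currently in use, and next is a hint at which bit to start searching for
 * a free instance number.
 */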

static DEFINE_MUTEX(instances_lock);
static struct instance_tracker instances;

/**
 * free_device_config() - Free a device config created by parse_device_config().
 * @config: The config to free.
 */
static void free_device_config(struct device_config *config)
{
	if (config == NULL)
		return;

	if (config->owned_device != NULL)
		dm_put_device(config->owning_target, config->owned_device);

	vdo_free(config->parent_device_name);
	vdo_free(config->original_string);

	/* Reduce the chance a use-after-free (as in BZ 1669960) happens to work. */
	memset(config, 0, sizeof(*config));
	vdo_free(config);
}

/**
 * get_version_number() - Decide the version number from argv.
 *
 * @argc: The number of table values.
 * @argv: The array of table values.
 * @error_ptr: A pointer to return an error string in.
 * @version_ptr: A pointer to return the version.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int get_version_number(int argc, char **argv, char **error_ptr,
			      unsigned int *version_ptr)
{
	/* version, if it exists, is in a form of V<n> */
	if (sscanf(argv[0], "V%u", version_ptr) == 1) {
		if (*version_ptr < 1 || *version_ptr > TABLE_VERSION) {
			*error_ptr = "Unknown version number detected";
			return VDO_BAD_CONFIGURATION;
		}
	} else {
		/* V0 actually has no version number in the table string */
		*version_ptr = 0;
	}

	/*
	 * V0 and V1 have no optional parameters. There will always be a parameter for thread
	 * config, even if it's a "." to show it's an empty list.
	 */
	if (*version_ptr <= 1) {
		if (argc != REQUIRED_ARGC[*version_ptr]) {
			*error_ptr = "Incorrect number of arguments for version";
			return VDO_BAD_CONFIGURATION;
		}
	} else if (argc < REQUIRED_ARGC[*version_ptr]) {
		*error_ptr = "Incorrect number of arguments for version";
		return VDO_BAD_CONFIGURATION;
	}

	if (*version_ptr != TABLE_VERSION) {
		vdo_log_warning("Detected version mismatch between kernel module and tools kernel: %d, tool: %d",
				TABLE_VERSION, *version_ptr);
		vdo_log_warning("Please consider upgrading management tools to match kernel.");
	}
	return VDO_SUCCESS;
}
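
/*
 * For example, a table line whose first argument is "V4" parses as version 4 (the
 * current TABLE_VERSION), while a first argument not of the form V<n>, such as a
 * device path, means the line is treated as version 0.
 */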

/* Free a list of non-NULL string pointers, and then the list itself. */
static void free_string_array(char **string_array)
{
	unsigned int offset;

	for (offset = 0; string_array[offset] != NULL; offset++)
		vdo_free(string_array[offset]);
	vdo_free(string_array);
}

/*
 * Split the input string into substrings, separated at occurrences of the indicated character,
 * returning a null-terminated list of string pointers.
 *
 * The string pointers and the pointer array itself should both be freed with vdo_free() when no
 * longer needed. This can be done with free_string_array() (above) if the pointers in the array
 * are not changed. Since the array and copied strings are allocated by this function, it may only
 * be used in contexts where allocation is permitted.
 *
 * Empty substrings are not ignored; that is, returned substrings may be empty strings if the
 * separator occurs twice in a row.
 */
static int split_string(const char *string, char separator, char ***substring_array_ptr)
{
	unsigned int current_substring = 0, substring_count = 1;
	const char *s;
	char **substrings;
	int result;
	ptrdiff_t length;

	for (s = string; *s != 0; s++) {
		if (*s == separator)
			substring_count++;
	}

	result = vdo_allocate(substring_count + 1, char *, "string-splitting array",
			      &substrings);
	if (result != VDO_SUCCESS)
		return result;

	for (s = string; *s != 0; s++) {
		if (*s == separator) {
			ptrdiff_t length = s - string;

			result = vdo_allocate(length + 1, char, "split string",
					      &substrings[current_substring]);
			if (result != VDO_SUCCESS) {
				free_string_array(substrings);
				return result;
			}
			/*
			 * Trailing NUL is already in place after allocation; deal with the zero or
			 * more non-NUL bytes in the string.
			 */
			if (length > 0)
				memcpy(substrings[current_substring], string, length);
			string = s + 1;
			current_substring++;
			BUG_ON(current_substring >= substring_count);
		}
	}

	/* Process final string, with no trailing separator. */
	BUG_ON(current_substring != (substring_count - 1));
	length = strlen(string);

	result = vdo_allocate(length + 1, char, "split string",
			      &substrings[current_substring]);
	if (result != VDO_SUCCESS) {
		free_string_array(substrings);
		return result;
	}
	memcpy(substrings[current_substring], string, length);
	current_substring++;
	/* substrings[current_substring] is NULL already */
	*substring_array_ptr = substrings;
	return VDO_SUCCESS;
}
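
/*
 * Illustrative example: split_string("a,,b", ',', &parts) produces the array
 * { "a", "", "b", NULL }, since empty substrings between adjacent separators are
 * preserved.
 */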

/*
 * Join the input substrings into one string, joined with the indicated character, returning a
 * string. array_length is a bound on the number of valid elements in substring_array, in case it
 * is not NULL-terminated.
 */
static int join_strings(char **substring_array, size_t array_length, char separator,
			char **string_ptr)
{
	size_t string_length = 0;
	size_t i;
	int result;
	char *output, *current_position;

	for (i = 0; (i < array_length) && (substring_array[i] != NULL); i++)
		string_length += strlen(substring_array[i]) + 1;

	result = vdo_allocate(string_length, char, __func__, &output);
	if (result != VDO_SUCCESS)
		return result;

	current_position = &output[0];
	for (i = 0; (i < array_length) && (substring_array[i] != NULL); i++) {
		current_position = vdo_append_to_buffer(current_position,
							output + string_length, "%s",
							substring_array[i]);
		*current_position = separator;
		current_position++;
	}

	/* We output one too many separators; replace the last with a zero byte. */
	if (current_position != output)
		*(current_position - 1) = '\0';

	*string_ptr = output;
	return VDO_SUCCESS;
}
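
/*
 * Illustrative example: joining { "a", "bc", NULL } with separator ' ' yields the
 * single allocated string "a bc"; joining the output of split_string() with the
 * same separator reconstructs the original string.
 */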

/**
 * parse_bool() - Parse a two-valued option into a bool.
 * @bool_str: The string value to convert to a bool.
 * @true_str: The string value which should be converted to true.
 * @false_str: The string value which should be converted to false.
 * @bool_ptr: A pointer to return the bool value in.
 *
 * Return: VDO_SUCCESS or an error if bool_str is neither true_str nor false_str.
 */
static inline int __must_check parse_bool(const char *bool_str, const char *true_str,
					  const char *false_str, bool *bool_ptr)
{
	bool value = false;

	if (strcmp(bool_str, true_str) == 0)
		value = true;
	else if (strcmp(bool_str, false_str) == 0)
		value = false;
	else
		return VDO_BAD_CONFIGURATION;

	*bool_ptr = value;
	return VDO_SUCCESS;
}
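
/*
 * For example, parse_bool("512", "512", "4096", &enable_512e) sets enable_512e to
 * true; this is how the logical block size table field is parsed in
 * parse_device_config() below.
 */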

/**
 * process_one_thread_config_spec() - Process one component of a thread parameter configuration
 *				      string and update the configuration data structure.
 * @thread_param_type: The type of thread specified.
 * @count: The thread count requested.
 * @config: The configuration data structure to update.
 *
 * If the thread count requested is invalid, a message is logged and -EINVAL returned. If the
 * thread name is unknown, a message is logged but no error is returned.
 *
 * Return: VDO_SUCCESS or -EINVAL
 */
static int process_one_thread_config_spec(const char *thread_param_type,
					  unsigned int count,
					  struct thread_count_config *config)
{
	/* Handle limited thread parameters */
	if (strcmp(thread_param_type, "bioRotationInterval") == 0) {
		if (count == 0) {
			vdo_log_error("thread config string error: 'bioRotationInterval' of at least 1 is required");
			return -EINVAL;
		} else if (count > VDO_BIO_ROTATION_INTERVAL_LIMIT) {
			vdo_log_error("thread config string error: 'bioRotationInterval' cannot be higher than %d",
				      VDO_BIO_ROTATION_INTERVAL_LIMIT);
			return -EINVAL;
		}

		config->bio_rotation_interval = count;
		return VDO_SUCCESS;
	}

	if (strcmp(thread_param_type, "logical") == 0) {
		if (count > MAX_VDO_LOGICAL_ZONES) {
			vdo_log_error("thread config string error: at most %d 'logical' threads are allowed",
				      MAX_VDO_LOGICAL_ZONES);
			return -EINVAL;
		}

		config->logical_zones = count;
		return VDO_SUCCESS;
	}

	if (strcmp(thread_param_type, "physical") == 0) {
		if (count > MAX_VDO_PHYSICAL_ZONES) {
			vdo_log_error("thread config string error: at most %d 'physical' threads are allowed",
				      MAX_VDO_PHYSICAL_ZONES);
			return -EINVAL;
		}

		config->physical_zones = count;
		return VDO_SUCCESS;
	}

	/* Handle other thread count parameters */
	if (count > MAXIMUM_VDO_THREADS) {
		vdo_log_error("thread config string error: at most %d '%s' threads are allowed",
			      MAXIMUM_VDO_THREADS, thread_param_type);
		return -EINVAL;
	}

	if (strcmp(thread_param_type, "hash") == 0) {
		config->hash_zones = count;
		return VDO_SUCCESS;
	}

	if (strcmp(thread_param_type, "cpu") == 0) {
		if (count == 0) {
			vdo_log_error("thread config string error: at least one 'cpu' thread required");
			return -EINVAL;
		}

		config->cpu_threads = count;
		return VDO_SUCCESS;
	}

	if (strcmp(thread_param_type, "ack") == 0) {
		config->bio_ack_threads = count;
		return VDO_SUCCESS;
	}

	if (strcmp(thread_param_type, "bio") == 0) {
		if (count == 0) {
			vdo_log_error("thread config string error: at least one 'bio' thread required");
			return -EINVAL;
		}

		config->bio_threads = count;
		return VDO_SUCCESS;
	}

	/*
	 * Don't fail, just log. This will handle version mismatches between user mode tools and
	 * kernel.
	 */
	vdo_log_info("unknown thread parameter type \"%s\"", thread_param_type);
	return VDO_SUCCESS;
}

/**
 * parse_one_thread_config_spec() - Parse one component of a thread parameter configuration string
 *				    and update the configuration data structure.
 * @spec: The thread parameter specification string.
 * @config: The configuration data to be updated.
 */
static int parse_one_thread_config_spec(const char *spec,
					struct thread_count_config *config)
{
	unsigned int count;
	char **fields;
	int result;

	result = split_string(spec, '=', &fields);
	if (result != VDO_SUCCESS)
		return result;

	if ((fields[0] == NULL) || (fields[1] == NULL) || (fields[2] != NULL)) {
		vdo_log_error("thread config string error: expected thread parameter assignment, saw \"%s\"",
			      spec);
		free_string_array(fields);
		return -EINVAL;
	}

	result = kstrtouint(fields[1], 10, &count);
	if (result) {
		vdo_log_error("thread config string error: integer value needed, found \"%s\"",
			      fields[1]);
		free_string_array(fields);
		return result;
	}

	result = process_one_thread_config_spec(fields[0], count, config);
	free_string_array(fields);
	return result;
}

/**
 * parse_thread_config_string() - Parse the configuration string passed and update the specified
 *				  counts and other parameters of various types of threads to be
 *				  created.
 * @string: Thread parameter configuration string.
 * @config: The thread configuration data to update.
 *
 * The configuration string should contain one or more comma-separated specs of the form
 * "typename=number"; the supported type names are "cpu", "ack", "bio", "bioRotationInterval",
 * "logical", "physical", and "hash".
 *
 * If an error occurs during parsing of a single key/value pair, we deem it serious enough to stop
 * further parsing.
 *
 * This function can't set the "reason" value the caller wants to pass back, because we'd want to
 * format it to say which field was invalid, and we can't allocate the "reason" strings
 * dynamically. So if an error occurs, we'll log the details and pass back an error.
 *
 * Return: VDO_SUCCESS or -EINVAL or -ENOMEM
 */
static int parse_thread_config_string(const char *string,
				      struct thread_count_config *config)
{
	int result = VDO_SUCCESS;
	char **specs;

	if (strcmp(".", string) != 0) {
		unsigned int i;

		result = split_string(string, ',', &specs);
		if (result != VDO_SUCCESS)
			return result;

		for (i = 0; specs[i] != NULL; i++) {
			result = parse_one_thread_config_spec(specs[i], config);
			if (result != VDO_SUCCESS)
				break;
		}
		free_string_array(specs);
	}
	return result;
}
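
/*
 * Illustrative example: the thread config string "cpu=2,logical=4,physical=2,hash=1"
 * sets cpu_threads to 2 and the logical, physical, and hash zone counts to 4, 2,
 * and 1 respectively; the string "." leaves every default untouched.
 */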

/**
 * process_one_key_value_pair() - Process one component of an optional parameter string and update
 *				  the configuration data structure.
 * @key: The optional parameter key name.
 * @value: The optional parameter value.
 * @config: The configuration data structure to update.
 *
 * If the value requested is invalid, a message is logged and -EINVAL returned. If the key is
 * unknown, a message is logged but no error is returned.
 *
 * Return: VDO_SUCCESS or -EINVAL
 */
static int process_one_key_value_pair(const char *key, unsigned int value,
				      struct device_config *config)
{
	/* Non thread optional parameters */
	if (strcmp(key, "maxDiscard") == 0) {
		if (value == 0) {
			vdo_log_error("optional parameter error: at least one max discard block required");
			return -EINVAL;
		}
		/* Max discard sectors in blkdev_issue_discard is UINT_MAX >> 9 */
		if (value > (UINT_MAX / VDO_BLOCK_SIZE)) {
			vdo_log_error("optional parameter error: at most %d max discard blocks are allowed",
				      UINT_MAX / VDO_BLOCK_SIZE);
			return -EINVAL;
		}

		config->max_discard_blocks = value;
		return VDO_SUCCESS;
	}
	/* Handles unknown key names */
	return process_one_thread_config_spec(key, value, &config->thread_counts);
}

/**
 * parse_one_key_value_pair() - Parse one key/value pair and update the configuration data
 *				structure.
 * @key: The optional key name.
 * @value: The optional value.
 * @config: The configuration data to be updated.
 *
 * Return: VDO_SUCCESS or error.
 */
static int parse_one_key_value_pair(const char *key, const char *value,
				    struct device_config *config)
{
	unsigned int count;
	int result;

	if (strcmp(key, "deduplication") == 0)
		return parse_bool(value, "on", "off", &config->deduplication);

	if (strcmp(key, "compression") == 0)
		return parse_bool(value, "on", "off", &config->compression);

	/* The remaining arguments must have integral values. */
	result = kstrtouint(value, 10, &count);
	if (result) {
		vdo_log_error("optional config string error: integer value needed, found \"%s\"",
			      value);
		return result;
	}
	return process_one_key_value_pair(key, count, config);
}

/**
 * parse_key_value_pairs() - Parse all key/value pairs from a list of arguments.
 * @argc: The total number of arguments in list.
 * @argv: The list of key/value pairs.
 * @config: The device configuration data to update.
 *
 * If an error occurs during parsing of a single key/value pair, we deem it serious enough to stop
 * further parsing.
 *
 * This function can't set the "reason" value the caller wants to pass back, because we'd want to
 * format it to say which field was invalid, and we can't allocate the "reason" strings
 * dynamically. So if an error occurs, we'll log the details and return the error.
 *
 * Return: VDO_SUCCESS or error
 */
static int parse_key_value_pairs(int argc, char **argv, struct device_config *config)
{
	int result = VDO_SUCCESS;

	while (argc) {
		result = parse_one_key_value_pair(argv[0], argv[1], config);
		if (result != VDO_SUCCESS)
			break;

		argc -= 2;
		argv += 2;
	}
	return result;
}

/**
 * parse_optional_arguments() - Parse the configuration string passed in for optional arguments.
 * @arg_set: The structure holding the arguments to parse.
 * @error_ptr: Pointer to a buffer to hold the error string.
 * @config: Pointer to device configuration data to update.
 *
 * For V0/V1 configurations, there will only be one optional parameter: the thread configuration.
 * The configuration string should contain one or more comma-separated specs of the form
 * "typename=number"; the supported type names are "cpu", "ack", "bio", "bioRotationInterval",
 * "logical", "physical", and "hash".
 *
 * For V2 configurations and beyond, there could be any number of arguments. They should contain
 * one or more key/value pairs separated by a space.
 *
 * Return: VDO_SUCCESS or error
 */
static int parse_optional_arguments(struct dm_arg_set *arg_set, char **error_ptr,
				    struct device_config *config)
{
	int result = VDO_SUCCESS;

	if (config->version == 0 || config->version == 1) {
		result = parse_thread_config_string(arg_set->argv[0],
						    &config->thread_counts);
		if (result != VDO_SUCCESS) {
			*error_ptr = "Invalid thread-count configuration";
			return VDO_BAD_CONFIGURATION;
		}
	} else {
		if ((arg_set->argc % 2) != 0) {
			*error_ptr = "Odd number of optional arguments given but they should be <key> <value> pairs";
			return VDO_BAD_CONFIGURATION;
		}
		result = parse_key_value_pairs(arg_set->argc, arg_set->argv, config);
		if (result != VDO_SUCCESS) {
			*error_ptr = "Invalid optional argument configuration";
			return VDO_BAD_CONFIGURATION;
		}
	}
	return result;
}
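
/*
 * Illustrative example: for a V2+ table, the optional argument list
 * "maxDiscard 1500 deduplication off" sets max_discard_blocks to 1500 and disables
 * deduplication.
 */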

/**
 * handle_parse_error() - Handle a parsing error.
 * @config: The config to free.
 * @error_ptr: A place to store a constant string about the error.
 * @error_str: A constant string to store in error_ptr.
 */
static void handle_parse_error(struct device_config *config, char **error_ptr,
			       char *error_str)
{
	free_device_config(config);
	*error_ptr = error_str;
}

/**
 * parse_device_config() - Convert the dmsetup table into a struct device_config.
 * @argc: The number of table values.
 * @argv: The array of table values.
 * @ti: The target structure for this table.
 * @config_ptr: A pointer to return the allocated config.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int parse_device_config(int argc, char **argv, struct dm_target *ti,
			       struct device_config **config_ptr)
{
	bool enable_512e;
	size_t logical_bytes = to_bytes(ti->len);
	struct dm_arg_set arg_set;
	char **error_ptr = &ti->error;
	struct device_config *config = NULL;
	int result;

	if ((logical_bytes % VDO_BLOCK_SIZE) != 0) {
		handle_parse_error(config, error_ptr,
				   "Logical size must be a multiple of 4096");
		return VDO_BAD_CONFIGURATION;
	}

	if (argc == 0) {
		handle_parse_error(config, error_ptr, "Incorrect number of arguments");
		return VDO_BAD_CONFIGURATION;
	}

	result = vdo_allocate(1, struct device_config, "device_config", &config);
	if (result != VDO_SUCCESS) {
		handle_parse_error(config, error_ptr,
				   "Could not allocate config structure");
		return VDO_BAD_CONFIGURATION;
	}

	config->owning_target = ti;
	config->logical_blocks = logical_bytes / VDO_BLOCK_SIZE;
	INIT_LIST_HEAD(&config->config_list);

	/* Save the original string. */
	result = join_strings(argv, argc, ' ', &config->original_string);
	if (result != VDO_SUCCESS) {
		handle_parse_error(config, error_ptr, "Could not populate string");
		return VDO_BAD_CONFIGURATION;
	}

	vdo_log_info("table line: %s", config->original_string);

	config->thread_counts = (struct thread_count_config) {
		.bio_ack_threads = 1,
		.bio_threads = DEFAULT_VDO_BIO_SUBMIT_QUEUE_COUNT,
		.bio_rotation_interval = DEFAULT_VDO_BIO_SUBMIT_QUEUE_ROTATE_INTERVAL,
		.cpu_threads = 1,
		.logical_zones = 0,
		.physical_zones = 0,
		.hash_zones = 0,
	};
	config->max_discard_blocks = 1;
	config->deduplication = true;
	config->compression = false;

	arg_set.argc = argc;
	arg_set.argv = argv;

	result = get_version_number(argc, argv, error_ptr, &config->version);
	if (result != VDO_SUCCESS) {
		/* get_version_number sets error_ptr itself. */
		handle_parse_error(config, error_ptr, *error_ptr);
		return result;
	}
	/* Move the arg pointer forward only if the argument was there. */
	if (config->version >= 1)
		dm_shift_arg(&arg_set);

	result = vdo_duplicate_string(dm_shift_arg(&arg_set), "parent device name",
				      &config->parent_device_name);
	if (result != VDO_SUCCESS) {
		handle_parse_error(config, error_ptr,
				   "Could not copy parent device name");
		return VDO_BAD_CONFIGURATION;
	}

	/* Get the physical blocks, if known. */
	if (config->version >= 1) {
		result = kstrtoull(dm_shift_arg(&arg_set), 10, &config->physical_blocks);
		if (result != VDO_SUCCESS) {
			handle_parse_error(config, error_ptr,
					   "Invalid physical block count");
			return VDO_BAD_CONFIGURATION;
		}
	}

	/* Get the logical block size and validate */
	result = parse_bool(dm_shift_arg(&arg_set), "512", "4096", &enable_512e);
	if (result != VDO_SUCCESS) {
		handle_parse_error(config, error_ptr, "Invalid logical block size");
		return VDO_BAD_CONFIGURATION;
	}
	config->logical_block_size = (enable_512e ? 512 : 4096);

	/* Skip past the two no longer used read cache options. */
	if (config->version <= 1)
		dm_consume_args(&arg_set, 2);

	/* Get the page cache size. */
	result = kstrtouint(dm_shift_arg(&arg_set), 10, &config->cache_size);
	if (result != VDO_SUCCESS) {
		handle_parse_error(config, error_ptr,
				   "Invalid block map page cache size");
		return VDO_BAD_CONFIGURATION;
	}

	/* Get the block map era length. */
	result = kstrtouint(dm_shift_arg(&arg_set), 10, &config->block_map_maximum_age);
	if (result != VDO_SUCCESS) {
		handle_parse_error(config, error_ptr, "Invalid block map maximum age");
		return VDO_BAD_CONFIGURATION;
	}

	/* Skip past the no longer used MD RAID5 optimization mode */
	if (config->version <= 2)
		dm_consume_args(&arg_set, 1);

	/* Skip past the no longer used write policy setting */
	if (config->version <= 3)
		dm_consume_args(&arg_set, 1);

	/* Skip past the no longer used pool name for older table lines */
	if (config->version <= 2) {
		/*
		 * Make sure the enum to get the pool name from argv directly is still in sync with
		 * the parsing of the table line.
		 */
		if (&arg_set.argv[0] != &argv[POOL_NAME_ARG_INDEX[config->version]]) {
			handle_parse_error(config, error_ptr,
					   "Pool name not in expected location");
			return VDO_BAD_CONFIGURATION;
		}
		dm_shift_arg(&arg_set);
	}

	/* Get the optional arguments and validate. */
	result = parse_optional_arguments(&arg_set, error_ptr, config);
	if (result != VDO_SUCCESS) {
		/* parse_optional_arguments sets error_ptr itself. */
		handle_parse_error(config, error_ptr, *error_ptr);
		return result;
	}

	/*
	 * Logical, physical, and hash zone counts can all be zero; then we get one thread doing
	 * everything, our older configuration. If any zone count is non-zero, the others must be
	 * as well.
	 */
	if (((config->thread_counts.logical_zones == 0) !=
	     (config->thread_counts.physical_zones == 0)) ||
	    ((config->thread_counts.physical_zones == 0) !=
	     (config->thread_counts.hash_zones == 0))) {
		handle_parse_error(config, error_ptr,
				   "Logical, physical, and hash zones counts must all be zero or all non-zero");
		return VDO_BAD_CONFIGURATION;
	}

	if (config->cache_size <
	    (2 * MAXIMUM_VDO_USER_VIOS * config->thread_counts.logical_zones)) {
		handle_parse_error(config, error_ptr,
				   "Insufficient block map cache for logical zones");
		return VDO_BAD_CONFIGURATION;
	}

	result = dm_get_device(ti, config->parent_device_name,
			       dm_table_get_mode(ti->table), &config->owned_device);
	if (result != 0) {
		vdo_log_error("couldn't open device \"%s\": error %d",
			      config->parent_device_name, result);
		handle_parse_error(config, error_ptr, "Unable to open storage device");
		return VDO_BAD_CONFIGURATION;
	}

	if (config->version == 0) {
		u64 device_size = bdev_nr_bytes(config->owned_device->bdev);

		config->physical_blocks = device_size / VDO_BLOCK_SIZE;
	}

	*config_ptr = config;
	return result;
}
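
/*
 * Illustrative example of a V4 table line (values invented): the six required
 * arguments "V4 /dev/sdb1 1048576 4096 32768 16384" specify the version, the
 * parent device, its physical block count, a 4096-byte logical block size, a
 * 32768-block page cache, and a block map era length of 16384, optionally followed
 * by key/value pairs.
 */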

static struct vdo *get_vdo_for_target(struct dm_target *ti)
{
	return ((struct device_config *) ti->private)->vdo;
}

static int vdo_map_bio(struct dm_target *ti, struct bio *bio)
{
	struct vdo *vdo = get_vdo_for_target(ti);
	struct vdo_work_queue *current_work_queue;
	const struct admin_state_code *code = vdo_get_admin_state_code(&vdo->admin.state);

	VDO_ASSERT_LOG_ONLY(code->normal, "vdo should not receive bios while in state %s",
			    code->name);

	/* Count all incoming bios. */
	vdo_count_bios(&vdo->stats.bios_in, bio);

	/* Handle empty bios. Empty flush bios are not associated with a vio. */
	if ((bio_op(bio) == REQ_OP_FLUSH) || ((bio->bi_opf & REQ_PREFLUSH) != 0)) {
		vdo_launch_flush(vdo, bio);
		return DM_MAPIO_SUBMITTED;
	}

	/* Launching a bio from one of this vdo's own threads could deadlock, so assert not. */
	current_work_queue = vdo_get_current_work_queue();
	BUG_ON((current_work_queue != NULL) &&
	       (vdo == vdo_get_work_queue_owner(current_work_queue)->vdo));
	vdo_launch_bio(vdo->data_vio_pool, bio);
	return DM_MAPIO_SUBMITTED;
}

static void vdo_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
	struct vdo *vdo = get_vdo_for_target(ti);

	limits->logical_block_size = vdo->device_config->logical_block_size;
	limits->physical_block_size = VDO_BLOCK_SIZE;

	/* The minimum io size for random io */
	limits->io_min = VDO_BLOCK_SIZE;
	/* The optimal io size for streamed/sequential io */
	limits->io_opt = VDO_BLOCK_SIZE;

	/*
	 * Sets the maximum discard size that will be passed into VDO. This value comes from a
	 * table line value passed in during dmsetup create.
	 *
	 * The value 1024 is the largest usable value on HD systems. A 2048 sector discard on a
	 * busy HD system takes 31 seconds. We should use a value no higher than 1024, which takes
	 * 15 to 16 seconds on a busy HD system. However, using large values results in 120 second
	 * blocked task warnings in kernel logs. In order to avoid these warnings, we choose to
	 * use the smallest reasonable value.
	 *
	 * The value is used by dm-thin to determine whether to pass down discards. The block layer
	 * splits large discards on this boundary when this is set.
	 */
	limits->max_hw_discard_sectors =
		(vdo->device_config->max_discard_blocks * VDO_SECTORS_PER_BLOCK);

	/*
	 * Force discards to not begin or end with a partial block by stating the granularity is
	 * 4k.
	 */
	limits->discard_granularity = VDO_BLOCK_SIZE;
}

static int vdo_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn,
			       void *data)
{
	struct device_config *config = get_vdo_for_target(ti)->device_config;

	return fn(ti, config->owned_device, 0,
		  config->physical_blocks * VDO_SECTORS_PER_BLOCK, data);
}

/*
 * Status line is:
 *    <device> <operating mode> <in recovery> <index state> <compression state>
 *    <used physical blocks> <total physical blocks>
 */
static void vdo_status(struct dm_target *ti, status_type_t status_type,
		       unsigned int status_flags, char *result, unsigned int maxlen)
{
	struct vdo *vdo = get_vdo_for_target(ti);
	struct vdo_statistics *stats;
	struct device_config *device_config;
	/* N.B.: The DMEMIT macro uses the variables named "sz", "result", "maxlen". */
	int sz = 0;

	switch (status_type) {
	case STATUSTYPE_INFO:
		/* Report info for dmsetup status */
		mutex_lock(&vdo->stats_mutex);
		vdo_fetch_statistics(vdo, &vdo->stats_buffer);
		stats = &vdo->stats_buffer;

		DMEMIT("/dev/%pg %s %s %s %s %llu %llu",
		       vdo_get_backing_device(vdo), stats->mode,
		       stats->in_recovery_mode ? "recovering" : "-",
		       vdo_get_dedupe_index_state_name(vdo->hash_zones),
		       vdo_get_compressing(vdo) ? "online" : "offline",
		       stats->data_blocks_used + stats->overhead_blocks_used,
		       stats->physical_blocks);
		mutex_unlock(&vdo->stats_mutex);
		break;

	case STATUSTYPE_TABLE:
		/* Report the string actually specified in the beginning. */
		device_config = (struct device_config *) ti->private;
		DMEMIT("%s", device_config->original_string);
		break;

	case STATUSTYPE_IMA:
		/* FIXME: We ought to be more detailed here, but this is what thin does. */
		*result = '\0';
		break;
	}
}

static block_count_t __must_check get_underlying_device_block_count(const struct vdo *vdo)
{
	return bdev_nr_bytes(vdo_get_backing_device(vdo)) / VDO_BLOCK_SIZE;
}

static int __must_check process_vdo_message_locked(struct vdo *vdo, unsigned int argc,
						   char **argv)
{
	if ((argc == 2) && (strcasecmp(argv[0], "compression") == 0)) {
		if (strcasecmp(argv[1], "on") == 0) {
			vdo_set_compressing(vdo, true);
			return 0;
		}

		if (strcasecmp(argv[1], "off") == 0) {
			vdo_set_compressing(vdo, false);
			return 0;
		}

		vdo_log_warning("invalid argument '%s' to dmsetup compression message",
				argv[1]);
		return -EINVAL;
	}

	vdo_log_warning("unrecognized dmsetup message '%s' received", argv[0]);
	return -EINVAL;
}

/*
 * If the message is a dump, just do it. Otherwise, check that no other message is being processed,
 * and only proceed if none is.
 *
 * Returns -EBUSY if another message is being processed.
 */
static int __must_check process_vdo_message(struct vdo *vdo, unsigned int argc,
					    char **argv)
{
	int result;

	/*
	 * All messages which may be processed in parallel with other messages should be handled
	 * here before the atomic check below. Messages which should be exclusive should be
	 * processed in process_vdo_message_locked().
	 */

	/* Dump messages should always be processed */
	if (strcasecmp(argv[0], "dump") == 0)
		return vdo_dump(vdo, argc, argv, "dmsetup message");

	if (argc == 1) {
		if (strcasecmp(argv[0], "dump-on-shutdown") == 0) {
			vdo->dump_on_shutdown = true;
			return 0;
		}

		/* Index messages should always be processed */
		if ((strcasecmp(argv[0], "index-close") == 0) ||
		    (strcasecmp(argv[0], "index-create") == 0) ||
		    (strcasecmp(argv[0], "index-disable") == 0) ||
		    (strcasecmp(argv[0], "index-enable") == 0))
			return vdo_message_dedupe_index(vdo->hash_zones, argv[0]);
	}

	if (atomic_cmpxchg(&vdo->processing_message, 0, 1) != 0)
		return -EBUSY;

	result = process_vdo_message_locked(vdo, argc, argv);

	/* Pairs with the implicit barrier in cmpxchg just above */
	smp_wmb();
	atomic_set(&vdo->processing_message, 0);
	return result;
}
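
/*
 * Note that the processing_message flag acts as a non-blocking try-lock: a message
 * which loses the cmpxchg race gets -EBUSY rather than waiting, so concurrent
 * dmsetup message callers never block on each other here.
 */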

static int vdo_message(struct dm_target *ti, unsigned int argc, char **argv,
		       char *result_buffer, unsigned int maxlen)
{
	struct registered_thread allocating_thread, instance_thread;
	struct vdo *vdo;
	int result;

	if (argc == 0) {
		vdo_log_warning("unspecified dmsetup message");
		return -EINVAL;
	}

	vdo = get_vdo_for_target(ti);
	vdo_register_allocating_thread(&allocating_thread, NULL);
	vdo_register_thread_device_id(&instance_thread, &vdo->instance);

	/*
	 * Must be done here so we don't map return codes. The code in dm-ioctl expects a 1 for a
	 * return code to look at the buffer and see if it is full or not.
	 */
	if ((argc == 1) && (strcasecmp(argv[0], "stats") == 0)) {
		vdo_write_stats(vdo, result_buffer, maxlen);
		result = 1;
	} else if ((argc == 1) && (strcasecmp(argv[0], "config") == 0)) {
		vdo_write_config(vdo, &result_buffer, &maxlen);
		result = 1;
	} else {
		result = vdo_status_to_errno(process_vdo_message(vdo, argc, argv));
	}

	vdo_unregister_thread_device_id();
	vdo_unregister_allocating_thread();
	return result;
}

static void configure_target_capabilities(struct dm_target *ti)
{
	ti->discards_supported = 1;
	ti->flush_supported = true;
	ti->num_discard_bios = 1;
	ti->num_flush_bios = 1;

	/*
	 * If this value changes, please make sure to update the value for max_discard_sectors
	 * accordingly.
	 */
	BUG_ON(dm_set_target_max_io_len(ti, VDO_SECTORS_PER_BLOCK) != 0);
}

/*
 * Implements vdo_filter_fn.
 */
static bool vdo_uses_device(struct vdo *vdo, const void *context)
{
	const struct device_config *config = context;

	return vdo_get_backing_device(vdo)->bd_dev == config->owned_device->bdev->bd_dev;
}

/**
 * get_thread_id_for_phase() - Get the thread id for the current phase of the admin operation in
 *			       progress.
 */
static thread_id_t __must_check get_thread_id_for_phase(struct vdo *vdo)
{
	switch (vdo->admin.phase) {
	case RESUME_PHASE_PACKER:
	case RESUME_PHASE_FLUSHER:
	case SUSPEND_PHASE_PACKER:
	case SUSPEND_PHASE_FLUSHES:
		return vdo->thread_config.packer_thread;

	case RESUME_PHASE_DATA_VIOS:
	case SUSPEND_PHASE_DATA_VIOS:
		return vdo->thread_config.cpu_thread;

	case LOAD_PHASE_DRAIN_JOURNAL:
	case RESUME_PHASE_JOURNAL:
	case SUSPEND_PHASE_JOURNAL:
		return vdo->thread_config.journal_thread;

	default:
		return vdo->thread_config.admin_thread;
	}
}

static struct vdo_completion *prepare_admin_completion(struct vdo *vdo,
						       vdo_action_fn callback,
						       vdo_action_fn error_handler)
{
	struct vdo_completion *completion = &vdo->admin.completion;

	/*
	 * We can't use vdo_prepare_completion_for_requeue() here because we don't want to reset
	 * any error in the completion.
	 */
	completion->callback = callback;
	completion->error_handler = error_handler;
	completion->callback_thread_id = get_thread_id_for_phase(vdo);
	completion->requeue = true;
	return completion;
}

/**
 * advance_phase() - Increment the phase of the current admin operation and prepare the admin
 *		     completion to run on the thread for the next phase.
 * @vdo: The vdo on which an admin operation is being performed.
 *
 * Return: The phase that was current before the increment.
 */
static u32 advance_phase(struct vdo *vdo)
{
	u32 phase = vdo->admin.phase++;

	vdo->admin.completion.callback_thread_id = get_thread_id_for_phase(vdo);
	vdo->admin.completion.requeue = true;
	return phase;
}
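
/*
 * Because advance_phase() returns the pre-increment phase, callbacks such as
 * pre_load_callback() below can switch on its result to perform one phase per
 * invocation, with the completion requeued onto the correct thread for the phase
 * that follows.
 */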

/*
 * Perform an administrative operation (load, suspend, grow logical, or grow physical). This method
 * should not be called from vdo threads.
 */
static int perform_admin_operation(struct vdo *vdo, u32 starting_phase,
				   vdo_action_fn callback, vdo_action_fn error_handler,
				   const char *type)
{
	int result;
	struct vdo_administrator *admin = &vdo->admin;

	if (atomic_cmpxchg(&admin->busy, 0, 1) != 0) {
		return vdo_log_error_strerror(VDO_COMPONENT_BUSY,
					      "Can't start %s operation, another operation is already in progress",
					      type);
	}

	admin->phase = starting_phase;
	reinit_completion(&admin->callback_sync);
	vdo_reset_completion(&admin->completion);
	vdo_launch_completion(prepare_admin_completion(vdo, callback, error_handler));

	/*
	 * Using the "interruptible" interface means that Linux will not log a message when we wait
	 * for more than 120 seconds.
	 */
	while (wait_for_completion_interruptible(&admin->callback_sync)) {
		/* However, if we get a signal in a user-mode process, we could spin... */
		fsleep(1000);
	}

	result = admin->completion.result;
	/* pairs with implicit barrier in cmpxchg above */
	smp_wmb();
	atomic_set(&admin->busy, 0);
	return result;
}

/* Assert that we are operating on the correct thread for the current phase. */
static void assert_admin_phase_thread(struct vdo *vdo, const char *what)
{
	VDO_ASSERT_LOG_ONLY(vdo_get_callback_thread_id() == get_thread_id_for_phase(vdo),
			    "%s on correct thread for %s", what,
			    ADMIN_PHASE_NAMES[vdo->admin.phase]);
}

/**
 * finish_operation_callback() - Callback to finish an admin operation.
 * @completion: The admin_completion.
 */
static void finish_operation_callback(struct vdo_completion *completion)
{
	struct vdo_administrator *admin = &completion->vdo->admin;

	vdo_finish_operation(&admin->state, completion->result);
	complete(&admin->callback_sync);
}

/**
 * decode_from_super_block() - Decode the VDO state from the super block and validate that it is
 *			       correct.
 * @vdo: The vdo being loaded.
 *
 * On error from this method, the component states must be destroyed explicitly. If this method
 * returns successfully, the component states must not be destroyed.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int __must_check decode_from_super_block(struct vdo *vdo)
{
	const struct device_config *config = vdo->device_config;
	int result;

	result = vdo_decode_component_states(vdo->super_block.buffer, &vdo->geometry,
					     &vdo->states);
	if (result != VDO_SUCCESS)
		return result;

	vdo_set_state(vdo, vdo->states.vdo.state);
	vdo->load_state = vdo->states.vdo.state;

	/*
	 * If the device config specifies a larger logical size than was recorded in the super
	 * block, just accept it.
	 */
	if (vdo->states.vdo.config.logical_blocks < config->logical_blocks) {
		vdo_log_warning("Growing logical size: a logical size of %llu blocks was specified, but that differs from the %llu blocks configured in the vdo super block",
				(unsigned long long) config->logical_blocks,
				(unsigned long long) vdo->states.vdo.config.logical_blocks);
		vdo->states.vdo.config.logical_blocks = config->logical_blocks;
	}

	result = vdo_validate_component_states(&vdo->states, vdo->geometry.nonce,
					       config->physical_blocks,
					       config->logical_blocks);
	if (result != VDO_SUCCESS)
		return result;

	vdo->layout = vdo->states.layout;
	return VDO_SUCCESS;
}

/**
 * decode_vdo() - Decode the component data portion of a super block and fill in the corresponding
 *		  portions of the vdo being loaded.
 * @vdo: The vdo being loaded.
 *
 * This will also allocate the recovery journal and slab depot. If this method is called with an
 * asynchronous layer (i.e. a thread config which specifies at least one base thread), the block
 * map and packer will be constructed as well.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int __must_check decode_vdo(struct vdo *vdo)
{
	block_count_t maximum_age, journal_length;
	struct partition *partition;
	int result;

	result = decode_from_super_block(vdo);
	if (result != VDO_SUCCESS) {
		vdo_destroy_component_states(&vdo->states);
		return result;
	}

	maximum_age = vdo_convert_maximum_age(vdo->device_config->block_map_maximum_age);
	journal_length =
		vdo_get_recovery_journal_length(vdo->states.vdo.config.recovery_journal_size);
	if (maximum_age > (journal_length / 2)) {
		return vdo_log_error_strerror(VDO_BAD_CONFIGURATION,
					      "maximum age: %llu exceeds limit %llu",
					      (unsigned long long) maximum_age,
					      (unsigned long long) (journal_length / 2));
	}

	if (maximum_age == 0) {
		return vdo_log_error_strerror(VDO_BAD_CONFIGURATION,
					      "maximum age must be greater than 0");
	}

	result = vdo_enable_read_only_entry(vdo);
	if (result != VDO_SUCCESS)
		return result;

	partition = vdo_get_known_partition(&vdo->layout,
					    VDO_RECOVERY_JOURNAL_PARTITION);
	result = vdo_decode_recovery_journal(vdo->states.recovery_journal,
					     vdo->states.vdo.nonce, vdo, partition,
					     vdo->states.vdo.complete_recoveries,
					     vdo->states.vdo.config.recovery_journal_size,
					     &vdo->recovery_journal);
	if (result != VDO_SUCCESS)
		return result;

	partition = vdo_get_known_partition(&vdo->layout, VDO_SLAB_SUMMARY_PARTITION);
	result = vdo_decode_slab_depot(vdo->states.slab_depot, vdo, partition,
				       &vdo->depot);
	if (result != VDO_SUCCESS)
		return result;

	result = vdo_decode_block_map(vdo->states.block_map,
				      vdo->states.vdo.config.logical_blocks, vdo,
				      vdo->recovery_journal, vdo->states.vdo.nonce,
				      vdo->device_config->cache_size, maximum_age,
				      &vdo->block_map);
	if (result != VDO_SUCCESS)
		return result;

	result = vdo_make_physical_zones(vdo, &vdo->physical_zones);
	if (result != VDO_SUCCESS)
		return result;

	/* The logical zones depend on the physical zones already existing. */
	result = vdo_make_logical_zones(vdo, &vdo->logical_zones);
	if (result != VDO_SUCCESS)
		return result;

	return vdo_make_hash_zones(vdo, &vdo->hash_zones);
}

/**
 * pre_load_callback() - Callback to initiate a pre-load, registered in vdo_initialize().
 * @completion: The admin completion.
 */
static void pre_load_callback(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;
	int result;

	assert_admin_phase_thread(vdo, __func__);

	switch (advance_phase(vdo)) {
	case PRE_LOAD_PHASE_START:
		result = vdo_start_operation(&vdo->admin.state,
					     VDO_ADMIN_STATE_PRE_LOADING);
		if (result != VDO_SUCCESS) {
			vdo_continue_completion(completion, result);
			return;
		}

		vdo_load_super_block(vdo, completion);
		return;

	case PRE_LOAD_PHASE_LOAD_COMPONENTS:
		vdo_continue_completion(completion, decode_vdo(vdo));
		return;

	case PRE_LOAD_PHASE_END:
		break;

	default:
		vdo_set_completion_result(completion, UDS_BAD_STATE);
	}

	finish_operation_callback(completion);
}

static void release_instance(unsigned int instance)
{
	mutex_lock(&instances_lock);
	if (instance >= instances.bit_count) {
		VDO_ASSERT_LOG_ONLY(false,
				    "instance number %u must be less than bit count %u",
				    instance, instances.bit_count);
	} else if (test_bit(instance, instances.words) == 0) {
		VDO_ASSERT_LOG_ONLY(false, "instance number %u must be allocated", instance);
	} else {
		__clear_bit(instance, instances.words);
		instances.count -= 1;
	}
	mutex_unlock(&instances_lock);
}

static void set_device_config(struct dm_target *ti, struct vdo *vdo,
			      struct device_config *config)
{
	list_del_init(&config->config_list);
	list_add_tail(&config->config_list, &vdo->device_config_list);
	config->vdo = vdo;
	ti->private = config;
	configure_target_capabilities(ti);
}

static int vdo_initialize(struct dm_target *ti, unsigned int instance,
			  struct device_config *config)
{
	struct vdo *vdo;
	int result;
	u64 block_size = VDO_BLOCK_SIZE;
	u64 logical_size = to_bytes(ti->len);
	block_count_t logical_blocks = logical_size / block_size;

	vdo_log_info("loading device '%s'", vdo_get_device_name(ti));
	vdo_log_debug("Logical block size = %llu", (u64) config->logical_block_size);
	vdo_log_debug("Logical blocks = %llu", logical_blocks);
	vdo_log_debug("Physical block size = %llu", (u64) block_size);
	vdo_log_debug("Physical blocks = %llu", config->physical_blocks);
	vdo_log_debug("Block map cache blocks = %u", config->cache_size);
	vdo_log_debug("Block map maximum age = %u", config->block_map_maximum_age);
	vdo_log_debug("Deduplication = %s", (config->deduplication ? "on" : "off"));
	vdo_log_debug("Compression = %s", (config->compression ? "on" : "off"));

	vdo = vdo_find_matching(vdo_uses_device, config);
	if (vdo != NULL) {
		vdo_log_error("Existing vdo already uses device %s",
			      vdo->device_config->parent_device_name);
		ti->error = "Cannot share storage device with already-running VDO";
		return VDO_BAD_CONFIGURATION;
	}

	result = vdo_make(instance, config, &ti->error, &vdo);
	if (result != VDO_SUCCESS) {
		vdo_log_error("Could not create VDO device. (VDO error %d, message %s)",
			      result, ti->error);
		vdo_destroy(vdo);
		return result;
	}

	result = perform_admin_operation(vdo, PRE_LOAD_PHASE_START, pre_load_callback,
					 finish_operation_callback, "pre-load");
	if (result != VDO_SUCCESS) {
		ti->error = ((result == VDO_INVALID_ADMIN_STATE) ?
			     "Pre-load is only valid immediately after initialization" :
			     "Cannot load metadata from device");
		vdo_log_error("Could not start VDO device. (VDO error %d, message %s)",
			      result, ti->error);
		vdo_destroy(vdo);
		return result;
	}

	set_device_config(ti, vdo, config);
	vdo->device_config = config;
	return VDO_SUCCESS;
}

/* Implements vdo_filter_fn. */
static bool __must_check vdo_is_named(struct vdo *vdo, const void *context)
{
	struct dm_target *ti = vdo->device_config->owning_target;
	const char *device_name = vdo_get_device_name(ti);

	return strcmp(device_name, context) == 0;
}

/**
 * get_bit_array_size() - Return the number of bytes needed to store a bit array of the specified
 *			  capacity in an array of unsigned longs.
 * @bit_count: The number of bits the array must hold.
 *
 * Return: the number of bytes needed for the array representation.
 */
static size_t get_bit_array_size(unsigned int bit_count)
{
	/* Round up to a multiple of the word size and convert to a byte count. */
	return (BITS_TO_LONGS(bit_count) * sizeof(unsigned long));
}
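
/*
 * For example, on a 64-bit kernel, get_bit_array_size(BIT_COUNT_MINIMUM) rounds
 * 1000 bits up to 16 unsigned longs, or 128 bytes; these are the "16 words"
 * mentioned in the instance tracker comment above.
 */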

/**
 * grow_bit_array() - Re-allocate the bitmap word array so there will be more instance numbers
 *		      that can be allocated.
 *
 * Since the array is initially NULL, this also initializes the array the first time we allocate
 * an instance number.
 *
 * Return: VDO_SUCCESS or an error code from the allocation.
 */
static int grow_bit_array(void)
{
	unsigned int new_count = max(instances.bit_count + BIT_COUNT_INCREMENT,
				     (unsigned int) BIT_COUNT_MINIMUM);
	unsigned long *new_words;
	int result;

	result = vdo_reallocate_memory(instances.words,
				       get_bit_array_size(instances.bit_count),
				       get_bit_array_size(new_count),
				       "instance number bit array", &new_words);
	if (result != VDO_SUCCESS)
		return result;

	instances.bit_count = new_count;
	instances.words = new_words;
	return VDO_SUCCESS;
}

/**
 * allocate_instance() - Allocate an instance number.
 * @instance_ptr: A pointer to hold the instance number.
 *
 * Return: VDO_SUCCESS or an error code.
 *
 * This function must be called while holding the instances lock.
 */
static int allocate_instance(unsigned int *instance_ptr)
{
	unsigned int instance;
	int result;

	/* If there are no unallocated instances, grow the bit array. */
	if (instances.count >= instances.bit_count) {
		result = grow_bit_array();
		if (result != VDO_SUCCESS)
			return result;
	}

	/*
	 * There must be a zero bit somewhere now. Find it, starting just after the last instance
	 * allocated.
	 */
	instance = find_next_zero_bit(instances.words, instances.bit_count,
				      instances.next);
	if (instance >= instances.bit_count) {
		/* Nothing free after next, so wrap around to instance zero. */
		instance = find_first_zero_bit(instances.words, instances.bit_count);
		result = VDO_ASSERT(instance < instances.bit_count,
				    "impossibly, no zero bit found");
		if (result != VDO_SUCCESS)
			return result;
	}

	__set_bit(instance, instances.words);
	instances.count++;
	instances.next = instance + 1;
	*instance_ptr = instance;
	return VDO_SUCCESS;
}
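
/*
 * Illustrative example: if next is 5 and bits 5 and 6 are free, two successive
 * calls return instances 5 and 6; if no bit at or above next is free, the search
 * wraps and returns the lowest free bit instead.
 */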
static int construct_new_vdo_registered ( struct dm_target * ti , unsigned int argc ,
char * * argv , unsigned int instance )
{
int result ;
struct device_config * config ;
result = parse_device_config ( argc , argv , ti , & config ) ;
if ( result ! = VDO_SUCCESS ) {
2024-02-14 09:22:04 -06:00
vdo_log_error_strerror ( result , " parsing failed: %s " , ti - > error ) ;
2023-11-16 21:24:06 -05:00
release_instance ( instance ) ;
return - EINVAL ;
}
/* Beyond this point, the instance number will be cleaned up for us if needed */
result = vdo_initialize ( ti , instance , config ) ;
if ( result ! = VDO_SUCCESS ) {
release_instance ( instance ) ;
free_device_config ( config ) ;
2024-01-26 21:42:29 -05:00
return vdo_status_to_errno ( result ) ;
2023-11-16 21:24:06 -05:00
}
return VDO_SUCCESS ;
}
static int construct_new_vdo(struct dm_target *ti, unsigned int argc, char **argv)
{
	int result;
	unsigned int instance;
	struct registered_thread instance_thread;

	mutex_lock(&instances_lock);
	result = allocate_instance(&instance);
	mutex_unlock(&instances_lock);
	if (result != VDO_SUCCESS)
		return -ENOMEM;

	vdo_register_thread_device_id(&instance_thread, &instance);
	result = construct_new_vdo_registered(ti, argc, argv, instance);
	vdo_unregister_thread_device_id();
	return result;
}

/**
 * check_may_grow_physical() - Callback to check that we're not in recovery mode, used in
 *                             vdo_prepare_to_grow_physical().
 * @completion: The admin completion.
 */
static void check_may_grow_physical(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;

	assert_admin_phase_thread(vdo, __func__);

	/* These checks can only be done from a vdo thread. */
	if (vdo_is_read_only(vdo))
		vdo_set_completion_result(completion, VDO_READ_ONLY);

	if (vdo_in_recovery_mode(vdo))
		vdo_set_completion_result(completion, VDO_RETRY_AFTER_REBUILD);

	finish_operation_callback(completion);
}

static block_count_t get_partition_size(struct layout *layout, enum partition_id id)
{
	return vdo_get_known_partition(layout, id)->count;
}

/**
 * grow_layout() - Make the layout for growing a vdo.
 * @vdo: The vdo preparing to grow.
 * @old_size: The current size of the vdo.
 * @new_size: The size to which the vdo will be grown.
 *
 * Return: VDO_SUCCESS or an error code.
 */
static int grow_layout(struct vdo *vdo, block_count_t old_size, block_count_t new_size)
{
	int result;
	block_count_t min_new_size;

	if (vdo->next_layout.size == new_size) {
		/* We are already prepared to grow to the new size, so we're done. */
		return VDO_SUCCESS;
	}

	/* Make a copy completion if there isn't one. */
	if (vdo->partition_copier == NULL) {
		vdo->partition_copier = dm_kcopyd_client_create(NULL);
		if (IS_ERR(vdo->partition_copier)) {
			result = PTR_ERR(vdo->partition_copier);
			vdo->partition_copier = NULL;
			return result;
		}
	}

	/* Free any unused preparation. */
	vdo_uninitialize_layout(&vdo->next_layout);

	/*
	 * Make a new layout with the existing partition sizes for everything but the slab depot
	 * partition.
	 */
	result = vdo_initialize_layout(new_size, vdo->layout.start,
				       get_partition_size(&vdo->layout,
							  VDO_BLOCK_MAP_PARTITION),
				       get_partition_size(&vdo->layout,
							  VDO_RECOVERY_JOURNAL_PARTITION),
				       get_partition_size(&vdo->layout,
							  VDO_SLAB_SUMMARY_PARTITION),
				       &vdo->next_layout);
	if (result != VDO_SUCCESS) {
		dm_kcopyd_client_destroy(vdo_forget(vdo->partition_copier));
		return result;
	}

	/* Ensure the new journal and summary are entirely within the added blocks. */
	min_new_size = (old_size +
			get_partition_size(&vdo->next_layout,
					   VDO_SLAB_SUMMARY_PARTITION) +
			get_partition_size(&vdo->next_layout,
					   VDO_RECOVERY_JOURNAL_PARTITION));
	if (min_new_size > new_size) {
		/* Copying the journal and summary would destroy some old metadata. */
		vdo_uninitialize_layout(&vdo->next_layout);
		dm_kcopyd_client_destroy(vdo_forget(vdo->partition_copier));
		return VDO_INCREMENT_TOO_SMALL;
	}

	return VDO_SUCCESS;
}

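/*
 * Layout note: only the slab depot partition absorbs the added blocks. The recovery journal and
 * slab summary keep their current sizes but are relocated to the end of the new, larger layout,
 * which is why they must fit entirely within the added region; otherwise copying them would
 * overwrite old metadata before the grow commits.
 */
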
static int prepare_to_grow_physical(struct vdo *vdo, block_count_t new_physical_blocks)
{
	int result;
	block_count_t current_physical_blocks = vdo->states.vdo.config.physical_blocks;

	vdo_log_info("Preparing to resize physical to %llu",
		     (unsigned long long) new_physical_blocks);
	VDO_ASSERT_LOG_ONLY((new_physical_blocks > current_physical_blocks),
			    "New physical size is larger than current physical size");
	result = perform_admin_operation(vdo, PREPARE_GROW_PHYSICAL_PHASE_START,
					 check_may_grow_physical,
					 finish_operation_callback,
					 "prepare grow-physical");
	if (result != VDO_SUCCESS)
		return result;

	result = grow_layout(vdo, current_physical_blocks, new_physical_blocks);
	if (result != VDO_SUCCESS)
		return result;

	result = vdo_prepare_to_grow_slab_depot(vdo->depot,
						vdo_get_known_partition(&vdo->next_layout,
									VDO_SLAB_DEPOT_PARTITION));
	if (result != VDO_SUCCESS) {
		vdo_uninitialize_layout(&vdo->next_layout);
		return result;
	}

	vdo_log_info("Done preparing to resize physical");
	return VDO_SUCCESS;
}

/**
 * validate_new_device_config() - Check whether a new device config represents a valid modification
 *                                to an existing config.
 * @to_validate: The new config to validate.
 * @config: The existing config.
 * @may_grow: Set to true if growing the logical and physical size of the vdo is currently
 *            permitted.
 * @error_ptr: A pointer to hold the reason for any error.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int validate_new_device_config(struct device_config *to_validate,
				      struct device_config *config, bool may_grow,
				      char **error_ptr)
{
	if (to_validate->owning_target->begin != config->owning_target->begin) {
		*error_ptr = "Starting sector cannot change";
		return VDO_PARAMETER_MISMATCH;
	}

	if (to_validate->logical_block_size != config->logical_block_size) {
		*error_ptr = "Logical block size cannot change";
		return VDO_PARAMETER_MISMATCH;
	}

	if (to_validate->logical_blocks < config->logical_blocks) {
		*error_ptr = "Can't shrink VDO logical size";
		return VDO_PARAMETER_MISMATCH;
	}

	if (to_validate->cache_size != config->cache_size) {
		*error_ptr = "Block map cache size cannot change";
		return VDO_PARAMETER_MISMATCH;
	}

	if (to_validate->block_map_maximum_age != config->block_map_maximum_age) {
		*error_ptr = "Block map maximum age cannot change";
		return VDO_PARAMETER_MISMATCH;
	}

	if (memcmp(&to_validate->thread_counts, &config->thread_counts,
		   sizeof(struct thread_count_config)) != 0) {
		*error_ptr = "Thread configuration cannot change";
		return VDO_PARAMETER_MISMATCH;
	}

	if (to_validate->physical_blocks < config->physical_blocks) {
		*error_ptr = "Removing physical storage from a VDO is not supported";
		return VDO_NOT_IMPLEMENTED;
	}

	if (!may_grow && (to_validate->physical_blocks > config->physical_blocks)) {
		*error_ptr = "VDO physical size may not grow in current state";
		return VDO_NOT_IMPLEMENTED;
	}

	return VDO_SUCCESS;
}

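/*
 * In practice this means a table reload may only grow the logical or physical size or switch the
 * backing device; the starting sector, logical block size, block map cache size, block map
 * maximum age, and thread counts are all fixed for the life of the device.
 */
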
static int prepare_to_modify(struct dm_target *ti, struct device_config *config,
			     struct vdo *vdo)
{
	int result;
	bool may_grow = (vdo_get_admin_state(vdo) != VDO_ADMIN_STATE_PRE_LOADED);

	result = validate_new_device_config(config, vdo->device_config, may_grow,
					    &ti->error);
	if (result != VDO_SUCCESS)
		return -EINVAL;

	if (config->logical_blocks > vdo->device_config->logical_blocks) {
		block_count_t logical_blocks = vdo->states.vdo.config.logical_blocks;

		vdo_log_info("Preparing to resize logical to %llu",
			     (unsigned long long) config->logical_blocks);
		VDO_ASSERT_LOG_ONLY((config->logical_blocks > logical_blocks),
				    "New logical size is larger than current size");
		result = vdo_prepare_to_grow_block_map(vdo->block_map,
						       config->logical_blocks);
		if (result != VDO_SUCCESS) {
			ti->error = "Device vdo_prepare_to_grow_logical failed";
			return result;
		}

		vdo_log_info("Done preparing to resize logical");
	}

	if (config->physical_blocks > vdo->device_config->physical_blocks) {
		result = prepare_to_grow_physical(vdo, config->physical_blocks);
		if (result != VDO_SUCCESS) {
			if (result == VDO_PARAMETER_MISMATCH) {
				/*
				 * If we don't trap this case, vdo_status_to_errno() will remap
				 * it to -EIO, which is misleading and ahistorical.
				 */
				result = -EINVAL;
			}

			if (result == VDO_TOO_MANY_SLABS)
				ti->error = "Device vdo_prepare_to_grow_physical failed (specified physical size too big based on formatted slab size)";
			else
				ti->error = "Device vdo_prepare_to_grow_physical failed";

			return result;
		}
	}

	if (strcmp(config->parent_device_name, vdo->device_config->parent_device_name) != 0) {
		const char *device_name = vdo_get_device_name(config->owning_target);

		vdo_log_info("Updating backing device of %s from %s to %s", device_name,
			     vdo->device_config->parent_device_name,
			     config->parent_device_name);
	}

	return VDO_SUCCESS;
}

static int update_existing_vdo(const char *device_name, struct dm_target *ti,
			       unsigned int argc, char **argv, struct vdo *vdo)
{
	int result;
	struct device_config *config;

	result = parse_device_config(argc, argv, ti, &config);
	if (result != VDO_SUCCESS)
		return -EINVAL;

	vdo_log_info("preparing to modify device '%s'", device_name);
	result = prepare_to_modify(ti, config, vdo);
	if (result != VDO_SUCCESS) {
		free_device_config(config);
		return vdo_status_to_errno(result);
	}

	set_device_config(ti, vdo, config);
	return VDO_SUCCESS;
}

static int vdo_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	int result;
	struct registered_thread allocating_thread, instance_thread;
	const char *device_name;
	struct vdo *vdo;

	vdo_register_allocating_thread(&allocating_thread, NULL);
	device_name = vdo_get_device_name(ti);
	vdo = vdo_find_matching(vdo_is_named, device_name);
	if (vdo == NULL) {
		result = construct_new_vdo(ti, argc, argv);
	} else {
		vdo_register_thread_device_id(&instance_thread, &vdo->instance);
		result = update_existing_vdo(device_name, ti, argc, argv, vdo);
		vdo_unregister_thread_device_id();
	}

	vdo_unregister_allocating_thread();
	return result;
}

static void vdo_dtr(struct dm_target *ti)
{
	struct device_config *config = ti->private;
	struct vdo *vdo = vdo_forget(config->vdo);

	list_del_init(&config->config_list);
	if (list_empty(&vdo->device_config_list)) {
		const char *device_name;

		/* This was the last config referencing the VDO. Free it. */
		unsigned int instance = vdo->instance;
		struct registered_thread allocating_thread, instance_thread;

		vdo_register_thread_device_id(&instance_thread, &instance);
		vdo_register_allocating_thread(&allocating_thread, NULL);
		device_name = vdo_get_device_name(ti);
		vdo_log_info("stopping device '%s'", device_name);
		if (vdo->dump_on_shutdown)
			vdo_dump_all(vdo, "device shutdown");

		vdo_destroy(vdo_forget(vdo));
		vdo_log_info("device '%s' stopped", device_name);
		vdo_unregister_thread_device_id();
		vdo_unregister_allocating_thread();
		release_instance(instance);
	} else if (config == vdo->device_config) {
		/*
		 * The VDO still references this config. Give it a reference to a config that isn't
		 * being destroyed.
		 */
		vdo->device_config = list_first_entry(&vdo->device_config_list,
						      struct device_config, config_list);
	}

	free_device_config(config);
	ti->private = NULL;
}

static void vdo_presuspend(struct dm_target *ti)
{
	get_vdo_for_target(ti)->suspend_type =
		(dm_noflush_suspending(ti) ? VDO_ADMIN_STATE_SUSPENDING : VDO_ADMIN_STATE_SAVING);
}

/**
 * write_super_block_for_suspend() - Update the VDO state and save the super block.
 * @completion: The admin completion.
 */
static void write_super_block_for_suspend(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;

	switch (vdo_get_state(vdo)) {
	case VDO_DIRTY:
	case VDO_NEW:
		vdo_set_state(vdo, VDO_CLEAN);
		break;

	case VDO_CLEAN:
	case VDO_READ_ONLY_MODE:
	case VDO_FORCE_REBUILD:
	case VDO_RECOVERING:
	case VDO_REBUILD_FOR_UPGRADE:
		break;

	case VDO_REPLAYING:
	default:
		vdo_continue_completion(completion, UDS_BAD_STATE);
		return;
	}

	vdo_save_components(vdo, completion);
}

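/*
 * State transitions on suspend: a VDO_DIRTY or VDO_NEW device is marked VDO_CLEAN before the
 * super block is saved, recording an orderly shutdown; read-only and repair-related states are
 * saved as-is, and a save while VDO_REPLAYING is a logic error.
 */
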
/**
 * suspend_callback() - Callback to initiate a suspend, registered in vdo_postsuspend().
 * @completion: The sub-task completion.
 */
static void suspend_callback(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;
	struct admin_state *state = &vdo->admin.state;
	int result;

	assert_admin_phase_thread(vdo, __func__);

	switch (advance_phase(vdo)) {
	case SUSPEND_PHASE_START:
		if (vdo_get_admin_state_code(state)->quiescent) {
			/* Already suspended */
			break;
		}

		vdo_continue_completion(completion,
					vdo_start_operation(state, vdo->suspend_type));
		return;

	case SUSPEND_PHASE_PACKER:
		/*
		 * If the VDO was already resumed from a prior suspend while read-only, some of the
		 * components may not have been resumed. By setting a read-only error here, we
		 * guarantee that the result of this suspend will be VDO_READ_ONLY and not
		 * VDO_INVALID_ADMIN_STATE in that case.
		 */
		if (vdo_in_read_only_mode(vdo))
			vdo_set_completion_result(completion, VDO_READ_ONLY);

		vdo_drain_packer(vdo->packer, completion);
		return;

	case SUSPEND_PHASE_DATA_VIOS:
		drain_data_vio_pool(vdo->data_vio_pool, completion);
		return;

	case SUSPEND_PHASE_DEDUPE:
		vdo_drain_hash_zones(vdo->hash_zones, completion);
		return;

	case SUSPEND_PHASE_FLUSHES:
		vdo_drain_flusher(vdo->flusher, completion);
		return;

	case SUSPEND_PHASE_LOGICAL_ZONES:
		/*
		 * Attempt to flush all I/O before completing post suspend work. We believe a
		 * suspended device is expected to have persisted all data written before the
		 * suspend, even if it hasn't been flushed yet.
		 */
		result = vdo_synchronous_flush(vdo);
		if (result != VDO_SUCCESS)
			vdo_enter_read_only_mode(vdo, result);

		vdo_drain_logical_zones(vdo->logical_zones,
					vdo_get_admin_state_code(state), completion);
		return;

	case SUSPEND_PHASE_BLOCK_MAP:
		vdo_drain_block_map(vdo->block_map, vdo_get_admin_state_code(state),
				    completion);
		return;

	case SUSPEND_PHASE_JOURNAL:
		vdo_drain_recovery_journal(vdo->recovery_journal,
					   vdo_get_admin_state_code(state), completion);
		return;

	case SUSPEND_PHASE_DEPOT:
		vdo_drain_slab_depot(vdo->depot, vdo_get_admin_state_code(state),
				     completion);
		return;

	case SUSPEND_PHASE_READ_ONLY_WAIT:
		vdo_wait_until_not_entering_read_only_mode(completion);
		return;

	case SUSPEND_PHASE_WRITE_SUPER_BLOCK:
		if (vdo_is_state_suspending(state) || (completion->result != VDO_SUCCESS)) {
			/* If we didn't save the VDO or there was an error, we're done. */
			break;
		}

		write_super_block_for_suspend(completion);
		return;

	case SUSPEND_PHASE_END:
		break;

	default:
		vdo_set_completion_result(completion, UDS_BAD_STATE);
	}

	finish_operation_callback(completion);
}

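/*
 * The drain order above follows the flow of data through the device: packer, then the data_vio
 * pool, dedupe, outstanding flushes, logical zones, block map, recovery journal, and finally the
 * slab depot, so each stage quiesces before the stages it feeds into.
 */
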
static void vdo_postsuspend(struct dm_target *ti)
{
	struct vdo *vdo = get_vdo_for_target(ti);
	struct registered_thread instance_thread;
	const char *device_name;
	int result;

	vdo_register_thread_device_id(&instance_thread, &vdo->instance);
	device_name = vdo_get_device_name(vdo->device_config->owning_target);
	vdo_log_info("suspending device '%s'", device_name);

	/*
	 * It's important to note any error here does not actually stop device-mapper from
	 * suspending the device. All this work is done post suspend.
	 */
	result = perform_admin_operation(vdo, SUSPEND_PHASE_START, suspend_callback,
					 suspend_callback, "suspend");

	if ((result == VDO_SUCCESS) || (result == VDO_READ_ONLY)) {
		/*
		 * Treat VDO_READ_ONLY as a success since a read-only suspension still leaves the
		 * VDO suspended.
		 */
		vdo_log_info("device '%s' suspended", device_name);
	} else if (result == VDO_INVALID_ADMIN_STATE) {
		vdo_log_error("Suspend invoked while in unexpected state: %s",
			      vdo_get_admin_state(vdo)->name);
	} else {
		vdo_log_error_strerror(result, "Suspend of device '%s' failed",
				       device_name);
	}

	vdo_unregister_thread_device_id();
}

/**
 * was_new() - Check whether the vdo was new when it was loaded.
 * @vdo: The vdo to query.
 *
 * Return: true if the vdo was new.
 */
static bool was_new(const struct vdo *vdo)
{
	return (vdo->load_state == VDO_NEW);
}

/**
 * requires_repair() - Check whether a vdo requires recovery or rebuild.
 * @vdo: The vdo to query.
 *
 * Return: true if the vdo must be repaired.
 */
static bool __must_check requires_repair(const struct vdo *vdo)
{
	switch (vdo_get_state(vdo)) {
	case VDO_DIRTY:
	case VDO_FORCE_REBUILD:
	case VDO_REPLAYING:
	case VDO_REBUILD_FOR_UPGRADE:
		return true;

	default:
		return false;
	}
}

/**
 * get_load_type() - Determine how the slab depot was loaded.
 * @vdo: The vdo.
 *
 * Return: How the depot was loaded.
 */
static enum slab_depot_load_type get_load_type(struct vdo *vdo)
{
	if (vdo_state_requires_read_only_rebuild(vdo->load_state))
		return VDO_SLAB_DEPOT_REBUILD_LOAD;

	if (vdo_state_requires_recovery(vdo->load_state))
		return VDO_SLAB_DEPOT_RECOVERY_LOAD;

	return VDO_SLAB_DEPOT_NORMAL_LOAD;
}

/**
 * load_callback() - Callback to do the destructive parts of loading a VDO.
 * @completion: The sub-task completion.
 */
static void load_callback(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;
	int result;

	assert_admin_phase_thread(vdo, __func__);

	switch (advance_phase(vdo)) {
	case LOAD_PHASE_START:
		result = vdo_start_operation(&vdo->admin.state, VDO_ADMIN_STATE_LOADING);
		if (result != VDO_SUCCESS) {
			vdo_continue_completion(completion, result);
			return;
		}

		/* Prepare the recovery journal for new entries. */
		vdo_open_recovery_journal(vdo->recovery_journal, vdo->depot,
					  vdo->block_map);
		vdo_allow_read_only_mode_entry(completion);
		return;

	case LOAD_PHASE_LOAD_DEPOT:
		vdo_set_dedupe_state_normal(vdo->hash_zones);
		if (vdo_is_read_only(vdo)) {
			/*
			 * In read-only mode we don't use the allocator and it may not even be
			 * readable, so don't bother trying to load it.
			 */
			vdo_set_completion_result(completion, VDO_READ_ONLY);
			break;
		}

		if (requires_repair(vdo)) {
			vdo_repair(completion);
			return;
		}

		vdo_load_slab_depot(vdo->depot,
				    (was_new(vdo) ? VDO_ADMIN_STATE_FORMATTING :
				     VDO_ADMIN_STATE_LOADING),
				    completion, NULL);
		return;

	case LOAD_PHASE_MAKE_DIRTY:
		vdo_set_state(vdo, VDO_DIRTY);
		vdo_save_components(vdo, completion);
		return;

	case LOAD_PHASE_PREPARE_TO_ALLOCATE:
		vdo_initialize_block_map_from_journal(vdo->block_map,
						      vdo->recovery_journal);
		vdo_prepare_slab_depot_to_allocate(vdo->depot, get_load_type(vdo),
						   completion);
		return;

	case LOAD_PHASE_SCRUB_SLABS:
		if (vdo_state_requires_recovery(vdo->load_state))
			vdo_enter_recovery_mode(vdo);

		vdo_scrub_all_unrecovered_slabs(vdo->depot, completion);
		return;

	case LOAD_PHASE_DATA_REDUCTION:
		WRITE_ONCE(vdo->compressing, vdo->device_config->compression);
		if (vdo->device_config->deduplication) {
			/*
			 * Don't try to load or rebuild the index first (and log scary error
			 * messages) if this is known to be a newly-formatted volume.
			 */
			vdo_start_dedupe_index(vdo->hash_zones, was_new(vdo));
		}

		vdo->allocations_allowed = false;
		fallthrough;

	case LOAD_PHASE_FINISHED:
		break;

	case LOAD_PHASE_DRAIN_JOURNAL:
		vdo_drain_recovery_journal(vdo->recovery_journal, VDO_ADMIN_STATE_SAVING,
					   completion);
		return;

	case LOAD_PHASE_WAIT_FOR_READ_ONLY:
		/* Avoid an infinite loop */
		completion->error_handler = NULL;
		vdo->admin.phase = LOAD_PHASE_FINISHED;
		vdo_wait_until_not_entering_read_only_mode(completion);
		return;

	default:
		vdo_set_completion_result(completion, UDS_BAD_STATE);
	}

	finish_operation_callback(completion);
}

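/*
 * Note the two tail phases: LOAD_PHASE_DRAIN_JOURNAL and LOAD_PHASE_WAIT_FOR_READ_ONLY are not
 * reached by advance_phase() during a normal load; they are entered explicitly by
 * handle_load_error() below to unwind an aborted or read-only load.
 */
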
/**
 * handle_load_error() - Handle an error during the load operation.
 * @completion: The admin completion.
 *
 * If at all possible, brings the vdo online in read-only mode. This handler is registered in
 * vdo_preresume_registered().
 */
static void handle_load_error(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;

	if (vdo_requeue_completion_if_needed(completion,
					     vdo->thread_config.admin_thread))
		return;

	if (vdo_state_requires_read_only_rebuild(vdo->load_state) &&
	    (vdo->admin.phase == LOAD_PHASE_MAKE_DIRTY)) {
		vdo_log_error_strerror(completion->result, "aborting load");
		vdo->admin.phase = LOAD_PHASE_DRAIN_JOURNAL;
		load_callback(vdo_forget(completion));
		return;
	}

	if ((completion->result == VDO_UNSUPPORTED_VERSION) &&
	    (vdo->admin.phase == LOAD_PHASE_MAKE_DIRTY)) {
		vdo_log_error("Aborting load due to unsupported version");
		vdo->admin.phase = LOAD_PHASE_FINISHED;
		load_callback(completion);
		return;
	}

	vdo_log_error_strerror(completion->result,
			       "Entering read-only mode due to load error");
	vdo->admin.phase = LOAD_PHASE_WAIT_FOR_READ_ONLY;
	vdo_enter_read_only_mode(vdo, completion->result);
	completion->result = VDO_READ_ONLY;
	load_callback(completion);
}

/**
 * write_super_block_for_resume() - Update the VDO state and save the super block.
 * @completion: The admin completion.
 */
static void write_super_block_for_resume(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;

	switch (vdo_get_state(vdo)) {
	case VDO_CLEAN:
	case VDO_NEW:
		vdo_set_state(vdo, VDO_DIRTY);
		vdo_save_components(vdo, completion);
		return;

	case VDO_DIRTY:
	case VDO_READ_ONLY_MODE:
	case VDO_FORCE_REBUILD:
	case VDO_RECOVERING:
	case VDO_REBUILD_FOR_UPGRADE:
		/* No need to write the super block in these cases */
		vdo_launch_completion(completion);
		return;

	case VDO_REPLAYING:
	default:
		vdo_continue_completion(completion, UDS_BAD_STATE);
	}
}

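/*
 * This is the inverse of write_super_block_for_suspend(): a VDO_CLEAN or VDO_NEW device is marked
 * VDO_DIRTY before new writes are accepted, so an unclean shutdown is detectable at the next
 * load.
 */
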
/**
 * resume_callback() - Callback to resume a VDO.
 * @completion: The admin completion.
 */
static void resume_callback(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;
	int result;

	assert_admin_phase_thread(vdo, __func__);

	switch (advance_phase(vdo)) {
	case RESUME_PHASE_START:
		result = vdo_start_operation(&vdo->admin.state,
					     VDO_ADMIN_STATE_RESUMING);
		if (result != VDO_SUCCESS) {
			vdo_continue_completion(completion, result);
			return;
		}

		write_super_block_for_resume(completion);
		return;

	case RESUME_PHASE_ALLOW_READ_ONLY_MODE:
		vdo_allow_read_only_mode_entry(completion);
		return;

	case RESUME_PHASE_DEDUPE:
		vdo_resume_hash_zones(vdo->hash_zones, completion);
		return;

	case RESUME_PHASE_DEPOT:
		vdo_resume_slab_depot(vdo->depot, completion);
		return;

	case RESUME_PHASE_JOURNAL:
		vdo_resume_recovery_journal(vdo->recovery_journal, completion);
		return;

	case RESUME_PHASE_BLOCK_MAP:
		vdo_resume_block_map(vdo->block_map, completion);
		return;

	case RESUME_PHASE_LOGICAL_ZONES:
		vdo_resume_logical_zones(vdo->logical_zones, completion);
		return;

	case RESUME_PHASE_PACKER:
	{
		bool was_enabled = vdo_get_compressing(vdo);
		bool enable = vdo->device_config->compression;

		if (enable != was_enabled)
			WRITE_ONCE(vdo->compressing, enable);

		vdo_log_info("compression is %s", (enable ? "enabled" : "disabled"));
		vdo_resume_packer(vdo->packer, completion);
		return;
	}

	case RESUME_PHASE_FLUSHER:
		vdo_resume_flusher(vdo->flusher, completion);
		return;

	case RESUME_PHASE_DATA_VIOS:
		resume_data_vio_pool(vdo->data_vio_pool, completion);
		return;

	case RESUME_PHASE_END:
		break;

	default:
		vdo_set_completion_result(completion, UDS_BAD_STATE);
	}

	finish_operation_callback(completion);
}

/**
 * grow_logical_callback() - Callback to initiate a grow logical.
 * @completion: The admin completion.
 *
 * Registered in perform_grow_logical().
 */
static void grow_logical_callback(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;
	int result;

	assert_admin_phase_thread(vdo, __func__);

	switch (advance_phase(vdo)) {
	case GROW_LOGICAL_PHASE_START:
		if (vdo_is_read_only(vdo)) {
			vdo_log_error_strerror(VDO_READ_ONLY,
					       "Can't grow logical size of a read-only VDO");
			vdo_set_completion_result(completion, VDO_READ_ONLY);
			break;
		}

		result = vdo_start_operation(&vdo->admin.state,
					     VDO_ADMIN_STATE_SUSPENDED_OPERATION);
		if (result != VDO_SUCCESS) {
			vdo_continue_completion(completion, result);
			return;
		}

		vdo->states.vdo.config.logical_blocks = vdo->block_map->next_entry_count;
		vdo_save_components(vdo, completion);
		return;

	case GROW_LOGICAL_PHASE_GROW_BLOCK_MAP:
		vdo_grow_block_map(vdo->block_map, completion);
		return;

	case GROW_LOGICAL_PHASE_END:
		break;

	case GROW_LOGICAL_PHASE_ERROR:
		vdo_enter_read_only_mode(vdo, completion->result);
		break;

	default:
		vdo_set_completion_result(completion, UDS_BAD_STATE);
	}

	finish_operation_callback(completion);
}

/**
 * handle_logical_growth_error() - Handle an error during the grow logical process.
 * @completion: The admin completion.
 */
static void handle_logical_growth_error(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;

	if (vdo->admin.phase == GROW_LOGICAL_PHASE_GROW_BLOCK_MAP) {
		/*
		 * We've failed to write the new size in the super block, so set our in memory
		 * config back to the old size.
		 */
		vdo->states.vdo.config.logical_blocks = vdo->block_map->entry_count;
		vdo_abandon_block_map_growth(vdo->block_map);
	}

	vdo->admin.phase = GROW_LOGICAL_PHASE_ERROR;
	grow_logical_callback(completion);
}

/**
 * perform_grow_logical() - Grow the logical size of the vdo.
 * @vdo: The vdo to grow.
 * @new_logical_blocks: The size to which the vdo should be grown.
 *
 * Context: This method may only be called when the vdo has been suspended and must not be called
 *          from a base thread.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int perform_grow_logical(struct vdo *vdo, block_count_t new_logical_blocks)
{
	int result;

	if (vdo->device_config->logical_blocks == new_logical_blocks) {
		/*
		 * A table was loaded for which we prepared to grow, but a table without that
		 * growth was what we are resuming with.
		 */
		vdo_abandon_block_map_growth(vdo->block_map);
		return VDO_SUCCESS;
	}

	vdo_log_info("Resizing logical to %llu",
		     (unsigned long long) new_logical_blocks);
	if (vdo->block_map->next_entry_count != new_logical_blocks)
		return VDO_PARAMETER_MISMATCH;

	result = perform_admin_operation(vdo, GROW_LOGICAL_PHASE_START,
					 grow_logical_callback,
					 handle_logical_growth_error, "grow logical");
	if (result != VDO_SUCCESS)
		return result;

	vdo_log_info("Logical blocks now %llu", (unsigned long long) new_logical_blocks);
	return VDO_SUCCESS;
}

static void copy_callback(int read_err, unsigned long write_err, void *context)
{
	struct vdo_completion *completion = context;
	int result = (((read_err == 0) && (write_err == 0)) ? VDO_SUCCESS : -EIO);

	vdo_continue_completion(completion, result);
}

static void partition_to_region(struct partition *partition, struct vdo *vdo,
				struct dm_io_region *region)
{
	physical_block_number_t pbn = partition->offset - vdo->geometry.bio_offset;

	*region = (struct dm_io_region) {
		.bdev = vdo_get_backing_device(vdo),
		.sector = pbn * VDO_SECTORS_PER_BLOCK,
		.count = partition->count * VDO_SECTORS_PER_BLOCK,
	};
}

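/*
 * The pbn-to-sector conversion uses 512-byte sectors: with VDO's 4 KB blocks,
 * VDO_SECTORS_PER_BLOCK is 8, so a partition at physical block N starts at sector N * 8 on the
 * backing device (after subtracting any bio_offset applied to the geometry).
 */
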
/**
 * copy_partition() - Copy a partition from the location specified in the current layout to that in
 *                    the next layout.
 * @vdo: The vdo preparing to grow.
 * @id: The ID of the partition to copy.
 * @parent: The completion to notify when the copy is complete.
 */
static void copy_partition(struct vdo *vdo, enum partition_id id,
			   struct vdo_completion *parent)
{
	struct dm_io_region read_region, write_regions[1];
	struct partition *from = vdo_get_known_partition(&vdo->layout, id);
	struct partition *to = vdo_get_known_partition(&vdo->next_layout, id);

	partition_to_region(from, vdo, &read_region);
	partition_to_region(to, vdo, &write_regions[0]);
	dm_kcopyd_copy(vdo->partition_copier, &read_region, 1, write_regions, 0,
		       copy_callback, parent);
}

/**
 * grow_physical_callback() - Callback to initiate a grow physical.
 * @completion: The admin completion.
 *
 * Registered in perform_grow_physical().
 */
static void grow_physical_callback(struct vdo_completion *completion)
{
	struct vdo *vdo = completion->vdo;
	int result;

	assert_admin_phase_thread(vdo, __func__);

	switch (advance_phase(vdo)) {
	case GROW_PHYSICAL_PHASE_START:
		if (vdo_is_read_only(vdo)) {
			vdo_log_error_strerror(VDO_READ_ONLY,
					       "Can't grow physical size of a read-only VDO");
			vdo_set_completion_result(completion, VDO_READ_ONLY);
			break;
		}

		result = vdo_start_operation(&vdo->admin.state,
					     VDO_ADMIN_STATE_SUSPENDED_OPERATION);
		if (result != VDO_SUCCESS) {
			vdo_continue_completion(completion, result);
			return;
		}

		/* Copy the journal into the new layout. */
		copy_partition(vdo, VDO_RECOVERY_JOURNAL_PARTITION, completion);
		return;

	case GROW_PHYSICAL_PHASE_COPY_SUMMARY:
		copy_partition(vdo, VDO_SLAB_SUMMARY_PARTITION, completion);
		return;

	case GROW_PHYSICAL_PHASE_UPDATE_COMPONENTS:
		vdo_uninitialize_layout(&vdo->layout);
		vdo->layout = vdo->next_layout;
		vdo_forget(vdo->next_layout.head);
		vdo->states.vdo.config.physical_blocks = vdo->layout.size;
		vdo_update_slab_depot_size(vdo->depot);
		vdo_save_components(vdo, completion);
		return;

	case GROW_PHYSICAL_PHASE_USE_NEW_SLABS:
		vdo_use_new_slabs(vdo->depot, completion);
		return;

	case GROW_PHYSICAL_PHASE_END:
		vdo->depot->summary_origin =
			vdo_get_known_partition(&vdo->layout,
						VDO_SLAB_SUMMARY_PARTITION)->offset;
		vdo->recovery_journal->origin =
			vdo_get_known_partition(&vdo->layout,
						VDO_RECOVERY_JOURNAL_PARTITION)->offset;
		break;

	case GROW_PHYSICAL_PHASE_ERROR:
		vdo_enter_read_only_mode(vdo, completion->result);
		break;

	default:
		vdo_set_completion_result(completion, UDS_BAD_STATE);
	}

	vdo_uninitialize_layout(&vdo->next_layout);
	finish_operation_callback(completion);
}

/**
 * handle_physical_growth_error() - Handle an error during the grow physical process.
 * @completion: The sub-task completion.
 */
static void handle_physical_growth_error(struct vdo_completion *completion)
{
	completion->vdo->admin.phase = GROW_PHYSICAL_PHASE_ERROR;
	grow_physical_callback(completion);
}

/**
 * perform_grow_physical() - Grow the physical size of the vdo.
 * @vdo: The vdo to resize.
 * @new_physical_blocks: The new physical size in blocks.
 *
 * Context: This method may only be called when the vdo has been suspended and must not be called
 *          from a base thread.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int perform_grow_physical(struct vdo *vdo, block_count_t new_physical_blocks)
{
	int result;
	block_count_t new_depot_size, prepared_depot_size;
	block_count_t old_physical_blocks = vdo->states.vdo.config.physical_blocks;

	/* Skip any noop grows. */
	if (old_physical_blocks == new_physical_blocks)
		return VDO_SUCCESS;

	if (new_physical_blocks != vdo->next_layout.size) {
		/*
		 * Either the VDO isn't prepared to grow, or it was prepared to grow to a different
		 * size. Doing this check here relies on the fact that the call to this method is
		 * done under the dmsetup message lock.
		 */
		vdo_uninitialize_layout(&vdo->next_layout);
		vdo_abandon_new_slabs(vdo->depot);
		return VDO_PARAMETER_MISMATCH;
	}

	/* Validate that we are prepared to grow appropriately. */
	new_depot_size =
		vdo_get_known_partition(&vdo->next_layout, VDO_SLAB_DEPOT_PARTITION)->count;
	prepared_depot_size = (vdo->depot->new_slabs == NULL) ? 0 : vdo->depot->new_size;
	if (prepared_depot_size != new_depot_size)
		return VDO_PARAMETER_MISMATCH;

	result = perform_admin_operation(vdo, GROW_PHYSICAL_PHASE_START,
					 grow_physical_callback,
					 handle_physical_growth_error, "grow physical");
	if (result != VDO_SUCCESS)
		return result;

	vdo_log_info("Physical block count was %llu, now %llu",
		     (unsigned long long) old_physical_blocks,
		     (unsigned long long) new_physical_blocks);
	return VDO_SUCCESS;
}

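/*
 * Grow-physical sequencing: the recovery journal and slab summary are first copied into the new
 * layout, then the super block is written with the new layout and physical size, and only then
 * are the new slabs put into service. An error in any phase drops into
 * GROW_PHYSICAL_PHASE_ERROR, which forces the device read-only.
 */
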
/**
 * apply_new_vdo_configuration() - Attempt to make any configuration changes from the table being
 *                                 resumed.
 * @vdo: The vdo being resumed.
 * @config: The new device configuration derived from the table with which the vdo is being
 *          resumed.
 *
 * Return: VDO_SUCCESS or an error.
 */
static int __must_check apply_new_vdo_configuration(struct vdo *vdo,
						    struct device_config *config)
{
	int result;

	result = perform_grow_logical(vdo, config->logical_blocks);
	if (result != VDO_SUCCESS) {
		vdo_log_error("grow logical operation failed, result = %d", result);
		return result;
	}

	result = perform_grow_physical(vdo, config->physical_blocks);
	if (result != VDO_SUCCESS)
		vdo_log_error("resize operation failed, result = %d", result);

	return result;
}

static int vdo_preresume_registered(struct dm_target *ti, struct vdo *vdo)
{
	struct device_config *config = ti->private;
	const char *device_name = vdo_get_device_name(ti);
	block_count_t backing_blocks;
	int result;

	backing_blocks = get_underlying_device_block_count(vdo);
	if (backing_blocks < config->physical_blocks) {
		/* FIXME: can this still happen? */
		vdo_log_error("resume of device '%s' failed: backing device has %llu blocks but VDO physical size is %llu blocks",
			      device_name, (unsigned long long) backing_blocks,
			      (unsigned long long) config->physical_blocks);
		return -EINVAL;
	}

	if (vdo_get_admin_state(vdo) == VDO_ADMIN_STATE_PRE_LOADED) {
		vdo_log_info("starting device '%s'", device_name);
		result = perform_admin_operation(vdo, LOAD_PHASE_START, load_callback,
						 handle_load_error, "load");
		if (result == VDO_UNSUPPORTED_VERSION) {
			/*
			 * A component version is not supported. This can happen when the
			 * recovery journal metadata is in an old version format. Abort the
			 * load without saving the state.
			 */
			vdo->suspend_type = VDO_ADMIN_STATE_SUSPENDING;
			perform_admin_operation(vdo, SUSPEND_PHASE_START,
						suspend_callback, suspend_callback,
						"suspend");
			return result;
		}

		if ((result != VDO_SUCCESS) && (result != VDO_READ_ONLY)) {
			/*
			 * Something has gone very wrong. Make sure everything has drained and
			 * leave the device in an unresumable state.
			 */
			vdo_log_error_strerror(result,
					       "Start failed, could not load VDO metadata");
			vdo->suspend_type = VDO_ADMIN_STATE_STOPPING;
			perform_admin_operation(vdo, SUSPEND_PHASE_START,
						suspend_callback, suspend_callback,
						"suspend");
			return result;
		}

		/* Even if the VDO is read-only, it is now able to handle read requests. */
		vdo_log_info("device '%s' started", device_name);
	}

	vdo_log_info("resuming device '%s'", device_name);

	/* If this fails, the VDO was not in a state to be resumed. This should never happen. */
	result = apply_new_vdo_configuration(vdo, config);
	BUG_ON(result == VDO_INVALID_ADMIN_STATE);

	/*
	 * Now that we've tried to modify the vdo, the new config *is* the config, whether the
	 * modifications worked or not.
	 */
	vdo->device_config = config;

	/*
	 * Any error here is highly unexpected and the state of the vdo is questionable, so we mark
	 * it read-only in memory. Because we are suspended, the read-only state will not be
	 * written to disk.
	 */
	if (result != VDO_SUCCESS) {
		vdo_log_error_strerror(result,
				       "Commit of modifications to device '%s' failed",
				       device_name);
		vdo_enter_read_only_mode(vdo, result);
		return result;
	}

	if (vdo_get_admin_state(vdo)->normal) {
		/* The VDO was just started, so we don't need to resume it. */
		return VDO_SUCCESS;
	}

	result = perform_admin_operation(vdo, RESUME_PHASE_START, resume_callback,
					 resume_callback, "resume");
	BUG_ON(result == VDO_INVALID_ADMIN_STATE);
	if (result == VDO_READ_ONLY) {
		/* Even if the vdo is read-only, it has still resumed. */
		result = VDO_SUCCESS;
	}

	if (result != VDO_SUCCESS)
		vdo_log_error("resume of device '%s' failed with error: %d", device_name,
			      result);

	return result;
}

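/*
 * Thus preresume does double duty: on the first resume after a table load it performs the
 * destructive load of the VDO metadata, and on every resume it applies any pending logical or
 * physical growth from the new table before the device starts servicing I/O again.
 */
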
static int vdo_preresume(struct dm_target *ti)
{
	struct registered_thread instance_thread;
	struct vdo *vdo = get_vdo_for_target(ti);
	int result;

	vdo_register_thread_device_id(&instance_thread, &vdo->instance);
	result = vdo_preresume_registered(ti, vdo);
	if ((result == VDO_PARAMETER_MISMATCH) || (result == VDO_INVALID_ADMIN_STATE) ||
	    (result == VDO_UNSUPPORTED_VERSION))
		result = -EINVAL;

	vdo_unregister_thread_device_id();
	return vdo_status_to_errno(result);
}

static void vdo_resume(struct dm_target *ti)
{
	struct registered_thread instance_thread;

	vdo_register_thread_device_id(&instance_thread,
				      &get_vdo_for_target(ti)->instance);
	vdo_log_info("device '%s' resumed", vdo_get_device_name(ti));
	vdo_unregister_thread_device_id();
}

/*
 * If anything changes that affects how user tools will interact with vdo, update the version
 * number and make sure documentation about the change is complete so tools can properly update
 * their management code.
 */
static struct target_type vdo_target_bio = {
	.features = DM_TARGET_SINGLETON,
	.name = "vdo",
	.version = { 9, 1, 0 },
	.module = THIS_MODULE,
	.ctr = vdo_ctr,
	.dtr = vdo_dtr,
	.io_hints = vdo_io_hints,
	.iterate_devices = vdo_iterate_devices,
	.map = vdo_map_bio,
	.message = vdo_message,
	.status = vdo_status,
	.presuspend = vdo_presuspend,
	.postsuspend = vdo_postsuspend,
	.preresume = vdo_preresume,
	.resume = vdo_resume,
};

static bool dm_registered;

static void vdo_module_destroy(void)
{
	vdo_log_debug("unloading");

	if (dm_registered)
		dm_unregister_target(&vdo_target_bio);

	VDO_ASSERT_LOG_ONLY(instances.count == 0,
			    "should have no instance numbers still in use, but have %u",
			    instances.count);
	vdo_free(instances.words);
	memset(&instances, 0, sizeof(struct instance_tracker));
}

static int __init vdo_init(void)
{
	int result = 0;

	/* Memory tracking must be initialized first for accurate accounting. */
	vdo_memory_init();
	vdo_initialize_threads_mutex();
	vdo_initialize_thread_device_registry();
	vdo_initialize_device_registry_once();

	/* Add VDO errors to the set of errors registered by the indexer. */
	result = vdo_register_status_codes();
	if (result != VDO_SUCCESS) {
		vdo_log_error("vdo_register_status_codes failed %d", result);
		vdo_module_destroy();
		return result;
	}

	result = dm_register_target(&vdo_target_bio);
	if (result < 0) {
		vdo_log_error("dm_register_target failed %d", result);
		vdo_module_destroy();
		return result;
	}

	dm_registered = true;
	return result;
}

static void __exit vdo_exit(void)
{
	vdo_module_destroy();
	/* Memory tracking cleanup must be done last. */
	vdo_memory_exit();
}

module_init(vdo_init);
module_exit(vdo_exit);

module_param_named(log_level, vdo_log_level, uint, 0644);
MODULE_PARM_DESC(log_level, "Log level for log messages");

MODULE_DESCRIPTION(DM_NAME " target for transparent deduplication");
MODULE_AUTHOR("Red Hat, Inc.");
MODULE_LICENSE("GPL");