Merge tag 'nfs-for-5.13-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Stable fixes:

   - Add validation of the UDP retrans parameter to prevent shift
     out-of-bounds

   - Don't discard pNFS layout segments that are marked for return

  Bugfixes:

   - Fix a NULL dereference crash in xprt_complete_bc_request() when the
     NFSv4.1 server misbehaves.

   - Fix the handling of NFS READDIR cookie verifiers

   - Sundry fixes to ensure attribute revalidation works correctly when
     the server does not return post-op attributes.

   - nfs4_bitmask_adjust() must not change the server global bitmasks

   - Fix major timeout handling in the RPC code.

   - NFSv4.2 fallocate() fixes.

   - Fix the NFSv4.2 SEEK_HOLE/SEEK_DATA end-of-file handling

   - Copy offload attribute revalidation fixes

   - Fix an incorrect filehandle size check in the pNFS flexfiles driver

   - Fix several RDMA transport setup/teardown races

   - Fix several RDMA queue wrapping issues

   - Fix a misplaced memory read barrier in sunrpc's call_decode()

  Features:

   - Micro optimisation of the TCP transmission queue using TCP_CORK
     (a userspace sketch of the corking pattern follows the commit
     list below)

   - statx() performance improvements by further splitting up the
     tracking of invalid cached file metadata (a statx() usage sketch
     follows these highlights).

   - Support the NFSv4.2 'change_attr_type' attribute and use it to
     optimise handling of change attribute updates"
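
A brief illustration of the statx() item above: with per-attribute
validity tracking, a caller that requests only some attributes no
longer forces a round trip merely because an unrelated cached
attribute is stale. This is a minimal userspace sketch, not kernel
code; the path is hypothetical and the statx() wrapper needs glibc
2.28 or later:

	#define _GNU_SOURCE
	#include <fcntl.h>	/* AT_FDCWD */
	#include <stdio.h>
	#include <sys/stat.h>	/* statx(), struct statx, STATX_* */

	int main(void)
	{
		struct statx stx;

		/* Ask only for size and mtime; on 5.13+ the NFS client
		 * only needs those attributes to be valid in its cache
		 * to answer without an RPC. */
		if (statx(AT_FDCWD, "/mnt/nfs/file", 0,
			  STATX_SIZE | STATX_MTIME, &stx) != 0) {
			perror("statx");
			return 1;
		}
		printf("size=%llu\n", (unsigned long long)stx.stx_size);
		return 0;
	}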

* tag 'nfs-for-5.13-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (85 commits)
  xprtrdma: Fix a NULL dereference in frwr_unmap_sync()
  sunrpc: Fix misplaced barrier in call_decode
  NFSv4.2: Remove ifdef CONFIG_NFSD from NFSv4.2 client SSC code.
  xprtrdma: Move fr_mr field to struct rpcrdma_mr
  xprtrdma: Move the Work Request union to struct rpcrdma_mr
  xprtrdma: Move fr_linv_done field to struct rpcrdma_mr
  xprtrdma: Move cqe to struct rpcrdma_mr
  xprtrdma: Move fr_cid to struct rpcrdma_mr
  xprtrdma: Remove the RPC/RDMA QP event handler
  xprtrdma: Don't display r_xprt memory addresses in tracepoints
  xprtrdma: Add an rpcrdma_mr_completion_class
  xprtrdma: Add tracepoints showing FastReg WRs and remote invalidation
  xprtrdma: Avoid Send Queue wrapping
  xprtrdma: Do not wake RPC consumer on a failed LocalInv
  xprtrdma: Do not recycle MR after FastReg/LocalInv flushes
  xprtrdma: Clarify use of barrier in frwr_wc_localinv_done()
  xprtrdma: Rename frwr_release_mr()
  xprtrdma: rpcrdma_mr_pop() already does list_del_init()
  xprtrdma: Delete rpcrdma_recv_buffer_put()
  xprtrdma: Fix cwnd update ordering
  ...
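
On the TCP_CORK item in the highlights: sunrpc now corks the socket
while its transmit queue still holds more to send, so small RPC
records leave as full-sized TCP segments. The kernel uses the
in-kernel corking API; the following is only a userspace sketch of
the same pattern, not the sunrpc code itself:

	#include <netinet/in.h>
	#include <netinet/tcp.h>	/* TCP_CORK */
	#include <sys/socket.h>
	#include <sys/uio.h>		/* struct iovec */
	#include <unistd.h>

	/* Hold small writes back while more are queued, then uncork
	 * so the data goes out as full-sized segments. */
	static void send_batched(int sock, const struct iovec *iov, int n)
	{
		int on = 1, off = 0;

		setsockopt(sock, IPPROTO_TCP, TCP_CORK, &on, sizeof(on));
		for (int i = 0; i < n; i++)
			(void)write(sock, iov[i].iov_base, iov[i].iov_len);
		setsockopt(sock, IPPROTO_TCP, TCP_CORK, &off, sizeof(off));
	}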
Linus Torvalds, 2021-05-07 11:23:41 -07:00
commit a647034fe2
48 changed files with 1120 additions and 723 deletions

@@ -338,8 +338,8 @@ config NFS_COMMON
 	default y
 
 config NFS_V4_2_SSC_HELPER
-	tristate
-	default y if NFS_V4=y || NFS_FS=y
+	bool
+	default y if NFS_V4_2
 
 source "net/sunrpc/Kconfig"
 source "fs/ceph/Kconfig"


@@ -137,12 +137,12 @@ static struct inode *nfs_layout_find_inode_by_stateid(struct nfs_client *clp,
 	list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) {
 		if (!pnfs_layout_is_valid(lo))
 			continue;
-		if (stateid != NULL &&
-		    !nfs4_stateid_match_other(stateid, &lo->plh_stateid))
+		if (!nfs4_stateid_match_other(stateid, &lo->plh_stateid))
 			continue;
-		if (!nfs_sb_active(server->super))
-			continue;
-		inode = igrab(lo->plh_inode);
+		if (nfs_sb_active(server->super))
+			inode = igrab(lo->plh_inode);
+		else
+			inode = ERR_PTR(-EAGAIN);
 		rcu_read_unlock();
 		if (inode)
 			return inode;
@@ -176,9 +176,10 @@ static struct inode *nfs_layout_find_inode_by_fh(struct nfs_client *clp,
 			continue;
 		if (nfsi->layout != lo)
 			continue;
-		if (!nfs_sb_active(server->super))
-			continue;
-		inode = igrab(lo->plh_inode);
+		if (nfs_sb_active(server->super))
+			inode = igrab(lo->plh_inode);
+		else
+			inode = ERR_PTR(-EAGAIN);
 		rcu_read_unlock();
 		if (inode)
 			return inode;


@@ -476,7 +476,6 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
 		to->to_maxval = to->to_initval;
 		to->to_exponential = 0;
 		break;
-#ifndef CONFIG_NFS_DISABLE_UDP_SUPPORT
 	case XPRT_TRANSPORT_UDP:
 		if (retrans == NFS_UNSPEC_RETRANS)
 			to->to_retries = NFS_DEF_UDP_RETRANS;
@@ -487,7 +486,6 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
 		to->to_maxval = NFS_MAX_UDP_TIMEOUT;
 		to->to_exponential = 1;
 		break;
-#endif
 	default:
 		BUG();
 	}
@@ -698,9 +696,18 @@ static int nfs_init_server(struct nfs_server *server,
 	/* Initialise the client representation from the mount data */
 	server->flags = ctx->flags;
 	server->options = ctx->options;
-	server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
-		NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP|
-		NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME;
+	server->caps |= NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS;
+
+	switch (clp->rpc_ops->version) {
+	case 2:
+		server->fattr_valid = NFS_ATTR_FATTR_V2;
+		break;
+	case 3:
+		server->fattr_valid = NFS_ATTR_FATTR_V3;
+		break;
+	default:
+		server->fattr_valid = NFS_ATTR_FATTR_V4;
+	}
 
 	if (ctx->rsize)
 		server->rsize = nfs_block_size(ctx->rsize, NULL);
@@ -794,6 +801,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
 	server->maxfilesize = fsinfo->maxfilesize;
 	server->time_delta = fsinfo->time_delta;
+	server->change_attr_type = fsinfo->change_attr_type;
 	server->clone_blksize = fsinfo->clone_blksize;
 
 	/* We're airborne Set socket buffersize */
@@ -935,6 +943,8 @@ struct nfs_server *nfs_alloc_server(void)
 		return NULL;
 	}
 
+	server->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED;
+
 	ida_init(&server->openowner_id);
 	ida_init(&server->lockowner_id);
 	pnfs_init_server(server);


@@ -114,7 +114,7 @@ nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
 	return ret;
 }
 
 /**
- * nfs_have_delegation - check if inode has a delegation, mark it
+ * nfs4_have_delegation - check if inode has a delegation, mark it
  * NFS_DELEGATION_REFERENCED if there is one.
  * @inode: inode to check
  * @flags: delegation types to check for
@@ -481,6 +481,22 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
 	if (freeme == NULL)
 		goto out;
 add_new:
+	/*
+	 * If we didn't revalidate the change attribute before setting
+	 * the delegation, then pre-emptively ask for a full attribute
+	 * cache revalidation.
+	 */
+	spin_lock(&inode->i_lock);
+	if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_CHANGE)
+		nfs_set_cache_invalid(inode,
+			NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME |
+			NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE |
+			NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK |
+			NFS_INO_INVALID_OTHER | NFS_INO_INVALID_DATA |
+			NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL |
+			NFS_INO_INVALID_XATTR);
+	spin_unlock(&inode->i_lock);
+
 	list_add_tail_rcu(&delegation->super_list, &server->delegations);
 	rcu_assign_pointer(nfsi->delegation, delegation);
 	delegation = NULL;
@@ -488,11 +504,6 @@ add_new:
 	atomic_long_inc(&nfs_active_delegations);
 	trace_nfs4_set_delegation(inode, type);
 
-	spin_lock(&inode->i_lock);
-	if (NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME))
-		NFS_I(inode)->cache_validity |= NFS_INO_REVAL_FORCED;
-	spin_unlock(&inode->i_lock);
-
 out:
 	spin_unlock(&clp->cl_lock);
 	if (delegation != NULL)
@@ -674,7 +685,7 @@ void nfs_inode_evict_delegation(struct inode *inode)
 }
 
 /**
- * nfs_inode_return_delegation - synchronously return a delegation
+ * nfs4_inode_return_delegation - synchronously return a delegation
  * @inode: inode to process
  *
  * This routine will always flush any dirty data to disk on the
@@ -697,7 +708,7 @@ int nfs4_inode_return_delegation(struct inode *inode)
 }
 
 /**
- * nfs_inode_return_delegation_on_close - asynchronously return a delegation
+ * nfs4_inode_return_delegation_on_close - asynchronously return a delegation
  * @inode: inode to process
  *
  * This routine is called on file close in order to determine if the
@@ -811,7 +822,7 @@ void nfs_expire_all_delegations(struct nfs_client *clp)
 }
 
 /**
- * nfs_super_return_all_delegations - return delegations for one superblock
+ * nfs_server_return_all_delegations - return delegations for one superblock
  * @server: pointer to nfs_server to process
  *
 */


@@ -84,8 +84,7 @@ int nfs4_inode_make_writeable(struct inode *inode);
 
 static inline int nfs_have_delegated_attributes(struct inode *inode)
 {
-	return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) &&
-		!(NFS_I(inode)->cache_validity & NFS_INO_REVAL_FORCED);
+	return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ);
 }
 
 #endif


@@ -866,6 +866,8 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
 			break;
 		}
 
+		verf_arg = verf_res;
+
 		status = nfs_readdir_page_filler(desc, entry, pages, pglen,
 						 arrays, narrays);
 	} while (!status && nfs_readdir_page_needs_filling(page));
@@ -927,7 +929,12 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
 			}
 			return res;
 		}
-		memcpy(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf));
+		/*
+		 * Set the cookie verifier if the page cache was empty
+		 */
+		if (desc->page_index == 0)
+			memcpy(nfsi->cookieverf, verf,
+			       sizeof(nfsi->cookieverf));
 	}
 	res = nfs_readdir_search_array(desc);
 	if (res == 0) {
@@ -974,10 +981,10 @@ static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc)
 /*
  * Once we've found the start of the dirent within a page: fill 'er up...
  */
-static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
+static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
+			   const __be32 *verf)
 {
 	struct file *file = desc->file;
-	struct nfs_inode *nfsi = NFS_I(file_inode(file));
 	struct nfs_cache_array *array;
 	unsigned int i = 0;
@@ -991,7 +998,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
 			desc->eof = true;
 			break;
 		}
-		memcpy(desc->verf, nfsi->cookieverf, sizeof(desc->verf));
+		memcpy(desc->verf, verf, sizeof(desc->verf));
 		if (i < (array->size-1))
 			desc->dir_cookie = array->array[i+1].cookie;
 		else
@@ -1048,7 +1055,7 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc)
 	for (i = 0; !desc->eof && i < sz && arrays[i]; i++) {
 		desc->page = arrays[i];
-		nfs_do_filldir(desc);
+		nfs_do_filldir(desc, verf);
 	}
 	desc->page = NULL;
@@ -1069,6 +1076,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 {
 	struct dentry *dentry = file_dentry(file);
 	struct inode *inode = d_inode(dentry);
+	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_open_dir_context *dir_ctx = file->private_data;
 	struct nfs_readdir_descriptor *desc;
 	int res;
@@ -1122,7 +1130,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 			break;
 		}
 		if (res == -ETOOSMALL && desc->plus) {
-			clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
+			clear_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
 			nfs_zap_caches(inode);
 			desc->page_index = 0;
 			desc->plus = false;
@@ -1132,7 +1140,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 		if (res < 0)
 			break;
 
-		nfs_do_filldir(desc);
+		nfs_do_filldir(desc, nfsi->cookieverf);
 		nfs_readdir_page_unlock_and_put_cached(desc);
 	} while (!desc->eof);
@@ -1703,7 +1711,7 @@ static void nfs_drop_nlink(struct inode *inode)
 	NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter();
 	nfs_set_cache_invalid(
 		inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME |
-			       NFS_INO_INVALID_OTHER | NFS_INO_REVAL_FORCED);
+			       NFS_INO_INVALID_NLINK);
 	spin_unlock(&inode->i_lock);
 }
@@ -2940,7 +2948,7 @@ static int nfs_execute_ok(struct inode *inode, int mask)
 
 	if (S_ISDIR(inode->i_mode))
 		return 0;
-	if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_OTHER)) {
+	if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_MODE)) {
 		if (mask & MAY_NOT_BLOCK)
 			return -ECHILD;
 		ret = __nfs_revalidate_inode(server, inode);
@@ -2998,7 +3006,8 @@ out_notsup:
 	if (mask & MAY_NOT_BLOCK)
 		return -ECHILD;
-	res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+	res = nfs_revalidate_inode(inode, NFS_INO_INVALID_MODE |
+					  NFS_INO_INVALID_OTHER);
 	if (res == 0)
 		res = generic_permission(&init_user_ns, inode, mask);
 	goto out;


@@ -169,19 +169,8 @@ out:
 
 static u64 nfs_fetch_iversion(struct inode *inode)
 {
-	struct nfs_server *server = NFS_SERVER(inode);
-
-	/* Is this the right call?: */
-	nfs_revalidate_inode(server, inode);
-
-	/*
-	 * Also, note we're ignoring any returned error. That seems to be
-	 * the practice for cache consistency information elsewhere in
-	 * the server, but I'm not sure why.
-	 */
-	if (server->nfs_client->rpc_ops->version >= 4)
-		return inode_peek_iversion_raw(inode);
-	else
-		return time_to_chattr(&inode->i_ctime);
+	nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
+	return inode_peek_iversion_raw(inode);
 }
 
 const struct export_operations nfs_export_ops = {


@@ -105,7 +105,7 @@ static int nfs_revalidate_file_size(struct inode *inode, struct file *filp)
 
 	if (filp->f_flags & O_DIRECT)
 		goto force_reval;
-	if (nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE))
+	if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_SIZE))
 		goto force_reval;
 	return 0;
 force_reval:


@@ -106,7 +106,7 @@ static int decode_nfs_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
 	if (unlikely(!p))
 		return -ENOBUFS;
 	fh->size = be32_to_cpup(p++);
-	if (fh->size > sizeof(struct nfs_fh)) {
+	if (fh->size > NFS_MAXFHSIZE) {
 		printk(KERN_ERR "NFS flexfiles: Too big fh received %d\n",
 		       fh->size);
 		return -EOVERFLOW;


@@ -283,20 +283,40 @@ static int nfs_verify_server_address(struct sockaddr *addr)
 	return 0;
 }
 
+#ifdef CONFIG_NFS_DISABLE_UDP_SUPPORT
+static bool nfs_server_transport_udp_invalid(const struct nfs_fs_context *ctx)
+{
+	return true;
+}
+#else
+static bool nfs_server_transport_udp_invalid(const struct nfs_fs_context *ctx)
+{
+	if (ctx->version == 4)
+		return true;
+	return false;
+}
+#endif
+
 /*
  * Sanity check the NFS transport protocol.
+ *
 */
-static void nfs_validate_transport_protocol(struct nfs_fs_context *ctx)
+static int nfs_validate_transport_protocol(struct fs_context *fc,
+					   struct nfs_fs_context *ctx)
 {
 	switch (ctx->nfs_server.protocol) {
 	case XPRT_TRANSPORT_UDP:
+		if (nfs_server_transport_udp_invalid(ctx))
+			goto out_invalid_transport_udp;
+		break;
 	case XPRT_TRANSPORT_TCP:
 	case XPRT_TRANSPORT_RDMA:
 		break;
 	default:
 		ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP;
 	}
+	return 0;
+out_invalid_transport_udp:
+	return nfs_invalf(fc, "NFS: Unsupported transport protocol udp");
 }
 
 /*
@@ -305,8 +325,6 @@ static void nfs_validate_transport_protocol(struct nfs_fs_context *ctx)
 */
 static void nfs_set_mount_transport_protocol(struct nfs_fs_context *ctx)
 {
-	nfs_validate_transport_protocol(ctx);
-
 	if (ctx->mount_server.protocol == XPRT_TRANSPORT_UDP ||
 	    ctx->mount_server.protocol == XPRT_TRANSPORT_TCP)
 		return;
@@ -932,6 +950,7 @@ static int nfs23_parse_monolithic(struct fs_context *fc,
 	struct nfs_fh *mntfh = ctx->mntfh;
 	struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
 	int extra_flags = NFS_MOUNT_LEGACY_INTERFACE;
+	int ret;
 
 	if (data == NULL)
 		goto out_no_data;
@@ -976,6 +995,15 @@ static int nfs23_parse_monolithic(struct fs_context *fc,
 			memset(mntfh->data + mntfh->size, 0,
 			       sizeof(mntfh->data) - mntfh->size);
 
+		/*
+		 * for proto == XPRT_TRANSPORT_UDP, which is what uses
+		 * to_exponential, implying shift: limit the shift value
+		 * to BITS_PER_LONG (majortimeo is unsigned long)
+		 */
+		if (!(data->flags & NFS_MOUNT_TCP)) /* this will be UDP */
+			if (data->retrans >= 64) /* shift value is too large */
+				goto out_invalid_data;
+
 		/*
 		 * Translate to nfs_fs_context, which nfs_fill_super
 		 * can deal with.
@@ -1048,6 +1076,10 @@ static int nfs23_parse_monolithic(struct fs_context *fc,
 		goto generic;
 	}
 
+	ret = nfs_validate_transport_protocol(fc, ctx);
+	if (ret)
+		return ret;
+
 	ctx->skip_reconfig_option_check = true;
 	return 0;
@@ -1076,6 +1108,9 @@ out_no_address:
 
 out_invalid_fh:
 	return nfs_invalf(fc, "NFS: invalid root filehandle");
+
+out_invalid_data:
+	return nfs_invalf(fc, "NFS: invalid binary mount data");
 }
 
 #if IS_ENABLED(CONFIG_NFS_V4)
@@ -1146,6 +1181,7 @@ static int nfs4_parse_monolithic(struct fs_context *fc,
 {
 	struct nfs_fs_context *ctx = nfs_fc2context(fc);
 	struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
+	int ret;
 	char *c;
 
 	if (!data) {
@@ -1218,9 +1254,9 @@ static int nfs4_parse_monolithic(struct fs_context *fc,
 	ctx->acdirmin = data->acdirmin;
 	ctx->acdirmax = data->acdirmax;
 	ctx->nfs_server.protocol = data->proto;
-	nfs_validate_transport_protocol(ctx);
-	if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP)
-		goto out_invalid_transport_udp;
+	ret = nfs_validate_transport_protocol(fc, ctx);
+	if (ret)
+		return ret;
 done:
 	ctx->skip_reconfig_option_check = true;
 	return 0;
@@ -1231,9 +1267,6 @@ out_inval_auth:
 
 out_no_address:
 	return nfs_invalf(fc, "NFS4: mount program didn't pass remote address");
-
-out_invalid_transport_udp:
-	return nfs_invalf(fc, "NFS: Unsupported transport protocol udp");
 }
 
 #endif
@@ -1298,6 +1331,10 @@ static int nfs_fs_context_validate(struct fs_context *fc)
 	if (!nfs_verify_server_address(sap))
 		goto out_no_address;
 
+	ret = nfs_validate_transport_protocol(fc, ctx);
+	if (ret)
+		return ret;
+
 	if (ctx->version == 4) {
 		if (IS_ENABLED(CONFIG_NFS_V4)) {
 			if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
@@ -1306,9 +1343,6 @@ static int nfs_fs_context_validate(struct fs_context *fc)
 				port = NFS_PORT;
 			max_namelen = NFS4_MAXNAMLEN;
 			max_pathlen = NFS4_MAXPATHLEN;
-			nfs_validate_transport_protocol(ctx);
-			if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP)
-				goto out_invalid_transport_udp;
 			ctx->flags &= ~(NFS_MOUNT_NONLM | NFS_MOUNT_NOACL |
 					NFS_MOUNT_VER3 | NFS_MOUNT_LOCAL_FLOCK |
 					NFS_MOUNT_LOCAL_FCNTL);
@@ -1317,10 +1351,6 @@ static int nfs_fs_context_validate(struct fs_context *fc)
 		}
 	} else {
 		nfs_set_mount_transport_protocol(ctx);
-#ifdef CONFIG_NFS_DISABLE_UDP_SUPPORT
-		if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP)
-			goto out_invalid_transport_udp;
-#endif
 		if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
 			port = NFS_RDMA_PORT;
 	}
@@ -1354,8 +1384,6 @@ out_no_device_name:
 out_v4_not_compiled:
 	nfs_errorf(fc, "NFS: NFSv4 is not compiled into kernel");
 	return -EPROTONOSUPPORT;
-out_invalid_transport_udp:
-	return nfs_invalf(fc, "NFS: Unsupported transport protocol udp");
 out_no_address:
 	return nfs_invalf(fc, "NFS: mount program didn't pass remote address");
 out_mountproto_mismatch:
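
Context for the retrans bound added above: for UDP mounts the timeout
code sets to_exponential, and the major timeout is derived by
left-shifting an unsigned long by the retransmit count, so an
unchecked retrans of 64 or more is an undefined shift (the "shift
out-of-bounds" in the highlights). A standalone sketch of the bound,
with illustrative values only:

	#include <limits.h>	/* CHAR_BIT */
	#include <stdio.h>

	int main(void)
	{
		unsigned long timeo = 7;	/* tenths of a second */
		unsigned int retrans = 64;	/* as read from mount data */

		/* Shifting an unsigned long by >= BITS_PER_LONG is
		 * undefined behaviour; this is what the new
		 * "retrans >= 64" check in the parser rejects. */
		if (retrans >= sizeof(unsigned long) * CHAR_BIT)
			fprintf(stderr, "retrans %u rejected\n", retrans);
		else
			printf("major timeout = %lu\n", timeo << retrans);
		return 0;
	}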


@@ -164,34 +164,19 @@ static int nfs_attribute_timeout(struct inode *inode)
 	return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
 }
 
-static bool nfs_check_cache_invalid_delegated(struct inode *inode, unsigned long flags)
+static bool nfs_check_cache_flags_invalid(struct inode *inode,
+					  unsigned long flags)
 {
 	unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
 
-	/* Special case for the pagecache or access cache */
-	if (flags == NFS_INO_REVAL_PAGECACHE &&
-	    !(cache_validity & NFS_INO_REVAL_FORCED))
-		return false;
 	return (cache_validity & flags) != 0;
 }
 
-static bool nfs_check_cache_invalid_not_delegated(struct inode *inode, unsigned long flags)
-{
-	unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
-
-	if ((cache_validity & flags) != 0)
-		return true;
-	if (nfs_attribute_timeout(inode))
-		return true;
-	return false;
-}
-
 bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags)
 {
-	if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
-		return nfs_check_cache_invalid_delegated(inode, flags);
-
-	return nfs_check_cache_invalid_not_delegated(inode, flags);
+	if (nfs_check_cache_flags_invalid(inode, flags))
+		return true;
+	return nfs_attribute_cache_expired(inode);
 }
 EXPORT_SYMBOL_GPL(nfs_check_cache_invalid);
@@ -214,20 +199,21 @@ void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
 	if (have_delegation) {
 		if (!(flags & NFS_INO_REVAL_FORCED))
-			flags &= ~NFS_INO_INVALID_OTHER;
-		flags &= ~(NFS_INO_INVALID_CHANGE
-				| NFS_INO_INVALID_SIZE
-				| NFS_INO_REVAL_PAGECACHE
-				| NFS_INO_INVALID_XATTR);
-	}
+			flags &= ~(NFS_INO_INVALID_MODE |
+				   NFS_INO_INVALID_OTHER |
+				   NFS_INO_INVALID_XATTR);
+		flags &= ~(NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE);
+	} else if (flags & NFS_INO_REVAL_PAGECACHE)
+		flags |= NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE;
 
 	if (!nfs_has_xattr_cache(nfsi))
 		flags &= ~NFS_INO_INVALID_XATTR;
+	if (inode->i_mapping->nrpages == 0)
+		flags &= ~(NFS_INO_INVALID_DATA|NFS_INO_DATA_INVAL_DEFER);
+	nfsi->cache_validity |= flags;
 	if (flags & NFS_INO_INVALID_DATA)
 		nfs_fscache_invalidate(inode);
-	if (inode->i_mapping->nrpages == 0)
-		flags &= ~(NFS_INO_INVALID_DATA|NFS_INO_DATA_INVAL_DEFER);
-	flags &= ~(NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED);
-	nfsi->cache_validity |= flags;
 }
 EXPORT_SYMBOL_GPL(nfs_set_cache_invalid);
@@ -452,6 +438,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
 		.fattr	= fattr
 	};
 	struct inode *inode = ERR_PTR(-ENOENT);
+	u64 fattr_supported = NFS_SB(sb)->fattr_valid;
 	unsigned long hash;
 
 	nfs_attr_check_mountpoint(sb, fattr);
@@ -484,8 +471,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
 			inode->i_mode = fattr->mode;
 		nfsi->cache_validity = 0;
 		if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
-				&& nfs_server_capable(inode, NFS_CAP_MODE))
-			nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+				&& (fattr_supported & NFS_ATTR_FATTR_MODE))
+			nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE);
 		/* Why so? Because we want revalidate for devices/FIFOs, and
 		 * that's precisely what we have in nfs_file_inode_operations.
 		 */
@@ -530,15 +517,15 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st
 		nfsi->attr_gencount = fattr->gencount;
 		if (fattr->valid & NFS_ATTR_FATTR_ATIME)
 			inode->i_atime = fattr->atime;
-		else if (nfs_server_capable(inode, NFS_CAP_ATIME))
+		else if (fattr_supported & NFS_ATTR_FATTR_ATIME)
 			nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
 		if (fattr->valid & NFS_ATTR_FATTR_MTIME)
 			inode->i_mtime = fattr->mtime;
-		else if (nfs_server_capable(inode, NFS_CAP_MTIME))
+		else if (fattr_supported & NFS_ATTR_FATTR_MTIME)
 			nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
 		if (fattr->valid & NFS_ATTR_FATTR_CTIME)
 			inode->i_ctime = fattr->ctime;
-		else if (nfs_server_capable(inode, NFS_CAP_CTIME))
+		else if (fattr_supported & NFS_ATTR_FATTR_CTIME)
 			nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME);
 		if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
 			inode_set_iversion_raw(inode, fattr->change_attr);
@@ -550,29 +537,31 @@
 			nfs_set_cache_invalid(inode, NFS_INO_INVALID_SIZE);
 		if (fattr->valid & NFS_ATTR_FATTR_NLINK)
 			set_nlink(inode, fattr->nlink);
-		else if (nfs_server_capable(inode, NFS_CAP_NLINK))
-			nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+		else if (fattr_supported & NFS_ATTR_FATTR_NLINK)
+			nfs_set_cache_invalid(inode, NFS_INO_INVALID_NLINK);
 		if (fattr->valid & NFS_ATTR_FATTR_OWNER)
 			inode->i_uid = fattr->uid;
-		else if (nfs_server_capable(inode, NFS_CAP_OWNER))
+		else if (fattr_supported & NFS_ATTR_FATTR_OWNER)
 			nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
 		if (fattr->valid & NFS_ATTR_FATTR_GROUP)
 			inode->i_gid = fattr->gid;
-		else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
+		else if (fattr_supported & NFS_ATTR_FATTR_GROUP)
 			nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
 		if (nfs_server_capable(inode, NFS_CAP_XATTR))
 			nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR);
 		if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
 			inode->i_blocks = fattr->du.nfs2.blocks;
+		else if (fattr_supported & NFS_ATTR_FATTR_BLOCKS_USED &&
+			 fattr->size != 0)
+			nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
 		if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
 			/*
 			 * report the blocks in 512byte units
 			 */
 			inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
-		}
-
-		if (nfsi->cache_validity != 0)
-			nfsi->cache_validity |= NFS_INO_REVAL_FORCED;
+		} else if (fattr_supported & NFS_ATTR_FATTR_SPACE_USED &&
+			   fattr->size != 0)
+			nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
 
 		nfs_setsecurity(inode, fattr, label);
@@ -634,8 +623,7 @@ nfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
 	}
 
 	/* Optimization: if the end result is no change, don't RPC */
-	attr->ia_valid &= NFS_VALID_ATTRS;
-	if ((attr->ia_valid & ~(ATTR_FILE|ATTR_OPEN)) == 0)
+	if (((attr->ia_valid & NFS_VALID_ATTRS) & ~(ATTR_FILE|ATTR_OPEN)) == 0)
 		return 0;
 
 	trace_nfs_setattr_enter(inode);
@@ -710,12 +698,20 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
 	spin_lock(&inode->i_lock);
 	NFS_I(inode)->attr_gencount = fattr->gencount;
 	if ((attr->ia_valid & ATTR_SIZE) != 0) {
-		nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
+		nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME |
+					     NFS_INO_INVALID_BLOCKS);
 		nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
 		nfs_vmtruncate(inode, attr->ia_size);
 	}
 	if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
 		NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_CTIME;
+		if ((attr->ia_valid & ATTR_KILL_SUID) != 0 &&
+		    inode->i_mode & S_ISUID)
+			inode->i_mode &= ~S_ISUID;
+		if ((attr->ia_valid & ATTR_KILL_SGID) != 0 &&
+		    (inode->i_mode & (S_ISGID | S_IXGRP)) ==
+		     (S_ISGID | S_IXGRP))
+			inode->i_mode &= ~S_ISGID;
 		if ((attr->ia_valid & ATTR_MODE) != 0) {
 			int mode = attr->ia_mode & S_IALLUGO;
 			mode |= inode->i_mode & ~S_IALLUGO;
@@ -793,14 +789,28 @@ static void nfs_readdirplus_parent_cache_hit(struct dentry *dentry)
 	dput(parent);
 }
 
-static bool nfs_need_revalidate_inode(struct inode *inode)
+static u32 nfs_get_valid_attrmask(struct inode *inode)
 {
-	if (NFS_I(inode)->cache_validity &
-			(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL))
-		return true;
-	if (nfs_attribute_cache_expired(inode))
-		return true;
-	return false;
+	unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
+	u32 reply_mask = STATX_INO | STATX_TYPE;
+
+	if (!(cache_validity & NFS_INO_INVALID_ATIME))
+		reply_mask |= STATX_ATIME;
+	if (!(cache_validity & NFS_INO_INVALID_CTIME))
+		reply_mask |= STATX_CTIME;
+	if (!(cache_validity & NFS_INO_INVALID_MTIME))
+		reply_mask |= STATX_MTIME;
+	if (!(cache_validity & NFS_INO_INVALID_SIZE))
+		reply_mask |= STATX_SIZE;
+	if (!(cache_validity & NFS_INO_INVALID_NLINK))
+		reply_mask |= STATX_NLINK;
+	if (!(cache_validity & NFS_INO_INVALID_MODE))
+		reply_mask |= STATX_MODE;
+	if (!(cache_validity & NFS_INO_INVALID_OTHER))
+		reply_mask |= STATX_UID | STATX_GID;
+	if (!(cache_validity & NFS_INO_INVALID_BLOCKS))
+		reply_mask |= STATX_BLOCKS;
+	return reply_mask;
 }
 
 int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
@@ -815,9 +825,13 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
 
 	trace_nfs_getattr_enter(inode);
 
+	request_mask &= STATX_TYPE | STATX_MODE | STATX_NLINK | STATX_UID |
+			STATX_GID | STATX_ATIME | STATX_MTIME | STATX_CTIME |
+			STATX_INO | STATX_SIZE | STATX_BLOCKS;
+
 	if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) {
 		nfs_readdirplus_parent_cache_hit(path->dentry);
-		goto out_no_update;
+		goto out_no_revalidate;
 	}
 
 	/* Flush out writes to the server in order to update c/mtime. */
@@ -850,14 +864,24 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
 	/* Check whether the cached attributes are stale */
 	do_update |= force_sync || nfs_attribute_cache_expired(inode);
 	cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
-	do_update |= cache_validity &
-		(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL);
+	do_update |= cache_validity & NFS_INO_INVALID_CHANGE;
 	if (request_mask & STATX_ATIME)
 		do_update |= cache_validity & NFS_INO_INVALID_ATIME;
-	if (request_mask & (STATX_CTIME|STATX_MTIME))
-		do_update |= cache_validity & NFS_INO_REVAL_PAGECACHE;
+	if (request_mask & STATX_CTIME)
+		do_update |= cache_validity & NFS_INO_INVALID_CTIME;
+	if (request_mask & STATX_MTIME)
+		do_update |= cache_validity & NFS_INO_INVALID_MTIME;
+	if (request_mask & STATX_SIZE)
+		do_update |= cache_validity & NFS_INO_INVALID_SIZE;
+	if (request_mask & STATX_NLINK)
+		do_update |= cache_validity & NFS_INO_INVALID_NLINK;
+	if (request_mask & STATX_MODE)
+		do_update |= cache_validity & NFS_INO_INVALID_MODE;
+	if (request_mask & (STATX_UID | STATX_GID))
+		do_update |= cache_validity & NFS_INO_INVALID_OTHER;
 	if (request_mask & STATX_BLOCKS)
 		do_update |= cache_validity & NFS_INO_INVALID_BLOCKS;
+
 	if (do_update) {
 		/* Update the attribute cache */
 		if (!(server->flags & NFS_MOUNT_NOAC))
@@ -871,8 +895,8 @@ int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
 		nfs_readdirplus_parent_cache_hit(path->dentry);
 out_no_revalidate:
 	/* Only return attributes that were revalidated. */
-	stat->result_mask &= request_mask;
-out_no_update:
+	stat->result_mask = nfs_get_valid_attrmask(inode) | request_mask;
+
 	generic_fillattr(&init_user_ns, inode, stat);
 	stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
 	if (S_ISDIR(inode->i_mode))
@@ -963,7 +987,6 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
 {
 	struct nfs_inode *nfsi;
 	struct inode *inode;
-	struct nfs_server *server;
 
 	if (!(ctx->mode & FMODE_WRITE))
 		return;
@@ -979,10 +1002,10 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
 		return;
 	if (!list_empty(&nfsi->open_files))
 		return;
-	server = NFS_SERVER(inode);
-	if (server->flags & NFS_MOUNT_NOCTO)
+	if (NFS_SERVER(inode)->flags & NFS_MOUNT_NOCTO)
 		return;
-	nfs_revalidate_inode(server, inode);
+	nfs_revalidate_inode(inode,
+			     NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE);
 }
 EXPORT_SYMBOL_GPL(nfs_close_context);
@@ -1237,16 +1260,16 @@ int nfs_attribute_cache_expired(struct inode *inode)
 
 /**
  * nfs_revalidate_inode - Revalidate the inode attributes
- * @server: pointer to nfs_server struct
  * @inode: pointer to inode struct
+ * @flags: cache flags to check
  *
  * Updates inode attribute information by retrieving the data from the server.
 */
-int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
+int nfs_revalidate_inode(struct inode *inode, unsigned long flags)
 {
-	if (!nfs_need_revalidate_inode(inode))
+	if (!nfs_check_cache_invalid(inode, flags))
 		return NFS_STALE(inode) ? -ESTALE : 0;
-	return __nfs_revalidate_inode(server, inode);
+	return __nfs_revalidate_inode(NFS_SERVER(inode), inode);
 }
 EXPORT_SYMBOL_GPL(nfs_revalidate_inode);
@@ -1332,7 +1355,7 @@ out:
 
 bool nfs_mapping_need_revalidate_inode(struct inode *inode)
 {
-	return nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE) ||
+	return nfs_check_cache_invalid(inode, NFS_INO_INVALID_CHANGE) ||
 		NFS_STALE(inode);
 }
@@ -1468,8 +1491,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 	if (!nfs_file_has_buffered_writers(nfsi)) {
 		/* Verify a few of the more important attributes */
 		if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && !inode_eq_iversion_raw(inode, fattr->change_attr))
-			invalid |= NFS_INO_INVALID_CHANGE
-				| NFS_INO_REVAL_PAGECACHE;
+			invalid |= NFS_INO_INVALID_CHANGE;
 
 		ts = inode->i_mtime;
 		if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec64_equal(&ts, &fattr->mtime))
@@ -1483,28 +1505,21 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
 			cur_size = i_size_read(inode);
 			new_isize = nfs_size_to_loff_t(fattr->size);
 			if (cur_size != new_isize)
-				invalid |= NFS_INO_INVALID_SIZE
-					| NFS_INO_REVAL_PAGECACHE;
+				invalid |= NFS_INO_INVALID_SIZE;
 		}
 	}
 
 	/* Have any file permissions changed? */
 	if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
-		invalid |= NFS_INO_INVALID_ACCESS
-			| NFS_INO_INVALID_ACL
-			| NFS_INO_INVALID_OTHER;
+		invalid |= NFS_INO_INVALID_MODE;
 	if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, fattr->uid))
-		invalid |= NFS_INO_INVALID_ACCESS
-			| NFS_INO_INVALID_ACL
-			| NFS_INO_INVALID_OTHER;
+		invalid |= NFS_INO_INVALID_OTHER;
 	if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, fattr->gid))
-		invalid |= NFS_INO_INVALID_ACCESS
-			| NFS_INO_INVALID_ACL
-			| NFS_INO_INVALID_OTHER;
+		invalid |= NFS_INO_INVALID_OTHER;
 
 	/* Has the link count changed? */
 	if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink)
-		invalid |= NFS_INO_INVALID_OTHER;
+		invalid |= NFS_INO_INVALID_NLINK;
 
 	ts = inode->i_atime;
 	if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec64_equal(&ts, &fattr->atime))
@@ -1642,41 +1657,142 @@ EXPORT_SYMBOL_GPL(_nfs_display_fhandle);
 #endif
 
 /**
- * nfs_inode_attrs_need_update - check if the inode attributes need updating
- * @inode: pointer to inode
+ * nfs_inode_attrs_cmp_generic - compare attributes
  * @fattr: attributes
+ * @inode: pointer to inode
+ *
+ * Attempt to divine whether or not an RPC call reply carrying stale
+ * attributes got scheduled after another call carrying updated ones.
+ * Note also the check for wraparound of 'attr_gencount'
+ *
+ * The function returns '1' if it thinks the attributes in @fattr are
+ * more recent than the ones cached in @inode. Otherwise it returns
+ * the value '0'.
+ */
+static int nfs_inode_attrs_cmp_generic(const struct nfs_fattr *fattr,
+				       const struct inode *inode)
+{
+	unsigned long attr_gencount = NFS_I(inode)->attr_gencount;
+
+	return (long)(fattr->gencount - attr_gencount) > 0 ||
+	       (long)(attr_gencount - nfs_read_attr_generation_counter()) > 0;
+}
+
+/**
+ * nfs_inode_attrs_cmp_monotonic - compare attributes
+ * @fattr: attributes
+ * @inode: pointer to inode
  *
  * Attempt to divine whether or not an RPC call reply carrying stale
  * attributes got scheduled after another call carrying updated ones.
  *
- * To do so, the function first assumes that a more recent ctime means
- * that the attributes in fattr are newer, however it also attempt to
- * catch the case where ctime either didn't change, or went backwards
- * (if someone reset the clock on the server) by looking at whether
- * or not this RPC call was started after the inode was last updated.
- * Note also the check for wraparound of 'attr_gencount'
- *
- * The function returns 'true' if it thinks the attributes in 'fattr' are
- * more recent than the ones cached in the inode.
- *
+ * We assume that the server observes monotonic semantics for
+ * the change attribute, so a larger value means that the attributes in
+ * @fattr are more recent, in which case the function returns the
+ * value '1'.
+ * A return value of '0' indicates no measurable change
+ * A return value of '-1' means that the attributes in @inode are
+ * more recent.
 */
-static int nfs_inode_attrs_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
+static int nfs_inode_attrs_cmp_monotonic(const struct nfs_fattr *fattr,
+					 const struct inode *inode)
 {
-	const struct nfs_inode *nfsi = NFS_I(inode);
+	s64 diff = fattr->change_attr - inode_peek_iversion_raw(inode);
+
+	if (diff > 0)
+		return 1;
+	return diff == 0 ? 0 : -1;
+}
 
-	return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 ||
-		((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0);
+/**
+ * nfs_inode_attrs_cmp_strict_monotonic - compare attributes
+ * @fattr: attributes
+ * @inode: pointer to inode
+ *
+ * Attempt to divine whether or not an RPC call reply carrying stale
+ * attributes got scheduled after another call carrying updated ones.
+ *
+ * We assume that the server observes strictly monotonic semantics for
+ * the change attribute, so a larger value means that the attributes in
+ * @fattr are more recent, in which case the function returns the
+ * value '1'.
+ * A return value of '-1' means that the attributes in @inode are
+ * more recent or unchanged.
+ */
+static int nfs_inode_attrs_cmp_strict_monotonic(const struct nfs_fattr *fattr,
+						const struct inode *inode)
+{
+	return nfs_inode_attrs_cmp_monotonic(fattr, inode) > 0 ? 1 : -1;
 }
 
-static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
+/**
+ * nfs_inode_attrs_cmp - compare attributes
+ * @fattr: attributes
+ * @inode: pointer to inode
+ *
+ * This function returns '1' if it thinks the attributes in @fattr are
+ * more recent than the ones cached in @inode. It returns '-1' if
+ * the attributes in @inode are more recent than the ones in @fattr,
+ * and it returns 0 if not sure.
+ */
+static int nfs_inode_attrs_cmp(const struct nfs_fattr *fattr,
+			       const struct inode *inode)
+{
+	if (nfs_inode_attrs_cmp_generic(fattr, inode) > 0)
+		return 1;
+	switch (NFS_SERVER(inode)->change_attr_type) {
+	case NFS4_CHANGE_TYPE_IS_UNDEFINED:
+		break;
+	case NFS4_CHANGE_TYPE_IS_TIME_METADATA:
+		if (!(fattr->valid & NFS_ATTR_FATTR_CHANGE))
+			break;
+		return nfs_inode_attrs_cmp_monotonic(fattr, inode);
+	default:
+		if (!(fattr->valid & NFS_ATTR_FATTR_CHANGE))
+			break;
+		return nfs_inode_attrs_cmp_strict_monotonic(fattr, inode);
+	}
+	return 0;
+}
+
+/**
+ * nfs_inode_finish_partial_attr_update - complete a previous inode update
+ * @fattr: attributes
+ * @inode: pointer to inode
+ *
+ * Returns '1' if the last attribute update left the inode cached
+ * attributes in a partially unrevalidated state, and @fattr
+ * matches the change attribute of that partial update.
+ * Otherwise returns '0'.
+ */
+static int nfs_inode_finish_partial_attr_update(const struct nfs_fattr *fattr,
+						const struct inode *inode)
+{
+	const unsigned long check_valid =
+		NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME |
+		NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE |
+		NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_OTHER |
+		NFS_INO_INVALID_NLINK;
+	unsigned long cache_validity = NFS_I(inode)->cache_validity;
+
+	if (!(cache_validity & NFS_INO_INVALID_CHANGE) &&
+	    (cache_validity & check_valid) != 0 &&
+	    (fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 &&
+	    nfs_inode_attrs_cmp_monotonic(fattr, inode) == 0)
+		return 1;
+	return 0;
+}
+
+static int nfs_refresh_inode_locked(struct inode *inode,
+				    struct nfs_fattr *fattr)
 {
-	int ret;
+	int attr_cmp = nfs_inode_attrs_cmp(fattr, inode);
+	int ret = 0;
 
 	trace_nfs_refresh_inode_enter(inode);
 
-	if (nfs_inode_attrs_need_update(inode, fattr))
+	if (attr_cmp > 0 || nfs_inode_finish_partial_attr_update(fattr, inode))
 		ret = nfs_update_inode(inode, fattr);
-	else
+	else if (attr_cmp == 0)
 		ret = nfs_check_inode_attributes(inode, fattr);
 
 	trace_nfs_refresh_inode_exit(inode, ret);
@@ -1761,11 +1877,13 @@ EXPORT_SYMBOL_GPL(nfs_post_op_update_inode);
 */
 int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr)
 {
+	int attr_cmp = nfs_inode_attrs_cmp(fattr, inode);
 	int status;
 
 	/* Don't do a WCC update if these attributes are already stale */
-	if ((fattr->valid & NFS_ATTR_FATTR) == 0 ||
-			!nfs_inode_attrs_need_update(inode, fattr)) {
+	if (attr_cmp < 0)
+		return 0;
+	if ((fattr->valid & NFS_ATTR_FATTR) == 0 || !attr_cmp) {
 		fattr->valid &= ~(NFS_ATTR_FATTR_PRECHANGE
 				| NFS_ATTR_FATTR_PRESIZE
 				| NFS_ATTR_FATTR_PREMTIME
@@ -1839,9 +1957,10 @@ EXPORT_SYMBOL_GPL(nfs_post_op_update_inode_force_wcc);
 */
 static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 {
-	struct nfs_server *server;
+	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs_inode *nfsi = NFS_I(inode);
 	loff_t cur_isize, new_isize;
+	u64 fattr_supported = server->fattr_valid;
 	unsigned long invalid = 0;
 	unsigned long now = jiffies;
 	unsigned long save_cache_validity;
@@ -1885,7 +2004,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		goto out_err;
 	}
 
-	server = NFS_SERVER(inode);
 	/* Update the fsid? */
 	if (S_ISDIR(inode->i_mode) && (fattr->valid & NFS_ATTR_FATTR_FSID) &&
 	    !nfs_fsid_equal(&server->fsid, &fattr->fsid) &&
@@ -1904,14 +2022,17 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 	nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
 			| NFS_INO_INVALID_ATIME
 			| NFS_INO_REVAL_FORCED
-			| NFS_INO_REVAL_PAGECACHE
 			| NFS_INO_INVALID_BLOCKS);
 
 	/* Do atomic weak cache consistency updates */
 	nfs_wcc_update_inode(inode, fattr);
 
 	if (pnfs_layoutcommit_outstanding(inode)) {
-		nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_ATTR;
+		nfsi->cache_validity |=
+			save_cache_validity &
+			(NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME |
+			 NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE |
+			 NFS_INO_INVALID_BLOCKS);
 		cache_revalidated = false;
 	}
@@ -1928,6 +2049,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 			save_cache_validity |= NFS_INO_INVALID_CTIME
 				| NFS_INO_INVALID_MTIME
 				| NFS_INO_INVALID_SIZE
+				| NFS_INO_INVALID_BLOCKS
+				| NFS_INO_INVALID_NLINK
+				| NFS_INO_INVALID_MODE
 				| NFS_INO_INVALID_OTHER;
 			if (S_ISDIR(inode->i_mode))
 				nfs_force_lookup_revalidate(inode);
@@ -1940,28 +2064,24 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 			attr_changed = true;
 		}
 	} else {
-		nfsi->cache_validity |= save_cache_validity &
-				(NFS_INO_INVALID_CHANGE
-				| NFS_INO_REVAL_PAGECACHE
-				| NFS_INO_REVAL_FORCED);
+		nfsi->cache_validity |=
+			save_cache_validity & NFS_INO_INVALID_CHANGE;
 		cache_revalidated = false;
 	}
 
 	if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
 		inode->i_mtime = fattr->mtime;
-	} else if (server->caps & NFS_CAP_MTIME) {
-		nfsi->cache_validity |= save_cache_validity &
-				(NFS_INO_INVALID_MTIME
-				| NFS_INO_REVAL_FORCED);
+	} else if (fattr_supported & NFS_ATTR_FATTR_MTIME) {
+		nfsi->cache_validity |=
+			save_cache_validity & NFS_INO_INVALID_MTIME;
 		cache_revalidated = false;
 	}
 
 	if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
 		inode->i_ctime = fattr->ctime;
-	} else if (server->caps & NFS_CAP_CTIME) {
-		nfsi->cache_validity |= save_cache_validity &
-				(NFS_INO_INVALID_CTIME
-				| NFS_INO_REVAL_FORCED);
+	} else if (fattr_supported & NFS_ATTR_FATTR_CTIME) {
+		nfsi->cache_validity |=
+			save_cache_validity & NFS_INO_INVALID_CTIME;
 		cache_revalidated = false;
 	}
@@ -1985,21 +2105,23 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 					(long long)cur_isize,
 					(long long)new_isize);
 		}
+		if (new_isize == 0 &&
+		    !(fattr->valid & (NFS_ATTR_FATTR_SPACE_USED |
+				      NFS_ATTR_FATTR_BLOCKS_USED))) {
+			fattr->du.nfs3.used = 0;
+			fattr->valid |= NFS_ATTR_FATTR_SPACE_USED;
+		}
 	} else {
-		nfsi->cache_validity |= save_cache_validity &
-				(NFS_INO_INVALID_SIZE
-				| NFS_INO_REVAL_PAGECACHE
-				| NFS_INO_REVAL_FORCED);
+		nfsi->cache_validity |=
+			save_cache_validity & NFS_INO_INVALID_SIZE;
 		cache_revalidated = false;
 	}
 
 	if (fattr->valid & NFS_ATTR_FATTR_ATIME)
 		inode->i_atime = fattr->atime;
-	else if (server->caps & NFS_CAP_ATIME) {
-		nfsi->cache_validity |= save_cache_validity &
-				(NFS_INO_INVALID_ATIME
-				| NFS_INO_REVAL_FORCED);
+	else if (fattr_supported & NFS_ATTR_FATTR_ATIME) {
+		nfsi->cache_validity |=
+			save_cache_validity & NFS_INO_INVALID_ATIME;
 		cache_revalidated = false;
 	}
@@ -2012,10 +2134,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 				| NFS_INO_INVALID_ACL;
 			attr_changed = true;
 		}
-	} else if (server->caps & NFS_CAP_MODE) {
-		nfsi->cache_validity |= save_cache_validity &
-				(NFS_INO_INVALID_OTHER
-				| NFS_INO_REVAL_FORCED);
+	} else if (fattr_supported & NFS_ATTR_FATTR_MODE) {
+		nfsi->cache_validity |=
+			save_cache_validity & NFS_INO_INVALID_MODE;
 		cache_revalidated = false;
 	}
@@ -2026,10 +2147,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 			inode->i_uid = fattr->uid;
 			attr_changed = true;
 		}
-	} else if (server->caps & NFS_CAP_OWNER) {
-		nfsi->cache_validity |= save_cache_validity &
-				(NFS_INO_INVALID_OTHER
-				| NFS_INO_REVAL_FORCED);
+	} else if (fattr_supported & NFS_ATTR_FATTR_OWNER) {
+		nfsi->cache_validity |=
+			save_cache_validity & NFS_INO_INVALID_OTHER;
 		cache_revalidated = false;
 	}
@ -2040,10 +2160,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_gid = fattr->gid; inode->i_gid = fattr->gid;
attr_changed = true; attr_changed = true;
} }
} else if (server->caps & NFS_CAP_OWNER_GROUP) { } else if (fattr_supported & NFS_ATTR_FATTR_GROUP) {
nfsi->cache_validity |= save_cache_validity & nfsi->cache_validity |=
(NFS_INO_INVALID_OTHER save_cache_validity & NFS_INO_INVALID_OTHER;
| NFS_INO_REVAL_FORCED);
cache_revalidated = false; cache_revalidated = false;
} }
@ -2054,10 +2173,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
set_nlink(inode, fattr->nlink); set_nlink(inode, fattr->nlink);
attr_changed = true; attr_changed = true;
} }
} else if (server->caps & NFS_CAP_NLINK) { } else if (fattr_supported & NFS_ATTR_FATTR_NLINK) {
nfsi->cache_validity |= save_cache_validity & nfsi->cache_validity |=
(NFS_INO_INVALID_OTHER save_cache_validity & NFS_INO_INVALID_NLINK;
| NFS_INO_REVAL_FORCED);
cache_revalidated = false; cache_revalidated = false;
} }
@ -2066,18 +2184,22 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
* report the blocks in 512byte units * report the blocks in 512byte units
*/ */
inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used); inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
} else if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) } else if (fattr_supported & NFS_ATTR_FATTR_SPACE_USED) {
nfsi->cache_validity |=
save_cache_validity & NFS_INO_INVALID_BLOCKS;
cache_revalidated = false;
}
if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) {
inode->i_blocks = fattr->du.nfs2.blocks; inode->i_blocks = fattr->du.nfs2.blocks;
else { } else if (fattr_supported & NFS_ATTR_FATTR_BLOCKS_USED) {
nfsi->cache_validity |= save_cache_validity & nfsi->cache_validity |=
(NFS_INO_INVALID_BLOCKS save_cache_validity & NFS_INO_INVALID_BLOCKS;
| NFS_INO_REVAL_FORCED);
cache_revalidated = false; cache_revalidated = false;
} }
/* Update attrtimeo value if we're out of the unstable period */ /* Update attrtimeo value if we're out of the unstable period */
if (attr_changed) { if (attr_changed) {
invalid &= ~NFS_INO_INVALID_ATTR;
nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
nfsi->attrtimeo_timestamp = now; nfsi->attrtimeo_timestamp = now;
@ -2094,7 +2216,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfsi->attrtimeo_timestamp = now; nfsi->attrtimeo_timestamp = now;
} }
/* Set the barrier to be more recent than this fattr */ /* Set the barrier to be more recent than this fattr */
if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0) if ((long)(fattr->gencount - nfsi->attr_gencount) > 0)
nfsi->attr_gencount = fattr->gencount; nfsi->attr_gencount = fattr->gencount;
} }
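A side note on the gencount comparison fixed in the last hunk above: subtracting the unsigned counters first and casting the result, rather than casting each operand before subtracting, keeps the comparison well defined across counter wraparound. A minimal standalone sketch (not from the patch; 32-bit counters assumed for brevity):

/* Why "(long)(a - b) > 0" is the safe wraparound comparison for
 * unsigned generation counters (serial-number arithmetic). */
#include <stdio.h>

static int newer_than(unsigned int a, unsigned int b)
{
    /* The subtraction wraps modulo 2^32; the signed cast then says
     * whether a is "ahead" of b by less than half the counter space. */
    return (int)(a - b) > 0;
}

int main(void)
{
    unsigned int old = 0xfffffffeu; /* counter just before the wrap */
    unsigned int new = 0x00000001u; /* counter just after the wrap */

    printf("naive:  %d\n", new > old);            /* prints 0: wrong */
    printf("serial: %d\n", newer_than(new, old)); /* prints 1: right */
    return 0;
}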

---- next file ----
@@ -181,7 +181,7 @@ struct nfs_mount_request {
     struct net *net;
 };

-extern int nfs_mount(struct nfs_mount_request *info);
+extern int nfs_mount(struct nfs_mount_request *info, int timeo, int retrans);
 extern void nfs_umount(const struct nfs_mount_request *info);

 /* client.c */

---- next file ----
@@ -104,7 +104,7 @@ static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode)
 }

 /**
- * nfs_end_io_direct - declare the file is being used for direct i/o
+ * nfs_start_io_direct - declare the file is being used for direct i/o
  * @inode: file inode
  *
  * Declare that a direct I/O operation is about to start, and ensure

---- next file ----
@@ -136,14 +136,16 @@ struct mnt_fhstatus {
 /**
  * nfs_mount - Obtain an NFS file handle for the given host and path
  * @info: pointer to mount request arguments
+ * @timeo: deciseconds the mount waits for a response before it retries
+ * @retrans: number of times the mount retries a request
  *
- * Uses default timeout parameters specified by underlying transport. On
- * successful return, the auth_flavs list and auth_flav_len will be populated
- * with the list from the server or a faked-up list if the server didn't
- * provide one.
+ * Uses timeout parameters specified by caller. On successful return, the
+ * auth_flavs list and auth_flav_len will be populated with the list from the
+ * server or a faked-up list if the server didn't provide one.
  */
-int nfs_mount(struct nfs_mount_request *info)
+int nfs_mount(struct nfs_mount_request *info, int timeo, int retrans)
 {
+    struct rpc_timeout mnt_timeout;
     struct mountres result = {
         .fh = info->fh,
         .auth_count = info->auth_flav_len,
@@ -158,6 +160,7 @@ int nfs_mount(struct nfs_mount_request *info)
         .protocol = info->protocol,
         .address = info->sap,
         .addrsize = info->salen,
+        .timeout = &mnt_timeout,
         .servername = info->hostname,
         .program = &mnt_program,
         .version = info->version,
@@ -177,6 +180,7 @@ int nfs_mount(struct nfs_mount_request *info)
     if (info->noresvport)
         args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;

+    nfs_init_timeout_values(&mnt_timeout, info->protocol, timeo, retrans);
     mnt_clnt = rpc_create(&args);
     if (IS_ERR(mnt_clnt))
         goto out_clnt_err;
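The new signature threads the user's timeo= and retrans= mount options down into the MNT RPC instead of relying on the transport defaults. A hedged caller sketch (field values are illustrative only; the in-tree caller is the nfs_request_mount() hunk shown later in this series):

/* Assumed already declared and filled in: addr/addrlen, fh, flavors,
 * flavor_len. timeo is in tenths of a second, matching the mount
 * options; 600/2 below are example values, not defaults. */
struct nfs_mount_request request = {
    .sap           = (struct sockaddr *)&addr,
    .salen         = addrlen,
    .hostname      = "server.example.com",   /* hypothetical */
    .dirpath       = "/export",              /* hypothetical */
    .protocol      = XPRT_TRANSPORT_UDP,
    .version       = NFS_MNT3_VERSION,
    .fh            = &fh,
    .auth_flav_len = &flavor_len,
    .auth_flavs    = flavors,
};
int status = nfs_mount(&request, 600, 2);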

---- next file ----
@@ -65,7 +65,7 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
     if (!nfs_server_capable(inode, NFS_CAP_ACLS))
         return ERR_PTR(-EOPNOTSUPP);

-    status = nfs_revalidate_inode(server, inode);
+    status = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
     if (status < 0)
         return ERR_PTR(status);

---- next file ----
@@ -433,7 +433,7 @@ static int decode_nfs_fh3(struct xdr_stream *xdr, struct nfs_fh *fh)
     if (unlikely(!p))
         return -EIO;
     length = be32_to_cpup(p++);
-    if (unlikely(length > NFS3_FHSIZE))
+    if (unlikely(length > NFS3_FHSIZE || length == 0))
         goto out_toobig;
     p = xdr_inline_decode(xdr, length);
     if (unlikely(!p))
@@ -442,7 +442,7 @@ static int decode_nfs_fh3(struct xdr_stream *xdr, struct nfs_fh *fh)
     memcpy(fh->data, p, length);
     return 0;
 out_toobig:
-    dprintk("NFS: file handle size (%u) too big\n", length);
+    trace_nfs_xdr_bad_filehandle(xdr, NFSERR_BADHANDLE);
     return -E2BIG;
 }
@@ -2227,6 +2227,7 @@ static int decode_fsinfo3resok(struct xdr_stream *xdr,
     /* ignore properties */
     result->lease_time = 0;
+    result->change_attr_type = NFS4_CHANGE_TYPE_IS_TIME_METADATA;
     return 0;
 }

---- next file ----
@@ -46,11 +46,12 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
 {
     struct inode *inode = file_inode(filep);
     struct nfs_server *server = NFS_SERVER(inode);
+    u32 bitmask[3];
     struct nfs42_falloc_args args = {
         .falloc_fh = NFS_FH(inode),
         .falloc_offset = offset,
         .falloc_length = len,
-        .falloc_bitmask = nfs4_fattr_bitmap,
+        .falloc_bitmask = bitmask,
     };
     struct nfs42_falloc_res res = {
         .falloc_server = server,
@@ -68,6 +69,10 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
         return status;
     }

+    memcpy(bitmask, server->cache_consistency_bitmask, sizeof(bitmask));
+    if (server->attr_bitmask[1] & FATTR4_WORD1_SPACE_USED)
+        bitmask[1] |= FATTR4_WORD1_SPACE_USED;
+
     res.falloc_fattr = nfs_alloc_fattr();
     if (!res.falloc_fattr)
         return -ENOMEM;
@@ -75,7 +80,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
     status = nfs4_call_sync(server->client, server, msg,
                 &args.seq_args, &res.seq_res, 0);
     if (status == 0)
-        status = nfs_post_op_update_inode(inode, res.falloc_fattr);
+        status = nfs_post_op_update_inode_force_wcc(inode,
+                                res.falloc_fattr);

     kfree(res.falloc_fattr);
     return status;
@@ -84,7 +90,8 @@ static int _nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
 static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
                 loff_t offset, loff_t len)
 {
-    struct nfs_server *server = NFS_SERVER(file_inode(filep));
+    struct inode *inode = file_inode(filep);
+    struct nfs_server *server = NFS_SERVER(inode);
     struct nfs4_exception exception = { };
     struct nfs_lock_context *lock;
     int err;
@@ -93,9 +100,13 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
     if (IS_ERR(lock))
         return PTR_ERR(lock);

-    exception.inode = file_inode(filep);
+    exception.inode = inode;
     exception.state = lock->open_context->state;

+    err = nfs_sync_inode(inode);
+    if (err)
+        goto out;
+
     do {
         err = _nfs42_proc_fallocate(msg, filep, lock, offset, len);
         if (err == -ENOTSUPP) {
@@ -104,7 +115,7 @@ static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
         }
         err = nfs4_handle_exception(server, err, &exception);
     } while (exception.retry);
+out:
     nfs_put_lock_context(lock);
     return err;
 }
@@ -142,16 +153,13 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
         return -EOPNOTSUPP;

     inode_lock(inode);
-    err = nfs_sync_inode(inode);
-    if (err)
-        goto out_unlock;

     err = nfs42_proc_fallocate(&msg, filep, offset, len);
     if (err == 0)
         truncate_pagecache_range(inode, offset, (offset + len) -1);
     if (err == -EOPNOTSUPP)
         NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE;
-out_unlock:
+
     inode_unlock(inode);
     return err;
 }
@@ -261,6 +269,33 @@ out:
     return status;
 }

+/**
+ * nfs42_copy_dest_done - perform inode cache updates after clone/copy offload
+ * @inode: pointer to destination inode
+ * @pos: destination offset
+ * @len: copy length
+ *
+ * Punch a hole in the inode page cache, so that the NFS client will
+ * know to retrieve new data.
+ * Update the file size if necessary, and then mark the inode as having
+ * invalid cached values for change attribute, ctime, mtime and space used.
+ */
+static void nfs42_copy_dest_done(struct inode *inode, loff_t pos, loff_t len)
+{
+    loff_t newsize = pos + len;
+    loff_t end = newsize - 1;
+
+    truncate_pagecache_range(inode, pos, end);
+    spin_lock(&inode->i_lock);
+    if (newsize > i_size_read(inode))
+        i_size_write(inode, newsize);
+    nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE |
+                     NFS_INO_INVALID_CTIME |
+                     NFS_INO_INVALID_MTIME |
+                     NFS_INO_INVALID_BLOCKS);
+    spin_unlock(&inode->i_lock);
+}
+
 static ssize_t _nfs42_proc_copy(struct file *src,
                 struct nfs_lock_context *src_lock,
                 struct file *dst,
@@ -354,19 +389,8 @@ static ssize_t _nfs42_proc_copy(struct file *src,
         goto out;
     }

-    truncate_pagecache_range(dst_inode, pos_dst,
-                 pos_dst + res->write_res.count);
-    spin_lock(&dst_inode->i_lock);
-    nfs_set_cache_invalid(
-        dst_inode, NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED |
-                   NFS_INO_INVALID_SIZE | NFS_INO_INVALID_ATTR |
-                   NFS_INO_INVALID_DATA);
-    spin_unlock(&dst_inode->i_lock);
-    spin_lock(&src_inode->i_lock);
-    nfs_set_cache_invalid(src_inode, NFS_INO_REVAL_PAGECACHE |
-                          NFS_INO_REVAL_FORCED |
-                          NFS_INO_INVALID_ATIME);
-    spin_unlock(&src_inode->i_lock);
+    nfs42_copy_dest_done(dst_inode, pos_dst, res->write_res.count);
+    nfs_invalidate_atime(src_inode);
     status = res->write_res.count;
 out:
     if (args->sync)
@@ -659,7 +683,10 @@ static loff_t _nfs42_proc_llseek(struct file *filep,
     if (status)
         return status;

-    return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes);
+    if (whence == SEEK_DATA && res.sr_eof)
+        return -NFS4ERR_NXIO;
+    else
+        return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes);
 }

 loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
@@ -1044,8 +1071,10 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f,
     status = nfs4_call_sync(server->client, server, msg,
                 &args.seq_args, &res.seq_res, 0);
-    if (status == 0)
+    if (status == 0) {
+        nfs42_copy_dest_done(dst_inode, dst_offset, count);
         status = nfs_post_op_update_inode(dst_inode, res.dst_fattr);
+    }

     kfree(res.dst_fattr);
     return status;
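The SEEK_HOLE/SEEK_DATA hunk above has a directly observable user-space effect: SEEK_DATA at or beyond end-of-file must fail with ENXIO (per POSIX lseek semantics) instead of returning a stale offset. A small test sketch, assuming a pre-existing file path is passed as argv[1]:

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(int argc, char **argv)
{
    int fd = open(argc > 1 ? argv[1] : "testfile", O_RDONLY);
    off_t pos;

    if (fd < 0)
        return 1;
    /* Seek to EOF, then ask for data starting there: the server
     * reports sr_eof, which the client now converts to ENXIO. */
    pos = lseek(fd, lseek(fd, 0, SEEK_END), SEEK_DATA);
    if (pos < 0 && errno == ENXIO)
        printf("SEEK_DATA at EOF: ENXIO as expected\n");
    else
        printf("SEEK_DATA at EOF: pos=%lld errno=%s\n",
               (long long)pos, strerror(errno));
    close(fd);
    return 0;
}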

---- next file ----
@@ -168,7 +168,7 @@ nfs4_xattr_entry_lru_del(struct nfs4_xattr_entry *entry)
  * make it easier to copy the value after an RPC, even if
  * the value will not be passed up to application (e.g.
  * for a 'query' getxattr with NULL buffer).
- * @len: Length of the value. Can be 0 for zero-length attribues.
+ * @len: Length of the value. Can be 0 for zero-length attributes.
  * @value and @pages will be NULL if @len is 0.
  */
 static struct nfs4_xattr_entry *

---- next file ----
@@ -420,9 +420,7 @@ static const struct nfs4_ssc_client_ops nfs4_ssc_clnt_ops_tbl = {
  */
 void nfs42_ssc_register_ops(void)
 {
-#ifdef CONFIG_NFSD_V4
     nfs42_ssc_register(&nfs4_ssc_clnt_ops_tbl);
-#endif
 }

 /**
@@ -433,9 +431,7 @@ void nfs42_ssc_register_ops(void)
  */
 void nfs42_ssc_unregister_ops(void)
 {
-#ifdef CONFIG_NFSD_V4
     nfs42_ssc_unregister(&nfs4_ssc_clnt_ops_tbl);
-#endif
 }
 #endif /* CONFIG_NFS_V4_2 */

---- next file ----
@@ -108,9 +108,10 @@ static int nfs41_test_stateid(struct nfs_server *, nfs4_stateid *,
 static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *,
         const struct cred *, bool);
 #endif
-static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode,
-        struct nfs_server *server,
-        struct nfs4_label *label);
+static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ],
+        const __u32 *src, struct inode *inode,
+        struct nfs_server *server,
+        struct nfs4_label *label);

 #ifdef CONFIG_NFS_V4_SECURITY_LABEL
 static inline struct nfs4_label *
@@ -263,6 +264,7 @@ const u32 nfs4_fsinfo_bitmap[3] = { FATTR4_WORD0_MAXFILESIZE
             | FATTR4_WORD1_FS_LAYOUT_TYPES,
             FATTR4_WORD2_LAYOUT_BLKSIZE
             | FATTR4_WORD2_CLONE_BLKSIZE
+            | FATTR4_WORD2_CHANGE_ATTR_TYPE
             | FATTR4_WORD2_XATTR_SUPPORT
 };
@@ -283,7 +285,7 @@ const u32 nfs4_fs_locations_bitmap[3] = {
 };

 static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src,
-        struct inode *inode)
+        struct inode *inode, unsigned long flags)
 {
     unsigned long cache_validity;
@@ -291,22 +293,20 @@ static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src,
     if (!inode || !nfs4_have_delegation(inode, FMODE_READ))
         return;

-    cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
-    if (!(cache_validity & NFS_INO_REVAL_FORCED))
-        cache_validity &= ~(NFS_INO_INVALID_CHANGE
-                | NFS_INO_INVALID_SIZE);
+    cache_validity = READ_ONCE(NFS_I(inode)->cache_validity) | flags;

+    /* Remove the attributes over which we have full control */
+    dst[1] &= ~FATTR4_WORD1_RAWDEV;
     if (!(cache_validity & NFS_INO_INVALID_SIZE))
         dst[0] &= ~FATTR4_WORD0_SIZE;

     if (!(cache_validity & NFS_INO_INVALID_CHANGE))
         dst[0] &= ~FATTR4_WORD0_CHANGE;
-}

-static void nfs4_bitmap_copy_adjust_setattr(__u32 *dst,
-        const __u32 *src, struct inode *inode)
-{
-    nfs4_bitmap_copy_adjust(dst, src, inode);
+    if (!(cache_validity & NFS_INO_INVALID_MODE))
+        dst[1] &= ~FATTR4_WORD1_MODE;

+    if (!(cache_validity & NFS_INO_INVALID_OTHER))
+        dst[1] &= ~(FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP);
 }

 static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry,
@@ -1169,14 +1169,26 @@ int nfs4_call_sync(struct rpc_clnt *clnt,
 static void
 nfs4_inc_nlink_locked(struct inode *inode)
 {
-    nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+    nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE |
+                     NFS_INO_INVALID_CTIME |
+                     NFS_INO_INVALID_NLINK);
     inc_nlink(inode);
 }

+static void
+nfs4_inc_nlink(struct inode *inode)
+{
+    spin_lock(&inode->i_lock);
+    nfs4_inc_nlink_locked(inode);
+    spin_unlock(&inode->i_lock);
+}
+
 static void
 nfs4_dec_nlink_locked(struct inode *inode)
 {
-    nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+    nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE |
+                     NFS_INO_INVALID_CTIME |
+                     NFS_INO_INVALID_NLINK);
     drop_nlink(inode);
 }

@@ -1186,11 +1198,23 @@ nfs4_update_changeattr_locked(struct inode *inode,
         unsigned long timestamp, unsigned long cache_validity)
 {
     struct nfs_inode *nfsi = NFS_I(inode);
+    u64 change_attr = inode_peek_iversion_raw(inode);

     cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME;

-    if (cinfo->atomic && cinfo->before == inode_peek_iversion_raw(inode)) {
-        nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE;
+    switch (NFS_SERVER(inode)->change_attr_type) {
+    case NFS4_CHANGE_TYPE_IS_UNDEFINED:
+        break;
+    case NFS4_CHANGE_TYPE_IS_TIME_METADATA:
+        if ((s64)(change_attr - cinfo->after) > 0)
+            goto out;
+        break;
+    default:
+        if ((s64)(change_attr - cinfo->after) >= 0)
+            goto out;
+    }
+
+    if (cinfo->atomic && cinfo->before == change_attr) {
         nfsi->attrtimeo_timestamp = jiffies;
     } else {
         if (S_ISDIR(inode->i_mode)) {
@@ -1202,7 +1226,7 @@ nfs4_update_changeattr_locked(struct inode *inode,
                 cache_validity |= NFS_INO_REVAL_PAGECACHE;
         }

-        if (cinfo->before != inode_peek_iversion_raw(inode))
+        if (cinfo->before != change_attr)
             cache_validity |= NFS_INO_INVALID_ACCESS |
                       NFS_INO_INVALID_ACL |
                       NFS_INO_INVALID_XATTR;
@@ -1210,8 +1234,9 @@ nfs4_update_changeattr_locked(struct inode *inode,
     inode_set_iversion_raw(inode, cinfo->after);
     nfsi->read_cache_jiffies = timestamp;
     nfsi->attr_gencount = nfs_inc_attr_generation_counter();
-    nfs_set_cache_invalid(inode, cache_validity);
     nfsi->cache_validity &= ~NFS_INO_INVALID_CHANGE;
+out:
+    nfs_set_cache_invalid(inode, cache_validity);
 }
 void
@@ -3344,12 +3369,17 @@ static int nfs4_do_setattr(struct inode *inode, const struct cred *cred,
         .inode = inode,
         .stateid = &arg.stateid,
     };
+    unsigned long adjust_flags = NFS_INO_INVALID_CHANGE;
     int err;

+    if (sattr->ia_valid & (ATTR_MODE | ATTR_KILL_SUID | ATTR_KILL_SGID))
+        adjust_flags |= NFS_INO_INVALID_MODE;
+    if (sattr->ia_valid & (ATTR_UID | ATTR_GID))
+        adjust_flags |= NFS_INO_INVALID_OTHER;
+
     do {
-        nfs4_bitmap_copy_adjust_setattr(bitmask,
-                nfs4_bitmask(server, olabel),
-                inode);
+        nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, olabel),
+                    inode, adjust_flags);

         err = _nfs4_do_setattr(inode, &arg, &res, cred, ctx);
         switch (err) {
@@ -3591,6 +3621,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
     struct nfs4_closedata *calldata = data;
     struct nfs4_state *state = calldata->state;
     struct inode *inode = calldata->inode;
+    struct nfs_server *server = NFS_SERVER(inode);
     struct pnfs_layout_hdr *lo;
     bool is_rdonly, is_wronly, is_rdwr;
     int call_close = 0;
@@ -3647,8 +3678,10 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
     if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) {
         /* Close-to-open cache consistency revalidation */
         if (!nfs4_have_delegation(inode, FMODE_READ)) {
-            calldata->arg.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
-            nfs4_bitmask_adjust(calldata->arg.bitmask, inode, NFS_SERVER(inode), NULL);
+            nfs4_bitmask_set(calldata->arg.bitmask_store,
+                     server->cache_consistency_bitmask,
+                     inode, server, NULL);
+            calldata->arg.bitmask = calldata->arg.bitmask_store;
         } else
             calldata->arg.bitmask = NULL;
     }
@@ -3835,12 +3868,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
         res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK;
     }
     memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
-    server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS|
-            NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
-            NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|
-            NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME|
-            NFS_CAP_CTIME|NFS_CAP_MTIME|
-            NFS_CAP_SECURITY_LABEL);
+    server->caps &= ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS |
+            NFS_CAP_SYMLINKS| NFS_CAP_SECURITY_LABEL);
+    server->fattr_valid = NFS_ATTR_FATTR_V4;
     if (res.attr_bitmask[0] & FATTR4_WORD0_ACL &&
             res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL)
         server->caps |= NFS_CAP_ACLS;
@@ -3848,25 +3878,29 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
         server->caps |= NFS_CAP_HARDLINKS;
     if (res.has_symlinks != 0)
         server->caps |= NFS_CAP_SYMLINKS;
-    if (res.attr_bitmask[0] & FATTR4_WORD0_FILEID)
-        server->caps |= NFS_CAP_FILEID;
-    if (res.attr_bitmask[1] & FATTR4_WORD1_MODE)
-        server->caps |= NFS_CAP_MODE;
-    if (res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS)
-        server->caps |= NFS_CAP_NLINK;
-    if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER)
-        server->caps |= NFS_CAP_OWNER;
-    if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP)
-        server->caps |= NFS_CAP_OWNER_GROUP;
-    if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS)
-        server->caps |= NFS_CAP_ATIME;
-    if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA)
-        server->caps |= NFS_CAP_CTIME;
-    if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY)
-        server->caps |= NFS_CAP_MTIME;
+    if (!(res.attr_bitmask[0] & FATTR4_WORD0_FILEID))
+        server->fattr_valid &= ~NFS_ATTR_FATTR_FILEID;
+    if (!(res.attr_bitmask[1] & FATTR4_WORD1_MODE))
+        server->fattr_valid &= ~NFS_ATTR_FATTR_MODE;
+    if (!(res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS))
+        server->fattr_valid &= ~NFS_ATTR_FATTR_NLINK;
+    if (!(res.attr_bitmask[1] & FATTR4_WORD1_OWNER))
+        server->fattr_valid &= ~(NFS_ATTR_FATTR_OWNER |
+                NFS_ATTR_FATTR_OWNER_NAME);
+    if (!(res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP))
+        server->fattr_valid &= ~(NFS_ATTR_FATTR_GROUP |
+                NFS_ATTR_FATTR_GROUP_NAME);
+    if (!(res.attr_bitmask[1] & FATTR4_WORD1_SPACE_USED))
+        server->fattr_valid &= ~NFS_ATTR_FATTR_SPACE_USED;
+    if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS))
+        server->fattr_valid &= ~NFS_ATTR_FATTR_ATIME;
+    if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA))
+        server->fattr_valid &= ~NFS_ATTR_FATTR_CTIME;
+    if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY))
+        server->fattr_valid &= ~NFS_ATTR_FATTR_MTIME;
 #ifdef CONFIG_NFS_V4_SECURITY_LABEL
-    if (res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL)
-        server->caps |= NFS_CAP_SECURITY_LABEL;
+    if (!(res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL))
+        server->fattr_valid &= ~NFS_ATTR_FATTR_V4_SECURITY_LABEL;
 #endif
     memcpy(server->attr_bitmask_nl, res.attr_bitmask,
             sizeof(server->attr_bitmask));
@@ -4154,8 +4188,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
     if (inode && (server->flags & NFS_MOUNT_SOFTREVAL))
         task_flags |= RPC_TASK_TIMEOUT;

-    nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode);
+    nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode, 0);
     nfs_fattr_init(fattr);
     nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0);
     return nfs4_do_call_sync(server->client, server, &msg,
@@ -4582,11 +4615,11 @@ _nfs4_proc_remove(struct inode *dir, const struct qstr *name, u32 ftype)
     status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
     if (status == 0) {
         spin_lock(&dir->i_lock);
-        nfs4_update_changeattr_locked(dir, &res.cinfo, timestamp,
-                NFS_INO_INVALID_DATA);
         /* Removing a directory decrements nlink in the parent */
         if (ftype == NF4DIR && dir->i_nlink > 2)
             nfs4_dec_nlink_locked(dir);
+        nfs4_update_changeattr_locked(dir, &res.cinfo, timestamp,
+                NFS_INO_INVALID_DATA);
         spin_unlock(&dir->i_lock);
     }
     return status;
@@ -4715,11 +4748,11 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir,
         /* Note: If we moved a directory, nlink will change */
         nfs4_update_changeattr(old_dir, &res->old_cinfo,
                 res->old_fattr->time_start,
-                NFS_INO_INVALID_OTHER |
+                NFS_INO_INVALID_NLINK |
                     NFS_INO_INVALID_DATA);
         nfs4_update_changeattr(new_dir, &res->new_cinfo,
                 res->new_fattr->time_start,
-                NFS_INO_INVALID_OTHER |
+                NFS_INO_INVALID_NLINK |
                     NFS_INO_INVALID_DATA);
     } else
         nfs4_update_changeattr(old_dir, &res->old_cinfo,
@@ -4761,12 +4794,13 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, const struct
     }

     nfs4_inode_make_writeable(inode);
-    nfs4_bitmap_copy_adjust_setattr(bitmask, nfs4_bitmask(server, res.label), inode);
+    nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.label), inode,
+                NFS_INO_INVALID_CHANGE);

     status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
     if (!status) {
         nfs4_update_changeattr(dir, &res.cinfo, res.fattr->time_start,
                 NFS_INO_INVALID_DATA);
+        nfs4_inc_nlink(inode);
         status = nfs_post_op_update_inode(inode, res.fattr);
         if (!status)
             nfs_setsecurity(inode, res.fattr, res.label);
@@ -4844,12 +4878,12 @@ static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_
                 &data->arg.seq_args, &data->res.seq_res, 1);
     if (status == 0) {
         spin_lock(&dir->i_lock);
-        nfs4_update_changeattr_locked(dir, &data->res.dir_cinfo,
-                data->res.fattr->time_start,
-                NFS_INO_INVALID_DATA);
         /* Creating a directory bumps nlink in the parent */
         if (data->arg.ftype == NF4DIR)
             nfs4_inc_nlink_locked(dir);
+        nfs4_update_changeattr_locked(dir, &data->res.dir_cinfo,
+                data->res.fattr->time_start,
+                NFS_INO_INVALID_DATA);
         spin_unlock(&dir->i_lock);
         status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, data->res.label);
     }
@@ -5416,37 +5450,39 @@ bool nfs4_write_need_cache_consistency_data(struct nfs_pgio_header *hdr)
     return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
 }

-static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode,
-        struct nfs_server *server,
+static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ], const __u32 *src,
+        struct inode *inode, struct nfs_server *server,
         struct nfs4_label *label)
 {
     unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
+    unsigned int i;

-    if ((cache_validity & NFS_INO_INVALID_DATA) ||
-        (cache_validity & NFS_INO_REVAL_PAGECACHE) ||
-        (cache_validity & NFS_INO_REVAL_FORCED) ||
-        (cache_validity & NFS_INO_INVALID_OTHER))
-        nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode);
-
-    if (cache_validity & NFS_INO_INVALID_ATIME)
-        bitmask[1] |= FATTR4_WORD1_TIME_ACCESS;
-    if (cache_validity & NFS_INO_INVALID_OTHER)
-        bitmask[1] |= FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER |
-                FATTR4_WORD1_OWNER_GROUP |
-                FATTR4_WORD1_NUMLINKS;
-    if (label && label->len && cache_validity & NFS_INO_INVALID_LABEL)
-        bitmask[2] |= FATTR4_WORD2_SECURITY_LABEL;
+    memcpy(bitmask, src, sizeof(*bitmask) * NFS4_BITMASK_SZ);
     if (cache_validity & NFS_INO_INVALID_CHANGE)
         bitmask[0] |= FATTR4_WORD0_CHANGE;
+    if (cache_validity & NFS_INO_INVALID_ATIME)
+        bitmask[1] |= FATTR4_WORD1_TIME_ACCESS;
+    if (cache_validity & NFS_INO_INVALID_MODE)
+        bitmask[1] |= FATTR4_WORD1_MODE;
+    if (cache_validity & NFS_INO_INVALID_OTHER)
+        bitmask[1] |= FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP;
+    if (cache_validity & NFS_INO_INVALID_NLINK)
+        bitmask[1] |= FATTR4_WORD1_NUMLINKS;
+    if (label && label->len && cache_validity & NFS_INO_INVALID_LABEL)
+        bitmask[2] |= FATTR4_WORD2_SECURITY_LABEL;
     if (cache_validity & NFS_INO_INVALID_CTIME)
         bitmask[1] |= FATTR4_WORD1_TIME_METADATA;
     if (cache_validity & NFS_INO_INVALID_MTIME)
         bitmask[1] |= FATTR4_WORD1_TIME_MODIFY;
-    if (cache_validity & NFS_INO_INVALID_SIZE)
-        bitmask[0] |= FATTR4_WORD0_SIZE;
     if (cache_validity & NFS_INO_INVALID_BLOCKS)
         bitmask[1] |= FATTR4_WORD1_SPACE_USED;
+    if (cache_validity & NFS_INO_INVALID_SIZE)
+        bitmask[0] |= FATTR4_WORD0_SIZE;
+
+    for (i = 0; i < NFS4_BITMASK_SZ; i++)
+        bitmask[i] &= server->attr_bitmask[i];
 }

 static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
@@ -5459,8 +5495,10 @@ static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
         hdr->args.bitmask = NULL;
         hdr->res.fattr = NULL;
     } else {
-        hdr->args.bitmask = server->cache_consistency_bitmask;
-        nfs4_bitmask_adjust(hdr->args.bitmask, hdr->inode, server, NULL);
+        nfs4_bitmask_set(hdr->args.bitmask_store,
+                 server->cache_consistency_bitmask,
+                 hdr->inode, server, NULL);
+        hdr->args.bitmask = hdr->args.bitmask_store;
     }

     if (!hdr->pgio_done_cb)
@@ -5858,7 +5896,7 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
     if (!nfs4_server_supports_acls(server))
         return -EOPNOTSUPP;

-    ret = nfs_revalidate_inode(server, inode);
+    ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
     if (ret < 0)
         return ret;
     if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
@@ -6502,8 +6540,10 @@ static int _nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred,
     data->args.fhandle = &data->fh;
     data->args.stateid = &data->stateid;
-    data->args.bitmask = server->cache_consistency_bitmask;
-    nfs4_bitmask_adjust(data->args.bitmask, inode, server, NULL);
+    nfs4_bitmask_set(data->args.bitmask_store,
+             server->cache_consistency_bitmask, inode, server,
+             NULL);
+    data->args.bitmask = data->args.bitmask_store;
     nfs_copy_fh(&data->fh, NFS_FH(inode));
     nfs4_stateid_copy(&data->stateid, stateid);
     data->res.fattr = &data->fattr;
@@ -7250,22 +7290,22 @@ nfs4_retry_setlk_simple(struct nfs4_state *state, int cmd,
 #ifdef CONFIG_NFS_V4_1
 struct nfs4_lock_waiter {
-    struct task_struct    *task;
     struct inode        *inode;
-    struct nfs_lowner    *owner;
+    struct nfs_lowner    owner;
+    wait_queue_entry_t    wait;
 };

 static int
 nfs4_wake_lock_waiter(wait_queue_entry_t *wait, unsigned int mode, int flags, void *key)
 {
-    int ret;
-    struct nfs4_lock_waiter    *waiter = wait->private;
+    struct nfs4_lock_waiter *waiter =
+        container_of(wait, struct nfs4_lock_waiter, wait);

     /* NULL key means to wake up everyone */
     if (key) {
         struct cb_notify_lock_args    *cbnl = key;
         struct nfs_lowner        *lowner = &cbnl->cbnl_owner,
-                        *wowner = waiter->owner;
+                        *wowner = &waiter->owner;

         /* Only wake if the callback was for the same owner. */
         if (lowner->id != wowner->id || lowner->s_dev != wowner->s_dev)
@@ -7276,53 +7316,45 @@ nfs4_wake_lock_waiter(wait_queue_entry_t *wait, unsigned int mode, int flags, void *key)
         return 0;
     }

-    /* override "private" so we can use default_wake_function */
-    wait->private = waiter->task;
-    ret = woken_wake_function(wait, mode, flags, key);
-    if (ret)
-        list_del_init(&wait->entry);
-    wait->private = waiter;
-    return ret;
+    return woken_wake_function(wait, mode, flags, key);
 }

 static int
 nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
-    int status = -ERESTARTSYS;
     struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner;
     struct nfs_server *server = NFS_SERVER(state->inode);
     struct nfs_client *clp = server->nfs_client;
     wait_queue_head_t *q = &clp->cl_lock_waitq;
-    struct nfs_lowner owner = { .clientid = clp->cl_clientid,
-                    .id = lsp->ls_seqid.owner_id,
-                    .s_dev = server->s_dev };
-    struct nfs4_lock_waiter waiter = { .task  = current,
-                       .inode = state->inode,
-                       .owner = &owner};
-    wait_queue_entry_t wait;
+    struct nfs4_lock_waiter waiter = {
+        .inode = state->inode,
+        .owner = { .clientid = clp->cl_clientid,
+               .id = lsp->ls_seqid.owner_id,
+               .s_dev = server->s_dev },
+    };
+    int status;

     /* Don't bother with waitqueue if we don't expect a callback */
     if (!test_bit(NFS_STATE_MAY_NOTIFY_LOCK, &state->flags))
         return nfs4_retry_setlk_simple(state, cmd, request);

-    init_wait(&wait);
-    wait.private = &waiter;
-    wait.func = nfs4_wake_lock_waiter;
+    init_wait(&waiter.wait);
+    waiter.wait.func = nfs4_wake_lock_waiter;
+    add_wait_queue(q, &waiter.wait);

-    while(!signalled()) {
-        add_wait_queue(q, &wait);
+    do {
         status = nfs4_proc_setlk(state, cmd, request);
-        if ((status != -EAGAIN) || IS_SETLK(cmd)) {
-            finish_wait(q, &wait);
+        if (status != -EAGAIN || IS_SETLK(cmd))
             break;
-        }

         status = -ERESTARTSYS;
         freezer_do_not_count();
-        wait_woken(&wait, TASK_INTERRUPTIBLE, NFS4_LOCK_MAXTIMEOUT);
+        wait_woken(&waiter.wait, TASK_INTERRUPTIBLE,
+               NFS4_LOCK_MAXTIMEOUT);
         freezer_count();
-        finish_wait(q, &wait);
-    }
+    } while (!signalled());
+
+    remove_wait_queue(q, &waiter.wait);

     return status;
 }
@@ -7615,7 +7647,7 @@ static int nfs4_xattr_get_nfs4_user(const struct xattr_handler *handler,
         return -EACCES;
     }

-    ret = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+    ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
     if (ret)
         return ret;
@@ -7646,7 +7678,7 @@ nfs4_listxattr_nfs4_user(struct inode *inode, char *list, size_t list_len)
         return 0;
     }

-    ret = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+    ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
     if (ret)
         return ret;
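The lock-waiter rework above drops the task pointer and the "override private" juggling by embedding the wait_queue_entry_t in the waiter struct and recovering the outer struct with container_of() in the wake callback. A standalone illustration of that recover-the-outer-struct idiom (names here are made up; only the pattern mirrors the kernel code):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

struct generic_entry {      /* stands in for wait_queue_entry_t */
    int flags;
};

struct my_waiter {          /* stands in for struct nfs4_lock_waiter */
    int id;
    struct generic_entry wait;
};

static void wake_fn(struct generic_entry *entry)
{
    /* The callback only ever sees &waiter.wait, yet can reach id. */
    struct my_waiter *w = container_of(entry, struct my_waiter, wait);

    printf("woken waiter id=%d\n", w->id);
}

int main(void)
{
    struct my_waiter waiter = { .id = 42 };

    wake_fn(&waiter.wait);
    return 0;
}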

---- next file ----
@@ -645,7 +645,7 @@ void nfs4_purge_state_owners(struct nfs_server *server, struct list_head *head)
 }

 /**
- * nfs4_purge_state_owners - Release all cached state owners
+ * nfs4_free_state_owners - Release all cached state owners
  * @head: resulting list of state owners
  *
  * Frees a list of state owners that was generated by

---- next file ----
@@ -666,7 +666,42 @@ TRACE_EVENT(nfs4_state_mgr_failed,
         )
 )

-TRACE_EVENT(nfs4_xdr_status,
+TRACE_EVENT(nfs4_xdr_bad_operation,
+        TP_PROTO(
+            const struct xdr_stream *xdr,
+            u32 op,
+            u32 expected
+        ),
+
+        TP_ARGS(xdr, op, expected),
+
+        TP_STRUCT__entry(
+            __field(unsigned int, task_id)
+            __field(unsigned int, client_id)
+            __field(u32, xid)
+            __field(u32, op)
+            __field(u32, expected)
+        ),
+
+        TP_fast_assign(
+            const struct rpc_rqst *rqstp = xdr->rqst;
+            const struct rpc_task *task = rqstp->rq_task;
+
+            __entry->task_id = task->tk_pid;
+            __entry->client_id = task->tk_client->cl_clid;
+            __entry->xid = be32_to_cpu(rqstp->rq_xid);
+            __entry->op = op;
+            __entry->expected = expected;
+        ),
+
+        TP_printk(
+            "task:%u@%d xid=0x%08x operation=%u, expected=%u",
+            __entry->task_id, __entry->client_id, __entry->xid,
+            __entry->op, __entry->expected
+        )
+);
+
+DECLARE_EVENT_CLASS(nfs4_xdr_event,
         TP_PROTO(
             const struct xdr_stream *xdr,
             u32 op,
@@ -701,6 +736,16 @@ TRACE_EVENT(nfs4_xdr_status,
             __entry->op
         )
 );
+#define DEFINE_NFS4_XDR_EVENT(name) \
+    DEFINE_EVENT(nfs4_xdr_event, name, \
+            TP_PROTO( \
+                const struct xdr_stream *xdr, \
+                u32 op, \
+                u32 error \
+            ), \
+            TP_ARGS(xdr, op, error))
+DEFINE_NFS4_XDR_EVENT(nfs4_xdr_status);
+DEFINE_NFS4_XDR_EVENT(nfs4_xdr_bad_filehandle);

 DECLARE_EVENT_CLASS(nfs4_cb_error_class,
         TP_PROTO(

---- next file ----
@@ -144,7 +144,17 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
  * layout types will be returned.
  */
 #define decode_fsinfo_maxsz    (op_decode_hdr_maxsz + \
-                 nfs4_fattr_bitmap_maxsz + 4 + 8 + 5)
+                 nfs4_fattr_bitmap_maxsz + 1 + \
+                 1 /* lease time */ + \
+                 2 /* max filesize */ + \
+                 2 /* max read */ + \
+                 2 /* max write */ + \
+                 nfstime4_maxsz /* time delta */ + \
+                 5 /* fs layout types */ + \
+                 1 /* layout blksize */ + \
+                 1 /* clone blksize */ + \
+                 1 /* change attr type */ + \
+                 1 /* xattr support */)
 #define encode_renew_maxsz    (op_encode_hdr_maxsz + 3)
 #define decode_renew_maxsz    (op_decode_hdr_maxsz)
 #define encode_setclientid_maxsz \
@@ -3200,9 +3210,7 @@ out_status:
     *nfs_retval = nfs4_stat_to_errno(nfserr);
     return true;
 out_bad_operation:
-    dprintk("nfs: Server returned operation"
-        " %d but we issued a request for %d\n",
-            opnum, expected);
+    trace_nfs4_xdr_bad_operation(xdr, opnum, expected);
     *nfs_retval = -EREMOTEIO;
     return false;
 out_overflow:
@@ -3487,8 +3495,11 @@ static int decode_attr_filehandle(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fh *fh)
         if (unlikely(!p))
             return -EIO;
         len = be32_to_cpup(p);
-        if (len > NFS4_FHSIZE)
-            return -EIO;
+        if (len > NFS4_FHSIZE || len == 0) {
+            trace_nfs4_xdr_bad_filehandle(xdr, OP_READDIR,
+                              NFS4ERR_BADHANDLE);
+            return -EREMOTEIO;
+        }
         p = xdr_inline_decode(xdr, len);
         if (unlikely(!p))
             return -EIO;
@@ -4837,6 +4848,32 @@ static int decode_attr_clone_blksize(struct xdr_stream *xdr, uint32_t *bitmap,
     return 0;
 }

+static int decode_attr_change_attr_type(struct xdr_stream *xdr,
+                    uint32_t *bitmap,
+                    enum nfs4_change_attr_type *res)
+{
+    u32 tmp = NFS4_CHANGE_TYPE_IS_UNDEFINED;
+
+    dprintk("%s: bitmap is %x\n", __func__, bitmap[2]);
+    if (bitmap[2] & FATTR4_WORD2_CHANGE_ATTR_TYPE) {
+        if (xdr_stream_decode_u32(xdr, &tmp))
+            return -EIO;
+        bitmap[2] &= ~FATTR4_WORD2_CHANGE_ATTR_TYPE;
+    }
+
+    switch(tmp) {
+    case NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR:
+    case NFS4_CHANGE_TYPE_IS_VERSION_COUNTER:
+    case NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS:
+    case NFS4_CHANGE_TYPE_IS_TIME_METADATA:
+        *res = tmp;
+        break;
+    default:
+        *res = NFS4_CHANGE_TYPE_IS_UNDEFINED;
+    }
+    return 0;
+}
+
 static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
 {
     unsigned int savep;
@@ -4885,6 +4922,11 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
     if (status)
         goto xdr_error;

+    status = decode_attr_change_attr_type(xdr, bitmap,
+                          &fsinfo->change_attr_type);
+    if (status)
+        goto xdr_error;
+
     status = decode_attr_xattrsupport(xdr, bitmap,
                       &fsinfo->xattr_support);
     if (status)
@@ -4913,8 +4955,10 @@ static int decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh)
     if (unlikely(!p))
         return -EIO;
     len = be32_to_cpup(p);
-    if (len > NFS4_FHSIZE)
-        return -EIO;
+    if (len > NFS4_FHSIZE || len == 0) {
+        trace_nfs4_xdr_bad_filehandle(xdr, OP_GETFH, NFS4ERR_BADHANDLE);
+        return -EREMOTEIO;
+    }
     fh->size = len;
     p = xdr_inline_decode(xdr, len);
     if (unlikely(!p))

---- next file ----
@@ -12,3 +12,4 @@
 EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_enter);
 EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_exit);
 EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_xdr_status);
+EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_xdr_bad_filehandle);

---- next file ----
@@ -45,6 +45,11 @@ TRACE_DEFINE_ENUM(NFS_INO_INVALID_CTIME);
 TRACE_DEFINE_ENUM(NFS_INO_INVALID_MTIME);
 TRACE_DEFINE_ENUM(NFS_INO_INVALID_SIZE);
 TRACE_DEFINE_ENUM(NFS_INO_INVALID_OTHER);
+TRACE_DEFINE_ENUM(NFS_INO_DATA_INVAL_DEFER);
+TRACE_DEFINE_ENUM(NFS_INO_INVALID_BLOCKS);
+TRACE_DEFINE_ENUM(NFS_INO_INVALID_XATTR);
+TRACE_DEFINE_ENUM(NFS_INO_INVALID_NLINK);
+TRACE_DEFINE_ENUM(NFS_INO_INVALID_MODE);

 #define nfs_show_cache_validity(v) \
     __print_flags(v, "|", \
@@ -60,7 +65,11 @@ TRACE_DEFINE_ENUM(NFS_INO_INVALID_OTHER);
             { NFS_INO_INVALID_MTIME, "INVALID_MTIME" }, \
             { NFS_INO_INVALID_SIZE, "INVALID_SIZE" }, \
             { NFS_INO_INVALID_OTHER, "INVALID_OTHER" }, \
-            { NFS_INO_INVALID_XATTR, "INVALID_XATTR" })
+            { NFS_INO_DATA_INVAL_DEFER, "DATA_INVAL_DEFER" }, \
+            { NFS_INO_INVALID_BLOCKS, "INVALID_BLOCKS" }, \
+            { NFS_INO_INVALID_XATTR, "INVALID_XATTR" }, \
+            { NFS_INO_INVALID_NLINK, "INVALID_NLINK" }, \
+            { NFS_INO_INVALID_MODE, "INVALID_MODE" })

 TRACE_DEFINE_ENUM(NFS_INO_ADVISE_RDPLUS);
 TRACE_DEFINE_ENUM(NFS_INO_STALE);
@@ -1392,7 +1401,7 @@ TRACE_DEFINE_ENUM(NFSERR_JUKEBOX);
             { NFSERR_BADTYPE, "BADTYPE" }, \
             { NFSERR_JUKEBOX, "JUKEBOX" })

-TRACE_EVENT(nfs_xdr_status,
+DECLARE_EVENT_CLASS(nfs_xdr_event,
         TP_PROTO(
             const struct xdr_stream *xdr,
             int error
@@ -1434,6 +1443,15 @@ TRACE_EVENT(nfs_xdr_status,
             nfs_show_status(__entry->error)
         )
 );
+#define DEFINE_NFS_XDR_EVENT(name) \
+    DEFINE_EVENT(nfs_xdr_event, name, \
+            TP_PROTO( \
+                const struct xdr_stream *xdr, \
+                int error \
+            ), \
+            TP_ARGS(xdr, error))
+DEFINE_NFS_XDR_EVENT(nfs_xdr_status);
+DEFINE_NFS_XDR_EVENT(nfs_xdr_bad_filehandle);

 #endif /* _TRACE_NFS_H */

---- next file ----
@@ -577,7 +577,7 @@ static void nfs_clear_request(struct nfs_page *req)
 }

 /**
- * nfs_release_request - Release the count on an NFS read/write request
+ * nfs_free_request - Release the count on an NFS read/write request
  * @req: request to release
  *
  * Note: Should never be called with the spinlock held!
@@ -1152,7 +1152,7 @@ nfs_pageio_cleanup_request(struct nfs_pageio_descriptor *desc,
 }

 /**
- * nfs_pageio_add_request - Attempt to coalesce a request into a page list.
+ * __nfs_pageio_add_request - Attempt to coalesce a request into a page list.
  * @desc: destination io descriptor
  * @req: request
  *

---- next file ----
@@ -1344,7 +1344,7 @@ _pnfs_return_layout(struct inode *ino)
     }
     valid_layout = pnfs_layout_is_valid(lo);
     pnfs_clear_layoutcommit(ino, &tmp_list);
-    pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0);
+    pnfs_mark_matching_lsegs_return(lo, &tmp_list, NULL, 0);

     if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) {
         struct pnfs_layout_range range = {
@@ -2410,9 +2410,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
             .iomode = IOMODE_ANY,
             .length = NFS4_MAX_UINT64,
         };
-        pnfs_set_plh_return_info(lo, IOMODE_ANY, 0);
-        pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
-                        &range, 0);
+        pnfs_mark_matching_lsegs_return(lo, &free_me, &range, 0);
         goto out_forget;
     } else {
         /* We have a completely new layout */
@@ -2468,6 +2466,9 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,

     assert_spin_locked(&lo->plh_inode->i_lock);

+    if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
+        tmp_list = &lo->plh_return_segs;
+
     list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
         if (pnfs_match_lseg_recall(lseg, return_range, seq)) {
             dprintk("%s: marking lseg %p iomode %d "
@@ -2475,6 +2476,8 @@ pnfs_mark_matching_lsegs_return(struct pnfs_layout_hdr *lo,
                 lseg, lseg->pls_range.iomode,
                 lseg->pls_range.offset,
                 lseg->pls_range.length);
+            if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
+                tmp_list = &lo->plh_return_segs;
             if (mark_lseg_invalid(lseg, tmp_list))
                 continue;
             remaining++;

---- next file ----
@@ -91,6 +91,7 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
     info->dtpref = fsinfo.tsize;
     info->maxfilesize = 0x7FFFFFFF;
     info->lease_time = 0;
+    info->change_attr_type = NFS4_CHANGE_TYPE_IS_TIME_METADATA;
     return 0;
 }

---- next file ----
@@ -116,16 +116,12 @@ static void unregister_nfs4_fs(void)
 #ifdef CONFIG_NFS_V4_2
 static void nfs_ssc_register_ops(void)
 {
-#ifdef CONFIG_NFSD_V4
     nfs_ssc_register(&nfs_ssc_clnt_ops_tbl);
-#endif
 }

 static void nfs_ssc_unregister_ops(void)
 {
-#ifdef CONFIG_NFSD_V4
     nfs_ssc_unregister(&nfs_ssc_clnt_ops_tbl);
-#endif
 }
 #endif /* CONFIG_NFS_V4_2 */
@@ -867,7 +863,7 @@ static int nfs_request_mount(struct fs_context *fc,
      * Now ask the mount server to map our export path
      * to a file handle.
      */
-    status = nfs_mount(&request);
+    status = nfs_mount(&request, ctx->timeo, ctx->retrans);
     if (status != 0) {
         dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n",
             request.hostname, status);

---- next file ----
@@ -764,9 +764,6 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
      * with invalidate/truncate.
      */
     spin_lock(&mapping->private_lock);
-    if (!nfs_have_writebacks(inode) &&
-        NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
-        inode_inc_iversion_raw(inode);
     if (likely(!PageSwapCache(req->wb_page))) {
         set_bit(PG_MAPPED, &req->wb_flags);
         SetPagePrivate(req->wb_page);
@@ -1293,7 +1290,7 @@ static bool nfs_write_pageuptodate(struct page *page, struct inode *inode,
     if (nfs_have_delegated_attributes(inode))
         goto out;
     if (nfsi->cache_validity &
-        (NFS_INO_REVAL_PAGECACHE | NFS_INO_INVALID_SIZE))
+        (NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE))
         return false;
     smp_rmb();
     if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags) && pagelen != 0)
@@ -1604,7 +1601,7 @@ static int nfs_writeback_done(struct rpc_task *task,
     /* Deal with the suid/sgid bit corner case */
     if (nfs_should_remove_suid(inode)) {
         spin_lock(&inode->i_lock);
-        nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+        nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE);
         spin_unlock(&inode->i_lock);
     }
     return 0;

---- next file ----
@@ -138,7 +138,7 @@ config NFSD_FLEXFILELAYOUT
 config NFSD_V4_2_INTER_SSC
     bool "NFSv4.2 inter server to server COPY"
-    depends on NFSD_V4 && NFS_V4_1 && NFS_V4_2
+    depends on NFSD_V4 && NFS_V4_2
     help
       This option enables support for NFSv4.2 inter server to
       server copy where the destination server calls the NFSv4.2

---- next file ----
@@ -452,6 +452,7 @@ enum lock_type4 {
 #define FATTR4_WORD2_LAYOUT_BLKSIZE    (1UL << 1)
 #define FATTR4_WORD2_MDSTHRESHOLD    (1UL << 4)
 #define FATTR4_WORD2_CLONE_BLKSIZE    (1UL << 13)
+#define FATTR4_WORD2_CHANGE_ATTR_TYPE    (1UL << 15)
 #define FATTR4_WORD2_SECURITY_LABEL    (1UL << 16)
 #define FATTR4_WORD2_MODE_UMASK    (1UL << 17)
 #define FATTR4_WORD2_XATTR_SUPPORT    (1UL << 18)
@@ -709,6 +710,14 @@ struct nl4_server {
     } u;
 };

+enum nfs4_change_attr_type {
+    NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR = 0,
+    NFS4_CHANGE_TYPE_IS_VERSION_COUNTER = 1,
+    NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS = 2,
+    NFS4_CHANGE_TYPE_IS_TIME_METADATA = 3,
+    NFS4_CHANGE_TYPE_IS_UNDEFINED = 4,
+};
+
 /*
  * Options for setxattr. These match the flags for setxattr(2).
  */
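The enum above is what lets the client decide whether an incoming change attribute can be ignored, mirroring the switch added to nfs4_update_changeattr_locked() earlier in this series. A standalone sketch of that decision, with the enum values copied from the hunk above (the helper name is hypothetical):

#include <stdbool.h>
#include <stdint.h>

enum nfs4_change_attr_type {
    NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR = 0,
    NFS4_CHANGE_TYPE_IS_VERSION_COUNTER = 1,
    NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS = 2,
    NFS4_CHANGE_TYPE_IS_TIME_METADATA = 3,
    NFS4_CHANGE_TYPE_IS_UNDEFINED = 4,
};

/* Return true when the cached change attribute already supersedes the
 * one reported by the server, so the update may be skipped. */
static bool change_attr_is_stale(enum nfs4_change_attr_type type,
                                 uint64_t cached, uint64_t reported)
{
    switch (type) {
    case NFS4_CHANGE_TYPE_IS_UNDEFINED:
        /* Opaque cookie: no ordering defined, never skip. */
        return false;
    case NFS4_CHANGE_TYPE_IS_TIME_METADATA:
        /* Timestamps may repeat, so only strictly newer wins. */
        return (int64_t)(cached - reported) > 0;
    default:
        /* Counters are totally ordered: equal is also stale. */
        return (int64_t)(cached - reported) >= 0;
    }
}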

---- next file ----
@@ -246,11 +246,15 @@ struct nfs4_copy_state {
                 BIT(13)        /* Deferred cache invalidation */
 #define NFS_INO_INVALID_BLOCKS    BIT(14)        /* cached blocks are invalid */
 #define NFS_INO_INVALID_XATTR    BIT(15)        /* xattrs are invalid */
+#define NFS_INO_INVALID_NLINK    BIT(16)        /* cached nlinks is invalid */
+#define NFS_INO_INVALID_MODE    BIT(17)        /* cached mode is invalid */

 #define NFS_INO_INVALID_ATTR    (NFS_INO_INVALID_CHANGE \
         | NFS_INO_INVALID_CTIME \
         | NFS_INO_INVALID_MTIME \
         | NFS_INO_INVALID_SIZE \
+        | NFS_INO_INVALID_NLINK \
+        | NFS_INO_INVALID_MODE \
         | NFS_INO_INVALID_OTHER)    /* inode metadata is invalid */

 /*
@@ -386,7 +390,7 @@ extern void nfs_access_set_mask(struct nfs_access_entry *, u32);
 extern int nfs_permission(struct user_namespace *, struct inode *, int);
 extern int nfs_open(struct inode *, struct file *);
 extern int nfs_attribute_cache_expired(struct inode *inode);
-extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
+extern int nfs_revalidate_inode(struct inode *inode, unsigned long flags);
 extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
 extern int nfs_clear_invalid_mapping(struct address_space *mapping);
 extern bool nfs_mapping_need_revalidate_inode(struct inode *inode);

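With the reworked nfs_revalidate_inode() prototype, callers pass the set of NFS_INO_INVALID_* bits they actually need fresh, which is what makes the statx() optimisation possible. A minimal sketch of a caller, assuming only the mode and link count are wanted (the wrapper name is hypothetical):

	/* Illustrative sketch only: limit revalidation to specific attributes. */
	static int example_refresh_mode_and_nlink(struct inode *inode)
	{
		return nfs_revalidate_inode(inode,
					    NFS_INO_INVALID_MODE |
					    NFS_INO_INVALID_NLINK);
	}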

@@ -156,6 +156,7 @@ struct nfs_server {
 #define NFS_MOUNT_WRITE_EAGER		0x01000000
 #define NFS_MOUNT_WRITE_WAIT		0x02000000

+	unsigned int		fattr_valid;	/* Valid attributes */
 	unsigned int		caps;		/* server capabilities */
 	unsigned int		rsize;		/* read size */
 	unsigned int		rpages;		/* read size (in pages) */
@@ -180,6 +181,9 @@ struct nfs_server {
 #define NFS_OPTION_FSCACHE	0x00000001	/* - local caching enabled */
 #define NFS_OPTION_MIGRATION	0x00000002	/* - NFSv4 migration enabled */

+	enum nfs4_change_attr_type
+				change_attr_type;/* Description of change attribute */
+
 	struct nfs_fsid		fsid;
 	__u64			maxfilesize;	/* maximum file size */
 	struct timespec64	time_delta;	/* smallest time granularity */
@@ -265,16 +269,7 @@ struct nfs_server {
 #define NFS_CAP_SYMLINKS	(1U << 2)
 #define NFS_CAP_ACLS		(1U << 3)
 #define NFS_CAP_ATOMIC_OPEN	(1U << 4)
-/* #define NFS_CAP_CHANGE_ATTR	(1U << 5) */
 #define NFS_CAP_LGOPEN		(1U << 5)
-#define NFS_CAP_FILEID		(1U << 6)
-#define NFS_CAP_MODE		(1U << 7)
-#define NFS_CAP_NLINK		(1U << 8)
-#define NFS_CAP_OWNER		(1U << 9)
-#define NFS_CAP_OWNER_GROUP	(1U << 10)
-#define NFS_CAP_ATIME		(1U << 11)
-#define NFS_CAP_CTIME		(1U << 12)
-#define NFS_CAP_MTIME		(1U << 13)
 #define NFS_CAP_POSIX_LOCK	(1U << 14)
 #define NFS_CAP_UIDGID_NOMAP	(1U << 15)
 #define NFS_CAP_STATEID_NFSV41	(1U << 16)


@@ -15,6 +15,8 @@
 #define NFS_DEF_FILE_IO_SIZE	(4096U)
 #define NFS_MIN_FILE_IO_SIZE	(1024U)

+#define NFS_BITMASK_SZ		3
+
 struct nfs4_string {
 	unsigned int len;
 	char *data;
@@ -150,6 +152,8 @@ struct nfs_fsinfo {
 	__u32			layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */
 	__u32			blksize; /* preferred pnfs io block size */
 	__u32			clone_blksize; /* granularity of a CLONE operation */
+	enum nfs4_change_attr_type
+				change_attr_type; /* Info about change attr */
 	__u32			xattr_support; /* User xattrs supported */
 };

@@ -525,7 +529,8 @@ struct nfs_closeargs {
 	struct nfs_seqid *	seqid;
 	fmode_t			fmode;
 	u32			share_access;
-	u32 *			bitmask;
+	const u32 *		bitmask;
+	u32			bitmask_store[NFS_BITMASK_SZ];
 	struct nfs4_layoutreturn_args *lr_args;
 };

@@ -608,7 +613,8 @@ struct nfs4_delegreturnargs {
 	struct nfs4_sequence_args seq_args;
 	const struct nfs_fh *fhandle;
 	const nfs4_stateid *stateid;
-	u32 * bitmask;
+	const u32 *bitmask;
+	u32 bitmask_store[NFS_BITMASK_SZ];
 	struct nfs4_layoutreturn_args *lr_args;
 };

@@ -648,7 +654,8 @@ struct nfs_pgio_args {
 	union {
 		unsigned int		replen;			/* used by read */
 		struct {
-			u32 *		bitmask;		/* used by write */
+			const u32 *	bitmask;		/* used by write */
+			u32 bitmask_store[NFS_BITMASK_SZ];	/* used by write */
 			enum nfs3_stable_how	stable;		/* used by write */
 		};
 	};

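The bitmask_store[] arrays give each request a private copy of the attribute bitmask, so per-request adjustments can no longer scribble on a server-global array (the "nfs4_bitmask_adjust() must not change the server global bitmasks" fix). A hedged sketch of the intended pattern, assuming a server-wide source such as cache_consistency_bitmask (the helper name is hypothetical):

	/* Illustrative sketch only: initialize a per-request bitmask copy. */
	static void example_init_close_bitmask(struct nfs_closeargs *args,
					       const struct nfs_server *server)
	{
		memcpy(args->bitmask_store, server->cache_consistency_bitmask,
		       sizeof(args->bitmask_store));
		/* ...any per-request adjustments are made on the copy... */
		args->bitmask = args->bitmask_store;
	}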

@@ -247,6 +247,7 @@ struct rpc_xprt {
 	struct rpc_task *	snd_task;	/* Task blocked in send */

 	struct list_head	xmit_queue;	/* Send queue */
+	atomic_long_t		xmit_queuelen;

 	struct svc_xprt		*bc_xprt;	/* NFSv4.1 backchannel */
 #if defined(CONFIG_SUNRPC_BACKCHANNEL)

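The xmit_queuelen counter is what enables the TCP_CORK micro-optimisation: a stream transport can keep the socket corked while more requests are still queued for transmission. A rough sketch of how a send path might consult it (illustrative only; the function name is hypothetical, and struct sock_xprt is private to the socket transport):

	/* Illustrative sketch only: uncork once this is the last queued request. */
	static void example_uncork_if_idle(struct rpc_xprt *xprt,
					   struct sock_xprt *transport)
	{
		if (atomic_long_read(&xprt->xmit_queuelen) == 1)
			tcp_sock_set_cork(transport->inet, false);
	}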

@@ -60,6 +60,46 @@ DECLARE_EVENT_CLASS(rpcrdma_completion_class,
 				),					\
 				TP_ARGS(wc, cid))

+DECLARE_EVENT_CLASS(rpcrdma_mr_completion_class,
+	TP_PROTO(
+		const struct ib_wc *wc,
+		const struct rpc_rdma_cid *cid
+	),
+
+	TP_ARGS(wc, cid),
+
+	TP_STRUCT__entry(
+		__field(u32, cq_id)
+		__field(int, completion_id)
+		__field(unsigned long, status)
+		__field(unsigned int, vendor_err)
+	),
+
+	TP_fast_assign(
+		__entry->cq_id = cid->ci_queue_id;
+		__entry->completion_id = cid->ci_completion_id;
+		__entry->status = wc->status;
+		if (wc->status)
+			__entry->vendor_err = wc->vendor_err;
+		else
+			__entry->vendor_err = 0;
+	),
+
+	TP_printk("cq.id=%u mr.id=%d status=%s (%lu/0x%x)",
+		__entry->cq_id, __entry->completion_id,
+		rdma_show_wc_status(__entry->status),
+		__entry->status, __entry->vendor_err
+	)
+);
+
+#define DEFINE_MR_COMPLETION_EVENT(name)				\
+		DEFINE_EVENT(rpcrdma_mr_completion_class, name,		\
+				TP_PROTO(				\
+					const struct ib_wc *wc,		\
+					const struct rpc_rdma_cid *cid	\
+				),					\
+				TP_ARGS(wc, cid))
+
 DECLARE_EVENT_CLASS(rpcrdma_receive_completion_class,
 	TP_PROTO(
 		const struct ib_wc *wc,

@@ -150,19 +190,17 @@ DECLARE_EVENT_CLASS(xprtrdma_rxprt,
 	TP_ARGS(r_xprt),

 	TP_STRUCT__entry(
-		__field(const void *, r_xprt)
 		__string(addr, rpcrdma_addrstr(r_xprt))
 		__string(port, rpcrdma_portstr(r_xprt))
 	),

 	TP_fast_assign(
-		__entry->r_xprt = r_xprt;
 		__assign_str(addr, rpcrdma_addrstr(r_xprt));
 		__assign_str(port, rpcrdma_portstr(r_xprt));
 	),

-	TP_printk("peer=[%s]:%s r_xprt=%p",
-		__get_str(addr), __get_str(port), __entry->r_xprt
+	TP_printk("peer=[%s]:%s",
+		__get_str(addr), __get_str(port)
 	)
 );

@@ -182,7 +220,6 @@ DECLARE_EVENT_CLASS(xprtrdma_connect_class,
 	TP_ARGS(r_xprt, rc),

 	TP_STRUCT__entry(
-		__field(const void *, r_xprt)
 		__field(int, rc)
 		__field(int, connect_status)
 		__string(addr, rpcrdma_addrstr(r_xprt))
@@ -190,15 +227,14 @@ DECLARE_EVENT_CLASS(xprtrdma_connect_class,
 	),

 	TP_fast_assign(
-		__entry->r_xprt = r_xprt;
 		__entry->rc = rc;
 		__entry->connect_status = r_xprt->rx_ep->re_connect_status;
 		__assign_str(addr, rpcrdma_addrstr(r_xprt));
 		__assign_str(port, rpcrdma_portstr(r_xprt));
 	),

-	TP_printk("peer=[%s]:%s r_xprt=%p: rc=%d connection status=%d",
-		__get_str(addr), __get_str(port), __entry->r_xprt,
+	TP_printk("peer=[%s]:%s rc=%d connection status=%d",
+		__get_str(addr), __get_str(port),
 		__entry->rc, __entry->connect_status
 	)
 );

@@ -343,7 +379,7 @@ DECLARE_EVENT_CLASS(xprtrdma_mr_class,

 		__entry->task_id = task->tk_pid;
 		__entry->client_id = task->tk_client->cl_clid;
-		__entry->mr_id  = mr->frwr.fr_mr->res.id;
+		__entry->mr_id  = mr->mr_ibmr->res.id;
 		__entry->nents  = mr->mr_nents;
 		__entry->handle = mr->mr_handle;
 		__entry->length = mr->mr_length;

@@ -384,7 +420,7 @@ DECLARE_EVENT_CLASS(xprtrdma_anonymous_mr_class,
 	),

 	TP_fast_assign(
-		__entry->mr_id  = mr->frwr.fr_mr->res.id;
+		__entry->mr_id  = mr->mr_ibmr->res.id;
 		__entry->nents  = mr->mr_nents;
 		__entry->handle = mr->mr_handle;
 		__entry->length = mr->mr_length;

@@ -495,22 +531,19 @@ TRACE_EVENT(xprtrdma_op_connect,
 	TP_ARGS(r_xprt, delay),

 	TP_STRUCT__entry(
-		__field(const void *, r_xprt)
 		__field(unsigned long, delay)
 		__string(addr, rpcrdma_addrstr(r_xprt))
 		__string(port, rpcrdma_portstr(r_xprt))
 	),

 	TP_fast_assign(
-		__entry->r_xprt = r_xprt;
 		__entry->delay = delay;
 		__assign_str(addr, rpcrdma_addrstr(r_xprt));
 		__assign_str(port, rpcrdma_portstr(r_xprt));
 	),

-	TP_printk("peer=[%s]:%s r_xprt=%p delay=%lu",
-		__get_str(addr), __get_str(port), __entry->r_xprt,
-		__entry->delay
+	TP_printk("peer=[%s]:%s delay=%lu",
+		__get_str(addr), __get_str(port), __entry->delay
 	)
 );

@@ -525,7 +558,6 @@ TRACE_EVENT(xprtrdma_op_set_cto,
 	TP_ARGS(r_xprt, connect, reconnect),

 	TP_STRUCT__entry(
-		__field(const void *, r_xprt)
 		__field(unsigned long, connect)
 		__field(unsigned long, reconnect)
 		__string(addr, rpcrdma_addrstr(r_xprt))
@@ -533,51 +565,18 @@ TRACE_EVENT(xprtrdma_op_set_cto,
 	),

 	TP_fast_assign(
-		__entry->r_xprt = r_xprt;
 		__entry->connect = connect;
 		__entry->reconnect = reconnect;
 		__assign_str(addr, rpcrdma_addrstr(r_xprt));
 		__assign_str(port, rpcrdma_portstr(r_xprt));
 	),

-	TP_printk("peer=[%s]:%s r_xprt=%p: connect=%lu reconnect=%lu",
-		__get_str(addr), __get_str(port), __entry->r_xprt,
+	TP_printk("peer=[%s]:%s connect=%lu reconnect=%lu",
+		__get_str(addr), __get_str(port),
 		__entry->connect / HZ, __entry->reconnect / HZ
 	)
 );

-TRACE_EVENT(xprtrdma_qp_event,
-	TP_PROTO(
-		const struct rpcrdma_ep *ep,
-		const struct ib_event *event
-	),
-
-	TP_ARGS(ep, event),
-
-	TP_STRUCT__entry(
-		__field(unsigned long, event)
-		__string(name, event->device->name)
-		__array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
-		__array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
-	),
-
-	TP_fast_assign(
-		const struct rdma_cm_id *id = ep->re_id;
-
-		__entry->event = event->event;
-		__assign_str(name, event->device->name);
-		memcpy(__entry->srcaddr, &id->route.addr.src_addr,
-		       sizeof(struct sockaddr_in6));
-		memcpy(__entry->dstaddr, &id->route.addr.dst_addr,
-		       sizeof(struct sockaddr_in6));
-	),
-
-	TP_printk("%pISpc -> %pISpc device=%s %s (%lu)",
-		__entry->srcaddr, __entry->dstaddr, __get_str(name),
-		rdma_show_ib_event(__entry->event), __entry->event
-	)
-);
-
 /**
  ** Call events
  **/

@@ -591,22 +590,19 @@ TRACE_EVENT(xprtrdma_createmrs,
 	TP_ARGS(r_xprt, count),

 	TP_STRUCT__entry(
-		__field(const void *, r_xprt)
 		__string(addr, rpcrdma_addrstr(r_xprt))
 		__string(port, rpcrdma_portstr(r_xprt))
 		__field(unsigned int, count)
 	),

 	TP_fast_assign(
-		__entry->r_xprt = r_xprt;
 		__entry->count = count;
 		__assign_str(addr, rpcrdma_addrstr(r_xprt));
 		__assign_str(port, rpcrdma_portstr(r_xprt));
 	),

-	TP_printk("peer=[%s]:%s r_xprt=%p: created %u MRs",
-		__get_str(addr), __get_str(port), __entry->r_xprt,
-		__entry->count
+	TP_printk("peer=[%s]:%s created %u MRs",
+		__get_str(addr), __get_str(port), __entry->count
 	)
 );

@@ -829,7 +825,7 @@ TRACE_EVENT(xprtrdma_post_recvs,
 	TP_ARGS(r_xprt, count, status),

 	TP_STRUCT__entry(
-		__field(const void *, r_xprt)
+		__field(u32, cq_id)
 		__field(unsigned int, count)
 		__field(int, status)
 		__field(int, posted)
@@ -838,16 +834,18 @@ TRACE_EVENT(xprtrdma_post_recvs,
 	),

 	TP_fast_assign(
-		__entry->r_xprt = r_xprt;
+		const struct rpcrdma_ep *ep = r_xprt->rx_ep;
+
+		__entry->cq_id = ep->re_attr.recv_cq->res.id;
 		__entry->count = count;
 		__entry->status = status;
-		__entry->posted = r_xprt->rx_ep->re_receive_count;
+		__entry->posted = ep->re_receive_count;
 		__assign_str(addr, rpcrdma_addrstr(r_xprt));
 		__assign_str(port, rpcrdma_portstr(r_xprt));
 	),

-	TP_printk("peer=[%s]:%s r_xprt=%p: %u new recvs, %d active (rc %d)",
-		__get_str(addr), __get_str(port), __entry->r_xprt,
+	TP_printk("peer=[%s]:%s cq.id=%d %u new recvs, %d active (rc %d)",
+		__get_str(addr), __get_str(port), __entry->cq_id,
 		__entry->count, __entry->posted, __entry->status
 	)
 );

@@ -886,10 +884,10 @@ TRACE_EVENT(xprtrdma_post_linv_err,
 DEFINE_RECEIVE_COMPLETION_EVENT(xprtrdma_wc_receive);

 DEFINE_COMPLETION_EVENT(xprtrdma_wc_send);
-DEFINE_COMPLETION_EVENT(xprtrdma_wc_fastreg);
-DEFINE_COMPLETION_EVENT(xprtrdma_wc_li);
-DEFINE_COMPLETION_EVENT(xprtrdma_wc_li_wake);
-DEFINE_COMPLETION_EVENT(xprtrdma_wc_li_done);
+DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_fastreg);
+DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_li);
+DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_li_wake);
+DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_li_done);

 TRACE_EVENT(xprtrdma_frwr_alloc,
 	TP_PROTO(
@@ -905,7 +903,7 @@ TRACE_EVENT(xprtrdma_frwr_alloc,
 	),

 	TP_fast_assign(
-		__entry->mr_id = mr->frwr.fr_mr->res.id;
+		__entry->mr_id = mr->mr_ibmr->res.id;
 		__entry->rc = rc;
 	),

@@ -933,7 +931,7 @@ TRACE_EVENT(xprtrdma_frwr_dereg,
 	),

 	TP_fast_assign(
-		__entry->mr_id  = mr->frwr.fr_mr->res.id;
+		__entry->mr_id  = mr->mr_ibmr->res.id;
 		__entry->nents  = mr->mr_nents;
 		__entry->handle = mr->mr_handle;
 		__entry->length = mr->mr_length;
@@ -966,7 +964,7 @@ TRACE_EVENT(xprtrdma_frwr_sgerr,
 	),

 	TP_fast_assign(
-		__entry->mr_id = mr->frwr.fr_mr->res.id;
+		__entry->mr_id = mr->mr_ibmr->res.id;
 		__entry->addr = mr->mr_sg->dma_address;
 		__entry->dir = mr->mr_dir;
 		__entry->nents = sg_nents;
@@ -996,7 +994,7 @@ TRACE_EVENT(xprtrdma_frwr_maperr,
 	),

 	TP_fast_assign(
-		__entry->mr_id = mr->frwr.fr_mr->res.id;
+		__entry->mr_id = mr->mr_ibmr->res.id;
 		__entry->addr = mr->mr_sg->dma_address;
 		__entry->dir = mr->mr_dir;
 		__entry->num_mapped = num_mapped;
@@ -1010,11 +1008,12 @@ TRACE_EVENT(xprtrdma_frwr_maperr,
 	)
 );

+DEFINE_MR_EVENT(fastreg);
 DEFINE_MR_EVENT(localinv);
+DEFINE_MR_EVENT(reminv);
 DEFINE_MR_EVENT(map);

 DEFINE_ANON_MR_EVENT(unmap);
-DEFINE_ANON_MR_EVENT(recycle);

 TRACE_EVENT(xprtrdma_dma_maperr,
 	TP_PROTO(

@@ -1248,22 +1247,19 @@ TRACE_EVENT(xprtrdma_cb_setup,
 	TP_ARGS(r_xprt, reqs),

 	TP_STRUCT__entry(
-		__field(const void *, r_xprt)
 		__field(unsigned int, reqs)
 		__string(addr, rpcrdma_addrstr(r_xprt))
 		__string(port, rpcrdma_portstr(r_xprt))
 	),

 	TP_fast_assign(
-		__entry->r_xprt = r_xprt;
 		__entry->reqs = reqs;
 		__assign_str(addr, rpcrdma_addrstr(r_xprt));
 		__assign_str(port, rpcrdma_portstr(r_xprt));
 	),

-	TP_printk("peer=[%s]:%s r_xprt=%p: %u reqs",
-		__get_str(addr), __get_str(port),
-		__entry->r_xprt, __entry->reqs
+	TP_printk("peer=[%s]:%s %u reqs",
+		__get_str(addr), __get_str(port), __entry->reqs
 	)
 );


@@ -1079,6 +1079,46 @@ TRACE_EVENT(xprt_transmit,
 		__entry->seqno, __entry->status)
 );

+TRACE_EVENT(xprt_retransmit,
+	TP_PROTO(
+		const struct rpc_rqst *rqst
+	),
+
+	TP_ARGS(rqst),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, task_id)
+		__field(unsigned int, client_id)
+		__field(u32, xid)
+		__field(int, ntrans)
+		__field(int, version)
+		__string(progname,
+			 rqst->rq_task->tk_client->cl_program->name)
+		__string(procedure,
+			 rqst->rq_task->tk_msg.rpc_proc->p_name)
+	),
+
+	TP_fast_assign(
+		struct rpc_task *task = rqst->rq_task;
+
+		__entry->task_id = task->tk_pid;
+		__entry->client_id = task->tk_client ?
+			task->tk_client->cl_clid : -1;
+		__entry->xid = be32_to_cpu(rqst->rq_xid);
+		__entry->ntrans = rqst->rq_ntrans;
+		__assign_str(progname,
+			     task->tk_client->cl_program->name)
+		__entry->version = task->tk_client->cl_vers;
+		__assign_str(procedure, task->tk_msg.rpc_proc->p_name)
+	),
+
+	TP_printk(
+		"task:%u@%u xid=0x%08x %sv%d %s ntrans=%d",
+		__entry->task_id, __entry->client_id, __entry->xid,
+		__get_str(progname), __entry->version, __get_str(procedure),
+		__entry->ntrans)
+);
+
 TRACE_EVENT(xprt_ping,
 	TP_PROTO(const struct rpc_xprt *xprt, int status),

@@ -1141,7 +1181,6 @@ DECLARE_EVENT_CLASS(xprt_writelock_event,

 DEFINE_WRITELOCK_EVENT(reserve_xprt);
 DEFINE_WRITELOCK_EVENT(release_xprt);
-DEFINE_WRITELOCK_EVENT(transmit_queued);

 DECLARE_EVENT_CLASS(xprt_cong_event,
 	TP_PROTO(


@@ -1799,7 +1799,6 @@ call_allocate(struct rpc_task *task)

 	status = xprt->ops->buf_alloc(task);
 	trace_rpc_buf_alloc(task, status);
-	xprt_inject_disconnect(xprt);
 	if (status == 0)
 		return;
 	if (status != -ENOMEM) {
@@ -2457,12 +2456,6 @@ call_decode(struct rpc_task *task)
 		task->tk_flags &= ~RPC_CALL_MAJORSEEN;
 	}

-	/*
-	 * Ensure that we see all writes made by xprt_complete_rqst()
-	 * before it changed req->rq_reply_bytes_recvd.
-	 */
-	smp_rmb();
-
 	/*
 	 * Did we ever call xprt_complete_rqst()? If not, we should assume
 	 * the message is incomplete.
@@ -2471,6 +2464,11 @@ call_decode(struct rpc_task *task)
 	if (!req->rq_reply_bytes_recvd)
 		goto out;

+	/* Ensure that we see all writes made by xprt_complete_rqst()
+	 * before it changed req->rq_reply_bytes_recvd.
+	 */
+	smp_rmb();
+
 	req->rq_rcv_buf.len = req->rq_private_buf.len;
 	trace_rpc_xdr_recvfrom(task, &req->rq_rcv_buf);

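The moved smp_rmb() is the read side of a publish/consume pairing: the completion path writes the reply buffer, issues a write barrier, then sets rq_reply_bytes_recvd; the decoder must test the flag first and only then order its reads of the buffer. A simplified, self-contained sketch of that pairing (illustrative only; the variable and function names stand in for the real fields):

	/* Illustrative sketch of the barrier pairing, not kernel code. */
	static int example_reply_len;		/* stands in for rq_reply_bytes_recvd */
	static char example_reply_buf[64];	/* stands in for rq_private_buf */

	static void example_writer(const char *data, int len)
	{
		memcpy(example_reply_buf, data, len);	/* fill the reply buffer... */
		smp_wmb();				/* ...make it visible first */
		WRITE_ONCE(example_reply_len, len);	/* then publish the length */
	}

	static int example_reader(char *out)
	{
		int len = READ_ONCE(example_reply_len);

		if (!len)
			return -EAGAIN;	/* reply not complete yet: no barrier needed */
		smp_rmb();		/* order the flag read before the data reads */
		memcpy(out, example_reply_buf, len);
		return len;
	}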

@@ -344,13 +344,15 @@ static struct rpc_clnt *rpcb_create(struct net *net, const char *nodename,
 				    const char *hostname,
 				    struct sockaddr *srvaddr, size_t salen,
 				    int proto, u32 version,
-				    const struct cred *cred)
+				    const struct cred *cred,
+				    const struct rpc_timeout *timeo)
 {
 	struct rpc_create_args args = {
 		.net		= net,
 		.protocol	= proto,
 		.address	= srvaddr,
 		.addrsize	= salen,
+		.timeout	= timeo,
 		.servername	= hostname,
 		.nodename	= nodename,
 		.program	= &rpcb_program,
@@ -705,7 +707,8 @@ void rpcb_getport_async(struct rpc_task *task)
 				clnt->cl_nodename,
 				xprt->servername, sap, salen,
 				xprt->prot, bind_version,
-				clnt->cl_cred);
+				clnt->cl_cred,
+				task->tk_client->cl_timeout);
 	if (IS_ERR(rpcb_clnt)) {
 		status = PTR_ERR(rpcb_clnt);
 		goto bailout_nofree;


@@ -698,9 +698,9 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
 	const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
 	int status = 0;

-	if (time_before(jiffies, req->rq_minortimeo))
-		return status;
 	if (time_before(jiffies, req->rq_majortimeo)) {
+		if (time_before(jiffies, req->rq_minortimeo))
+			return status;
 		if (to->to_exponential)
 			req->rq_timeout <<= 1;
 		else
@@ -1352,6 +1352,7 @@ xprt_request_enqueue_transmit(struct rpc_task *task)
 		list_add_tail(&req->rq_xmit, &xprt->xmit_queue);
 		INIT_LIST_HEAD(&req->rq_xmit2);
 out:
+		atomic_long_inc(&xprt->xmit_queuelen);
 		set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
 		spin_unlock(&xprt->queue_lock);
 	}
@@ -1381,6 +1382,7 @@ xprt_request_dequeue_transmit_locked(struct rpc_task *task)
 		}
 	} else
 		list_del(&req->rq_xmit2);
+	atomic_long_dec(&req->rq_xprt->xmit_queuelen);
 }

 /**
@@ -1469,8 +1471,6 @@ bool xprt_prepare_transmit(struct rpc_task *task)
 	struct rpc_xprt	*xprt = req->rq_xprt;

 	if (!xprt_lock_write(xprt, task)) {
-		trace_xprt_transmit_queued(xprt, task);
-
 		/* Race breaker: someone may have transmitted us */
 		if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
 			rpc_wake_up_queued_task_set_status(&xprt->sending,
@@ -1483,7 +1483,10 @@ bool xprt_prepare_transmit(struct rpc_task *task)

 void xprt_end_transmit(struct rpc_task *task)
 {
-	xprt_release_write(task->tk_rqstp->rq_xprt, task);
+	struct rpc_xprt	*xprt = task->tk_rqstp->rq_xprt;
+
+	xprt_inject_disconnect(xprt);
+	xprt_release_write(xprt, task);
 }

 /**
@@ -1537,8 +1540,10 @@ xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
 		return status;
 	}

-	if (is_retrans)
+	if (is_retrans) {
 		task->tk_client->cl_stats->rpcretrans++;
+		trace_xprt_retransmit(req);
+	}

-	xprt_inject_disconnect(xprt);
@@ -1885,7 +1890,6 @@ void xprt_release(struct rpc_task *task)
 	spin_unlock(&xprt->transport_lock);
 	if (req->rq_buffer)
 		xprt->ops->buf_free(task);
-	xprt_inject_disconnect(xprt);
 	xdr_free_bvec(&req->rq_rcv_buf);
 	xdr_free_bvec(&req->rq_snd_buf);
 	if (req->rq_cred != NULL)

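The xprt_adjust_timeout() reordering is the "major timeout handling" fix: the minor (per-try) timeout may only short-circuit the function while the major timeout has not yet expired; checking it first could keep deferring major-timeout processing. A simplified sketch of the corrected control flow (illustrative only; the function name and return codes are hypothetical):

	/* Illustrative sketch of the corrected timeout ordering. */
	static int example_adjust_timeout(unsigned long now,
					  unsigned long minortimeo,
					  unsigned long majortimeo)
	{
		if (time_before(now, majortimeo)) {
			/* Major timeout still pending: the minor timeout may defer. */
			if (time_before(now, minortimeo))
				return 0;	/* nothing to do yet */
			return 1;		/* back off the per-try timeout */
		}
		return 2;			/* major timeout fired: handle it now */
	}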

@@ -155,9 +155,11 @@ void xprt_rdma_bc_destroy(struct rpc_xprt *xprt, unsigned int reqs)
 void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
 {
 	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+	struct rpcrdma_rep *rep = req->rl_reply;
 	struct rpc_xprt *xprt = rqst->rq_xprt;
+	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);

-	rpcrdma_recv_buffer_put(req->rl_reply);
+	rpcrdma_rep_put(&r_xprt->rx_buf, rep);
 	req->rl_reply = NULL;

 	spin_lock(&xprt->bc_pa_lock);


@@ -49,20 +49,13 @@
 # define RPCDBG_FACILITY	RPCDBG_TRANS
 #endif

-/**
- * frwr_release_mr - Destroy one MR
- * @mr: MR allocated by frwr_mr_init
- *
- */
-void frwr_release_mr(struct rpcrdma_mr *mr)
+static void frwr_cid_init(struct rpcrdma_ep *ep,
+			  struct rpcrdma_mr *mr)
 {
-	int rc;
+	struct rpc_rdma_cid *cid = &mr->mr_cid;

-	rc = ib_dereg_mr(mr->frwr.fr_mr);
-	if (rc)
-		trace_xprtrdma_frwr_dereg(mr, rc);
-	kfree(mr->mr_sg);
-	kfree(mr);
+	cid->ci_queue_id = ep->re_attr.send_cq->res.id;
+	cid->ci_completion_id = mr->mr_ibmr->res.id;
 }

 static void frwr_mr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
@@ -75,20 +68,22 @@ static void frwr_mr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
 	}
 }

-static void frwr_mr_recycle(struct rpcrdma_mr *mr)
+/**
+ * frwr_mr_release - Destroy one MR
+ * @mr: MR allocated by frwr_mr_init
+ *
+ */
+void frwr_mr_release(struct rpcrdma_mr *mr)
 {
-	struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
-
-	trace_xprtrdma_mr_recycle(mr);
-
-	frwr_mr_unmap(r_xprt, mr);
+	int rc;

-	spin_lock(&r_xprt->rx_buf.rb_lock);
-	list_del(&mr->mr_all);
-	r_xprt->rx_stats.mrs_recycled++;
-	spin_unlock(&r_xprt->rx_buf.rb_lock);
+	frwr_mr_unmap(mr->mr_xprt, mr);

-	frwr_release_mr(mr);
+	rc = ib_dereg_mr(mr->mr_ibmr);
+	if (rc)
+		trace_xprtrdma_frwr_dereg(mr, rc);
+	kfree(mr->mr_sg);
+	kfree(mr);
 }

 static void frwr_mr_put(struct rpcrdma_mr *mr)
@@ -144,10 +139,11 @@ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
 		goto out_list_err;

 	mr->mr_xprt = r_xprt;
-	mr->frwr.fr_mr = frmr;
+	mr->mr_ibmr = frmr;
 	mr->mr_device = NULL;
 	INIT_LIST_HEAD(&mr->mr_list);
-	init_completion(&mr->frwr.fr_linv_done);
+	init_completion(&mr->mr_linv_done);
+	frwr_cid_init(ep, mr);

 	sg_init_table(sg, depth);
 	mr->mr_sg = sg;
@@ -257,6 +253,7 @@ int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device)
 	ep->re_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
 	ep->re_attr.cap.max_recv_wr = ep->re_max_requests;
 	ep->re_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
+	ep->re_attr.cap.max_recv_wr += RPCRDMA_MAX_RECV_BATCH;
 	ep->re_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */

 	ep->re_max_rdma_segs =
@@ -326,7 +323,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
 		goto out_dmamap_err;
 	mr->mr_device = ep->re_id->device;

-	ibmr = mr->frwr.fr_mr;
+	ibmr = mr->mr_ibmr;
 	n = ib_map_mr_sg(ibmr, mr->mr_sg, dma_nents, NULL, PAGE_SIZE);
 	if (n != dma_nents)
 		goto out_mapmr_err;
@@ -336,7 +333,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
 	key = (u8)(ibmr->rkey & 0x000000FF);
 	ib_update_fast_reg_key(ibmr, ++key);

-	reg_wr = &mr->frwr.fr_regwr;
+	reg_wr = &mr->mr_regwr;
 	reg_wr->mr = ibmr;
 	reg_wr->key = ibmr->rkey;
 	reg_wr->access = writing ?
@@ -364,29 +361,19 @@ out_mapmr_err:
  * @cq: completion queue
  * @wc: WCE for a completed FastReg WR
  *
+ * Each flushed MR gets destroyed after the QP has drained.
  */
 static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct ib_cqe *cqe = wc->wr_cqe;
-	struct rpcrdma_frwr *frwr =
-		container_of(cqe, struct rpcrdma_frwr, fr_cqe);
+	struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);

 	/* WARNING: Only wr_cqe and status are reliable at this point */
-	trace_xprtrdma_wc_fastreg(wc, &frwr->fr_cid);
-	/* The MR will get recycled when the associated req is retransmitted */
+	trace_xprtrdma_wc_fastreg(wc, &mr->mr_cid);

 	rpcrdma_flush_disconnect(cq->cq_context, wc);
 }

-static void frwr_cid_init(struct rpcrdma_ep *ep,
-			  struct rpcrdma_frwr *frwr)
-{
-	struct rpc_rdma_cid *cid = &frwr->fr_cid;
-
-	cid->ci_queue_id = ep->re_attr.send_cq->res.id;
-	cid->ci_completion_id = frwr->fr_mr->res.id;
-}
-
 /**
  * frwr_send - post Send WRs containing the RPC Call message
  * @r_xprt: controlling transport instance
@@ -403,27 +390,36 @@ static void frwr_cid_init(struct rpcrdma_ep *ep,
  */
 int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 {
+	struct ib_send_wr *post_wr, *send_wr = &req->rl_wr;
 	struct rpcrdma_ep *ep = r_xprt->rx_ep;
-	struct ib_send_wr *post_wr;
 	struct rpcrdma_mr *mr;
+	unsigned int num_wrs;

-	post_wr = &req->rl_wr;
+	num_wrs = 1;
+	post_wr = send_wr;
 	list_for_each_entry(mr, &req->rl_registered, mr_list) {
-		struct rpcrdma_frwr *frwr;
+		trace_xprtrdma_mr_fastreg(mr);

-		frwr = &mr->frwr;
-
-		frwr->fr_cqe.done = frwr_wc_fastreg;
-		frwr_cid_init(ep, frwr);
-		frwr->fr_regwr.wr.next = post_wr;
-		frwr->fr_regwr.wr.wr_cqe = &frwr->fr_cqe;
-		frwr->fr_regwr.wr.num_sge = 0;
-		frwr->fr_regwr.wr.opcode = IB_WR_REG_MR;
-		frwr->fr_regwr.wr.send_flags = 0;
+		mr->mr_cqe.done = frwr_wc_fastreg;
+		mr->mr_regwr.wr.next = post_wr;
+		mr->mr_regwr.wr.wr_cqe = &mr->mr_cqe;
+		mr->mr_regwr.wr.num_sge = 0;
+		mr->mr_regwr.wr.opcode = IB_WR_REG_MR;
+		mr->mr_regwr.wr.send_flags = 0;

-		post_wr = &frwr->fr_regwr.wr;
+		post_wr = &mr->mr_regwr.wr;
+		++num_wrs;
 	}

+	if ((kref_read(&req->rl_kref) > 1) || num_wrs > ep->re_send_count) {
+		send_wr->send_flags |= IB_SEND_SIGNALED;
+		ep->re_send_count = min_t(unsigned int, ep->re_send_batch,
+					  num_wrs - ep->re_send_count);
+	} else {
+		send_wr->send_flags &= ~IB_SEND_SIGNALED;
+		ep->re_send_count -= num_wrs;
+	}
+
+	trace_xprtrdma_post_send(req);
 	return ib_post_send(ep->re_id->qp, post_wr, NULL);
 }

@@ -440,6 +436,7 @@ void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
 	list_for_each_entry(mr, mrs, mr_list)
 		if (mr->mr_handle == rep->rr_inv_rkey) {
 			list_del_init(&mr->mr_list);
+			trace_xprtrdma_mr_reminv(mr);
 			frwr_mr_put(mr);
 			break;	/* only one invalidated MR per RPC */
 		}
@@ -447,9 +444,7 @@ void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)

 static void frwr_mr_done(struct ib_wc *wc, struct rpcrdma_mr *mr)
 {
-	if (wc->status != IB_WC_SUCCESS)
-		frwr_mr_recycle(mr);
-	else
+	if (likely(wc->status == IB_WC_SUCCESS))
 		frwr_mr_put(mr);
 }

@@ -462,12 +457,10 @@ static void frwr_mr_done(struct ib_wc *wc, struct rpcrdma_mr *mr)
 static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct ib_cqe *cqe = wc->wr_cqe;
-	struct rpcrdma_frwr *frwr =
-		container_of(cqe, struct rpcrdma_frwr, fr_cqe);
-	struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+	struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);

 	/* WARNING: Only wr_cqe and status are reliable at this point */
-	trace_xprtrdma_wc_li(wc, &frwr->fr_cid);
+	trace_xprtrdma_wc_li(wc, &mr->mr_cid);
 	frwr_mr_done(wc, mr);

 	rpcrdma_flush_disconnect(cq->cq_context, wc);
@@ -483,14 +476,12 @@ static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
 static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct ib_cqe *cqe = wc->wr_cqe;
-	struct rpcrdma_frwr *frwr =
-		container_of(cqe, struct rpcrdma_frwr, fr_cqe);
-	struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+	struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);

 	/* WARNING: Only wr_cqe and status are reliable at this point */
-	trace_xprtrdma_wc_li_wake(wc, &frwr->fr_cid);
+	trace_xprtrdma_wc_li_wake(wc, &mr->mr_cid);
 	frwr_mr_done(wc, mr);
-	complete(&frwr->fr_linv_done);
+	complete(&mr->mr_linv_done);

 	rpcrdma_flush_disconnect(cq->cq_context, wc);
 }

@@ -511,7 +502,6 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	struct ib_send_wr *first, **prev, *last;
 	struct rpcrdma_ep *ep = r_xprt->rx_ep;
 	const struct ib_send_wr *bad_wr;
-	struct rpcrdma_frwr *frwr;
 	struct rpcrdma_mr *mr;
 	int rc;

@@ -520,35 +510,34 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	 * Chain the LOCAL_INV Work Requests and post them with
 	 * a single ib_post_send() call.
 	 */
-	frwr = NULL;
 	prev = &first;
 	while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {

 		trace_xprtrdma_mr_localinv(mr);
 		r_xprt->rx_stats.local_inv_needed++;

-		frwr = &mr->frwr;
-		frwr->fr_cqe.done = frwr_wc_localinv;
-		frwr_cid_init(ep, frwr);
-		last = &frwr->fr_invwr;
+		last = &mr->mr_invwr;
 		last->next = NULL;
-		last->wr_cqe = &frwr->fr_cqe;
+		last->wr_cqe = &mr->mr_cqe;
 		last->sg_list = NULL;
 		last->num_sge = 0;
 		last->opcode = IB_WR_LOCAL_INV;
 		last->send_flags = IB_SEND_SIGNALED;
 		last->ex.invalidate_rkey = mr->mr_handle;

+		last->wr_cqe->done = frwr_wc_localinv;
+
 		*prev = last;
 		prev = &last->next;
 	}
+	mr = container_of(last, struct rpcrdma_mr, mr_invwr);

 	/* Strong send queue ordering guarantees that when the
 	 * last WR in the chain completes, all WRs in the chain
 	 * are complete.
 	 */
-	frwr->fr_cqe.done = frwr_wc_localinv_wake;
-	reinit_completion(&frwr->fr_linv_done);
+	last->wr_cqe->done = frwr_wc_localinv_wake;
+	reinit_completion(&mr->mr_linv_done);

 	/* Transport disconnect drains the receive CQ before it
 	 * replaces the QP. The RPC reply handler won't call us
@@ -562,22 +551,12 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	 * not happen, so don't wait in that case.
 	 */
 	if (bad_wr != first)
-		wait_for_completion(&frwr->fr_linv_done);
+		wait_for_completion(&mr->mr_linv_done);
 	if (!rc)
 		return;

-	/* Recycle MRs in the LOCAL_INV chain that did not get posted.
-	 */
+	/* On error, the MRs get destroyed once the QP has drained. */
 	trace_xprtrdma_post_linv_err(req, rc);
-	while (bad_wr) {
-		frwr = container_of(bad_wr, struct rpcrdma_frwr,
-				    fr_invwr);
-		mr = container_of(frwr, struct rpcrdma_mr, frwr);
-		bad_wr = bad_wr->next;
-
-		list_del_init(&mr->mr_list);
-		frwr_mr_recycle(mr);
-	}
 }

@@ -589,20 +568,24 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
 {
 	struct ib_cqe *cqe = wc->wr_cqe;
-	struct rpcrdma_frwr *frwr =
-		container_of(cqe, struct rpcrdma_frwr, fr_cqe);
-	struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
-	struct rpcrdma_rep *rep = mr->mr_req->rl_reply;
+	struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);
+	struct rpcrdma_rep *rep;

 	/* WARNING: Only wr_cqe and status are reliable at this point */
-	trace_xprtrdma_wc_li_done(wc, &frwr->fr_cid);
-	frwr_mr_done(wc, mr);
+	trace_xprtrdma_wc_li_done(wc, &mr->mr_cid);

-	/* Ensure @rep is generated before frwr_mr_done */
+	/* Ensure that @rep is generated before the MR is released */
+	rep = mr->mr_req->rl_reply;
 	smp_rmb();
-	rpcrdma_complete_rqst(rep);

-	rpcrdma_flush_disconnect(cq->cq_context, wc);
+	if (wc->status != IB_WC_SUCCESS) {
+		if (rep)
+			rpcrdma_unpin_rqst(rep);
+		rpcrdma_flush_disconnect(cq->cq_context, wc);
+		return;
+	}
+	frwr_mr_put(mr);
+	rpcrdma_complete_rqst(rep);
 }

@@ -619,33 +602,29 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 {
 	struct ib_send_wr *first, *last, **prev;
 	struct rpcrdma_ep *ep = r_xprt->rx_ep;
-	const struct ib_send_wr *bad_wr;
-	struct rpcrdma_frwr *frwr;
 	struct rpcrdma_mr *mr;
 	int rc;

 	/* Chain the LOCAL_INV Work Requests and post them with
 	 * a single ib_post_send() call.
 	 */
-	frwr = NULL;
 	prev = &first;
 	while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {

 		trace_xprtrdma_mr_localinv(mr);
 		r_xprt->rx_stats.local_inv_needed++;

-		frwr = &mr->frwr;
-		frwr->fr_cqe.done = frwr_wc_localinv;
-		frwr_cid_init(ep, frwr);
-		last = &frwr->fr_invwr;
+		last = &mr->mr_invwr;
 		last->next = NULL;
-		last->wr_cqe = &frwr->fr_cqe;
+		last->wr_cqe = &mr->mr_cqe;
 		last->sg_list = NULL;
 		last->num_sge = 0;
 		last->opcode = IB_WR_LOCAL_INV;
 		last->send_flags = IB_SEND_SIGNALED;
 		last->ex.invalidate_rkey = mr->mr_handle;

+		last->wr_cqe->done = frwr_wc_localinv;
+
 		*prev = last;
 		prev = &last->next;
 	}
@@ -655,31 +634,23 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 	 * are complete. The last completion will wake up the
 	 * RPC waiter.
 	 */
-	frwr->fr_cqe.done = frwr_wc_localinv_done;
+	last->wr_cqe->done = frwr_wc_localinv_done;

 	/* Transport disconnect drains the receive CQ before it
 	 * replaces the QP. The RPC reply handler won't call us
 	 * unless re_id->qp is a valid pointer.
 	 */
-	bad_wr = NULL;
-	rc = ib_post_send(ep->re_id->qp, first, &bad_wr);
+	rc = ib_post_send(ep->re_id->qp, first, NULL);
 	if (!rc)
 		return;

-	/* Recycle MRs in the LOCAL_INV chain that did not get posted.
-	 */
+	/* On error, the MRs get destroyed once the QP has drained. */
 	trace_xprtrdma_post_linv_err(req, rc);
-	while (bad_wr) {
-		frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr);
-		mr = container_of(frwr, struct rpcrdma_mr, frwr);
-		bad_wr = bad_wr->next;
-
-		frwr_mr_recycle(mr);
-	}

 	/* The final LOCAL_INV WR in the chain is supposed to
-	 * do the wake. If it was never posted, the wake will
-	 * not happen, so wake here in that case.
+	 * do the wake. If it was never posted, the wake does
+	 * not happen. Unpin the rqst in preparation for its
+	 * retransmission.
 	 */
-	rpcrdma_complete_rqst(req->rl_reply);
+	rpcrdma_unpin_rqst(req->rl_reply);
 }

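The reworked frwr_send() now counts the FastReg WRs it chains ahead of the Send and charges all of them against the send-completion signaling budget, so completions are signaled once per batch instead of once per WR. A simplified, self-contained sketch of that batching idea (illustrative only; the struct and function names are hypothetical and this is not the kernel's exact accounting):

	/* Illustrative sketch only: signal one completion per batch of WRs. */
	struct example_sq {
		unsigned int budget;	/* WRs remaining before a signal is due */
		unsigned int batch;	/* nominal batch size */
	};

	static bool example_need_signal(struct example_sq *sq, unsigned int num_wrs)
	{
		if (num_wrs > sq->budget) {
			sq->budget = sq->batch;	/* restart the batch */
			return true;		/* request a signaled completion */
		}
		sq->budget -= num_wrs;		/* stay silent this time */
		return false;
	}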

@@ -1326,9 +1326,35 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
 	return -EIO;
 }

-/* Perform XID lookup, reconstruction of the RPC reply, and
- * RPC completion while holding the transport lock to ensure
- * the rep, rqst, and rq_task pointers remain stable.
+/**
+ * rpcrdma_unpin_rqst - Release rqst without completing it
+ * @rep: RPC/RDMA Receive context
+ *
+ * This is done when a connection is lost so that a Reply
+ * can be dropped and its matching Call can be subsequently
+ * retransmitted on a new connection.
+ */
+void rpcrdma_unpin_rqst(struct rpcrdma_rep *rep)
+{
+	struct rpc_xprt *xprt = &rep->rr_rxprt->rx_xprt;
+	struct rpc_rqst *rqst = rep->rr_rqst;
+	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+
+	req->rl_reply = NULL;
+	rep->rr_rqst = NULL;
+
+	spin_lock(&xprt->queue_lock);
+	xprt_unpin_rqst(rqst);
+	spin_unlock(&xprt->queue_lock);
+}
+
+/**
+ * rpcrdma_complete_rqst - Pass completed rqst back to RPC
+ * @rep: RPC/RDMA Receive context
+ *
+ * Reconstruct the RPC reply and complete the transaction
+ * while @rqst is still pinned to ensure the rep, rqst, and
+ * rq_task pointers remain stable.
  */
 void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
 {
@@ -1430,13 +1456,14 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
 		credits = 1;	/* don't deadlock */
 	else if (credits > r_xprt->rx_ep->re_max_requests)
 		credits = r_xprt->rx_ep->re_max_requests;
+	rpcrdma_post_recvs(r_xprt, credits + (buf->rb_bc_srv_max_requests << 1),
+			   false);
 	if (buf->rb_credits != credits)
 		rpcrdma_update_cwnd(r_xprt, credits);
-	rpcrdma_post_recvs(r_xprt, false);

 	req = rpcr_to_rdmar(rqst);
 	if (unlikely(req->rl_reply))
-		rpcrdma_recv_buffer_put(req->rl_reply);
+		rpcrdma_rep_put(buf, req->rl_reply);
 	req->rl_reply = rep;
 	rep->rr_rqst = rqst;

@@ -1464,5 +1491,5 @@ out_shortreply:
 	trace_xprtrdma_reply_short_err(rep);

 out:
-	rpcrdma_recv_buffer_put(rep);
+	rpcrdma_rep_put(buf, rep);
 }


@@ -262,8 +262,10 @@ xprt_rdma_connect_worker(struct work_struct *work)
  * xprt_rdma_inject_disconnect - inject a connection fault
  * @xprt: transport context
  *
- * If @xprt is connected, disconnect it to simulate spurious connection
- * loss.
+ * If @xprt is connected, disconnect it to simulate spurious
+ * connection loss. Caller must hold @xprt's send lock to
+ * ensure that data structures and hardware resources are
+ * stable during the rdma_disconnect() call.
  */
 static void
 xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)


@@ -101,6 +101,12 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
 	struct rpcrdma_ep *ep = r_xprt->rx_ep;
 	struct rdma_cm_id *id = ep->re_id;

+	/* Wait for rpcrdma_post_recvs() to leave its critical
+	 * section.
+	 */
+	if (atomic_inc_return(&ep->re_receiving) > 1)
+		wait_for_completion(&ep->re_done);
+
 	/* Flush Receives, then wait for deferred Reply work
 	 * to complete.
 	 */
@@ -114,22 +120,6 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
 	rpcrdma_ep_put(ep);
 }

-/**
- * rpcrdma_qp_event_handler - Handle one QP event (error notification)
- * @event: details of the event
- * @context: ep that owns QP where event occurred
- *
- * Called from the RDMA provider (device driver) possibly in an interrupt
- * context. The QP is always destroyed before the ID, so the ID will be
- * reliably available when this handler is invoked.
- */
-static void rpcrdma_qp_event_handler(struct ib_event *event, void *context)
-{
-	struct rpcrdma_ep *ep = context;
-
-	trace_xprtrdma_qp_event(ep, event);
-}
-
 /* Ensure xprt_force_disconnect() is invoked exactly once when a
  * connection is closed or lost. (The important thing is it needs
  * to be invoked "at least" once).
@@ -205,7 +195,7 @@ static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)

 out_flushed:
 	rpcrdma_flush_disconnect(r_xprt, wc);
-	rpcrdma_rep_destroy(rep);
+	rpcrdma_rep_put(&r_xprt->rx_buf, rep);
 }

 static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep,
@@ -414,6 +404,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
 	__module_get(THIS_MODULE);
 	device = id->device;
 	ep->re_id = id;
+	reinit_completion(&ep->re_done);

 	ep->re_max_requests = r_xprt->rx_xprt.max_reqs;
 	ep->re_inline_send = xprt_rdma_max_inline_write;
@@ -424,8 +415,6 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)

 	r_xprt->rx_buf.rb_max_requests = cpu_to_be32(ep->re_max_requests);

-	ep->re_attr.event_handler = rpcrdma_qp_event_handler;
-	ep->re_attr.qp_context = ep;
 	ep->re_attr.srq = NULL;
 	ep->re_attr.cap.max_inline_data = 0;
 	ep->re_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -535,7 +524,7 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt)
 	 * outstanding Receives.
 	 */
 	rpcrdma_ep_get(ep);
-	rpcrdma_post_recvs(r_xprt, true);
+	rpcrdma_post_recvs(r_xprt, 1, true);

 	rc = rdma_connect(ep->re_id, &ep->re_remote_cma);
 	if (rc)
@@ -954,13 +943,11 @@ static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt)
 		rpcrdma_req_reset(req);
 }

-/* No locking needed here. This function is called only by the
- * Receive completion handler.
- */
 static noinline
 struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
 				       bool temp)
 {
+	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
 	struct rpcrdma_rep *rep;

 	rep = kzalloc(sizeof(*rep), GFP_KERNEL);
@@ -987,7 +974,10 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
 	rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
 	rep->rr_recv_wr.num_sge = 1;
 	rep->rr_temp = temp;
-	list_add(&rep->rr_all, &r_xprt->rx_buf.rb_all_reps);
+
+	spin_lock(&buf->rb_lock);
+	list_add(&rep->rr_all, &buf->rb_all_reps);
+	spin_unlock(&buf->rb_lock);
 	return rep;

 out_free_regbuf:
@@ -998,16 +988,23 @@ out:
 	return NULL;
 }

-/* No locking needed here. This function is invoked only by the
- * Receive completion handler, or during transport shutdown.
- */
-static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
+static void rpcrdma_rep_free(struct rpcrdma_rep *rep)
 {
-	list_del(&rep->rr_all);
 	rpcrdma_regbuf_free(rep->rr_rdmabuf);
 	kfree(rep);
 }

+static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
+{
+	struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf;
+
+	spin_lock(&buf->rb_lock);
+	list_del(&rep->rr_all);
+	spin_unlock(&buf->rb_lock);
+
+	rpcrdma_rep_free(rep);
+}
+
 static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf)
 {
 	struct llist_node *node;
@@ -1019,12 +1016,21 @@ static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf)
 	return llist_entry(node, struct rpcrdma_rep, rr_node);
 }

-static void rpcrdma_rep_put(struct rpcrdma_buffer *buf,
-			    struct rpcrdma_rep *rep)
+/**
+ * rpcrdma_rep_put - Release rpcrdma_rep back to free list
+ * @buf: buffer pool
+ * @rep: rep to release
+ *
+ */
+void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep)
 {
 	llist_add(&rep->rr_node, &buf->rb_free_reps);
 }

+/* Caller must ensure the QP is quiescent (RQ is drained) before
+ * invoking this function, to guarantee rb_all_reps is not
+ * changing.
+ */
 static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt)
 {
 	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
@@ -1032,7 +1038,7 @@ static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt)

 	list_for_each_entry(rep, &buf->rb_all_reps, rr_all) {
 		rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf);
-		rep->rr_temp = true;
+		rep->rr_temp = true;	/* Mark this rep for destruction */
 	}
 }

@@ -1040,8 +1046,18 @@ static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf)
 {
 	struct rpcrdma_rep *rep;

-	while ((rep = rpcrdma_rep_get_locked(buf)) != NULL)
-		rpcrdma_rep_destroy(rep);
+	spin_lock(&buf->rb_lock);
+	while ((rep = list_first_entry_or_null(&buf->rb_all_reps,
+					       struct rpcrdma_rep,
+					       rr_all)) != NULL) {
+		list_del(&rep->rr_all);
+		spin_unlock(&buf->rb_lock);
+
+		rpcrdma_rep_free(rep);
+
+		spin_lock(&buf->rb_lock);
+	}
+	spin_unlock(&buf->rb_lock);
 }

@@ -1104,7 +1120,7 @@ void rpcrdma_req_destroy(struct rpcrdma_req *req)
 		list_del(&mr->mr_all);
 		spin_unlock(&buf->rb_lock);

-		frwr_release_mr(mr);
+		frwr_mr_release(mr);
 	}

 	rpcrdma_regbuf_free(req->rl_recvbuf);
@@ -1135,7 +1151,7 @@ static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt)
 		list_del(&mr->mr_all);
 		spin_unlock(&buf->rb_lock);

-		frwr_release_mr(mr);
+		frwr_mr_release(mr);

 		spin_lock(&buf->rb_lock);
 	}
@@ -1221,17 +1237,6 @@ void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
 	spin_unlock(&buffers->rb_lock);
 }

-/**
- * rpcrdma_recv_buffer_put - Release rpcrdma_rep back to free list
- * @rep: rep to release
- *
- * Used after error conditions.
- */
-void rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
-{
-	rpcrdma_rep_put(&rep->rr_rxprt->rx_buf, rep);
-}
-
 /* Returns a pointer to a rpcrdma_regbuf object, or NULL.
  *
  * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
@@ -1342,21 +1347,7 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb)
  */
 int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 {
-	struct ib_send_wr *send_wr = &req->rl_wr;
-	struct rpcrdma_ep *ep = r_xprt->rx_ep;
-	int rc;
-
-	if (!ep->re_send_count || kref_read(&req->rl_kref) > 1) {
-		send_wr->send_flags |= IB_SEND_SIGNALED;
-		ep->re_send_count = ep->re_send_batch;
-	} else {
-		send_wr->send_flags &= ~IB_SEND_SIGNALED;
-		--ep->re_send_count;
-	}
-
-	trace_xprtrdma_post_send(req);
-	rc = frwr_send(r_xprt, req);
-	if (rc)
+	if (frwr_send(r_xprt, req))
 		return -ENOTCONN;
 	return 0;
 }

@@ -1364,27 +1355,30 @@ int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
 /**
  * rpcrdma_post_recvs - Refill the Receive Queue
  * @r_xprt: controlling transport instance
- * @temp: mark Receive buffers to be deleted after use
+ * @needed: current credit grant
+ * @temp: mark Receive buffers to be deleted after one use
  *
  */
-void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
+void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
 {
 	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
 	struct rpcrdma_ep *ep = r_xprt->rx_ep;
 	struct ib_recv_wr *wr, *bad_wr;
 	struct rpcrdma_rep *rep;
-	int needed, count, rc;
+	int count, rc;

 	rc = 0;
 	count = 0;

-	needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
 	if (likely(ep->re_receive_count > needed))
 		goto out;
 	needed -= ep->re_receive_count;
 	if (!temp)
 		needed += RPCRDMA_MAX_RECV_BATCH;

+	if (atomic_inc_return(&ep->re_receiving) > 1)
+		goto out;
+
 	/* fast path: all needed reps can be found on the free list */
 	wr = NULL;
 	while (needed) {
@@ -1410,6 +1404,9 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)

 	rc = ib_post_recv(ep->re_id->qp, wr,
 			  (const struct ib_recv_wr **)&bad_wr);
+	if (atomic_dec_return(&ep->re_receiving) > 0)
+		complete(&ep->re_done);
+
 out:
 	trace_xprtrdma_post_recvs(r_xprt, count, rc);
 	if (rc) {
@@ -1418,7 +1415,7 @@ out:
 			rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr);
 			wr = wr->next;

-			rpcrdma_recv_buffer_put(rep);
+			rpcrdma_rep_put(buf, rep);
 			--count;
 		}
 	}

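The new re_receiving counter plus the re_done completion form a small handshake that keeps the drain path out of the Receive-posting critical section: whichever side arrives second either waits (the drainer) or backs off (the poster). A simplified, self-contained sketch of the pattern (illustrative only; assumes a single poster at a time and a drainer that runs once at disconnect):

	/* Illustrative sketch of the poster/drainer handshake, not kernel code. */
	static atomic_t example_in_post = ATOMIC_INIT(0);
	static DECLARE_COMPLETION(example_post_done);

	static void example_poster(void)
	{
		if (atomic_inc_return(&example_in_post) > 1) {
			atomic_dec(&example_in_post);	/* drainer got here first */
			return;				/* skip posting Receives */
		}
		/* ... post Receive WRs ... */
		if (atomic_dec_return(&example_in_post) > 0)
			complete(&example_post_done);	/* a drainer queued up meanwhile */
	}

	static void example_drainer(void)
	{
		if (atomic_inc_return(&example_in_post) > 1)
			wait_for_completion(&example_post_done);	/* poster still inside */
		/* ... now safe to drain the queue pair ... */
	}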

@@ -83,6 +83,7 @@ struct rpcrdma_ep {
 	unsigned int		re_max_inline_recv;
 	int			re_async_rc;
 	int			re_connect_status;
+	atomic_t		re_receiving;
 	atomic_t		re_force_disconnect;
 	struct ib_qp_init_attr	re_attr;
 	wait_queue_head_t	re_connect_wait;

@@ -228,31 +229,28 @@ struct rpcrdma_sendctx {
  * An external memory region is any buffer or page that is registered
  * on the fly (ie, not pre-registered).
  */
-struct rpcrdma_frwr {
-	struct ib_mr			*fr_mr;
-	struct ib_cqe			fr_cqe;
-	struct rpc_rdma_cid		fr_cid;
-	struct completion		fr_linv_done;
-	union {
-		struct ib_reg_wr	fr_regwr;
-		struct ib_send_wr	fr_invwr;
-	};
-};
-
 struct rpcrdma_req;
 struct rpcrdma_mr {
 	struct list_head	mr_list;
 	struct rpcrdma_req	*mr_req;
+
+	struct ib_mr		*mr_ibmr;
 	struct ib_device	*mr_device;
 	struct scatterlist	*mr_sg;
 	int			mr_nents;
 	enum dma_data_direction	mr_dir;
-	struct rpcrdma_frwr	frwr;
+	struct ib_cqe		mr_cqe;
+	struct completion	mr_linv_done;
+	union {
+		struct ib_reg_wr	mr_regwr;
+		struct ib_send_wr	mr_invwr;
+	};
 	struct rpcrdma_xprt	*mr_xprt;
 	u32			mr_handle;
 	u32			mr_length;
 	u64			mr_offset;
 	struct list_head	mr_all;
+	struct rpc_rdma_cid	mr_cid;
 };

 /*
@@ -461,7 +459,7 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt);
 void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt);

 int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
-void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
+void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp);

 /*
  * Buffer calls - xprtrdma/verbs.c
@@ -480,7 +478,7 @@ void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt);
 struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
 void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers,
 			struct rpcrdma_req *req);
-void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
+void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep);

 bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size,
 			    gfp_t flags);
@@ -527,7 +525,7 @@ rpcrdma_data_dir(bool writing)
 void frwr_reset(struct rpcrdma_req *req);
 int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device);
 int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr);
-void frwr_release_mr(struct rpcrdma_mr *mr);
+void frwr_mr_release(struct rpcrdma_mr *mr);
 struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
 				struct rpcrdma_mr_seg *seg,
 				int nsegs, bool writing, __be32 xid,
@@ -560,6 +558,7 @@ int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst);
 void rpcrdma_set_max_header_sizes(struct rpcrdma_ep *ep);
 void rpcrdma_reset_cwnd(struct rpcrdma_xprt *r_xprt);
 void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
+void rpcrdma_unpin_rqst(struct rpcrdma_rep *rep);
 void rpcrdma_reply_handler(struct rpcrdma_rep *rep);

 static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c

@@ -558,6 +558,10 @@ xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
 	struct rpc_rqst *req;
 	ssize_t ret;
 
+	/* Is this transport associated with the backchannel? */
+	if (!xprt->bc_serv)
+		return -ESHUTDOWN;
+
 	/* Look up and lock the request corresponding to the given XID */
 	req = xprt_lookup_bc_request(xprt, transport->recv.xid);
 	if (!req) {
@@ -1018,6 +1022,7 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
 	 * to cope with writespace callbacks arriving _after_ we have
 	 * called sendmsg(). */
 	req->rq_xtime = ktime_get();
+	tcp_sock_set_cork(transport->inet, true);
 	while (1) {
 		status = xprt_sock_sendmsg(transport->sock, &msg, xdr,
 					   transport->xmit.offset, rm, &sent);
@@ -1032,6 +1037,8 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
 	if (likely(req->rq_bytes_sent >= msglen)) {
 		req->rq_xmit_bytes_sent += transport->xmit.offset;
 		transport->xmit.offset = 0;
+		if (atomic_long_read(&xprt->xmit_queuelen) == 1)
+			tcp_sock_set_cork(transport->inet, false);
 		return 0;
 	}
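The sender now corks the socket before transmitting each RPC record and
uncorks only when the request just sent was the last one on the transmit
queue (xmit_queuelen == 1), so back-to-back small RPC messages coalesce
into full-sized TCP segments. A user-space sketch of the same batching
idea; the send_batch() helper and its messages are illustrative, and the
kernel uses the tcp_sock_set_cork() helper rather than setsockopt():

    #include <netinet/in.h>
    #include <netinet/tcp.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    static void send_batch(int sock, const char **msgs, int n)
    {
            int on = 1, off = 0;

            /* Cork: let small writes accumulate into full segments. */
            setsockopt(sock, IPPROTO_TCP, TCP_CORK, &on, sizeof(on));
            for (int i = 0; i < n; i++)
                    (void)write(sock, msgs[i], strlen(msgs[i]));
            /* Uncork once the queue is empty: push out whatever remains. */
            setsockopt(sock, IPPROTO_TCP, TCP_CORK, &off, sizeof(off));
    }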
@@ -2163,6 +2170,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 	}
 
 	xs_tcp_set_socket_timeouts(xprt, sock);
+	tcp_sock_set_nodelay(sk);
 
 	write_lock_bh(&sk->sk_callback_lock);
@@ -2177,7 +2185,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 
 	/* socket options */
 	sock_reset_flag(sk, SOCK_LINGER);
-	tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
 
 	xprt_clear_connected(xprt);
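Switching from the open-coded nonagle flag to the tcp_sock_set_nodelay()
helper sets TCP_NODELAY once at connect time. This pairs with the corking
above: while TCP_CORK is held the stack batches, and once the cork is
dropped TCP_NODELAY pushes the remaining data out without waiting on
Nagle. For reference, a user-space sketch of the equivalent socket
option; the fd parameter is illustrative:

    #include <netinet/in.h>
    #include <netinet/tcp.h>
    #include <sys/socket.h>

    static void set_nodelay(int sock)
    {
            int one = 1;

            /* Disable Nagle: uncorked data is transmitted immediately. */
            setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one));
    }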