diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 6b0228a21617..40289fe6f5b2 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -580,9 +580,24 @@ xfs_report_dioalign( struct xfs_buftarg *target = xfs_inode_buftarg(ip); struct block_device *bdev = target->bt_bdev; - stat->result_mask |= STATX_DIOALIGN; + stat->result_mask |= STATX_DIOALIGN | STATX_DIO_READ_ALIGN; stat->dio_mem_align = bdev_dma_alignment(bdev) + 1; - stat->dio_offset_align = bdev_logical_block_size(bdev); + + /* + * For COW inodes, we can only perform out of place writes of entire + * allocation units (blocks or RT extents). + * For writes smaller than the allocation unit, we must fall back to + * buffered I/O to perform read-modify-write cycles. At best this is + * highly inefficient; at worst it leads to page cache invalidation + * races. Tell applications to avoid this by reporting the larger write + * alignment in dio_offset_align, and the smaller read alignment in + * dio_read_offset_align. + */ + stat->dio_read_offset_align = bdev_logical_block_size(bdev); + if (xfs_is_cow_inode(ip)) + stat->dio_offset_align = xfs_inode_alloc_unitsize(ip); + else + stat->dio_offset_align = stat->dio_read_offset_align; } static void @@ -658,7 +673,7 @@ xfs_vn_getattr( stat->rdev = inode->i_rdev; break; case S_IFREG: - if (request_mask & STATX_DIOALIGN) + if (request_mask & (STATX_DIOALIGN | STATX_DIO_READ_ALIGN)) xfs_report_dioalign(ip, stat); if (request_mask & STATX_WRITE_ATOMIC) xfs_report_atomic_write(ip, stat);