mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-10-31 08:44:41 +00:00 
			
		
		
		
	 110dc24ad2
			
		
	
	
		110dc24ad2
		
	
	
	
	
		
			
			The addition of direct formatting of log items into the CIL linear buffer added alignment restrictions that the start of each vector needed to be 64 bit aligned. Hence padding was added in xlog_finish_iovec() to round up the vector length to ensure the next vector started with the correct alignment. This adds a small number of bytes to the size of the linear buffer that is otherwise unused. The issue is that we then use the linear buffer size to determine the log space used by the log item, and this includes the unused space. Hence when we account for space used by the log item, it's more than is actually written into the iclogs, and hence we slowly leak this space. This results in log hangs when reserving space, with threads getting stuck with these stack traces: Call Trace: [<ffffffff81d15989>] schedule+0x29/0x70 [<ffffffff8150d3a2>] xlog_grant_head_wait+0xa2/0x1a0 [<ffffffff8150d55d>] xlog_grant_head_check+0xbd/0x140 [<ffffffff8150ee33>] xfs_log_reserve+0x103/0x220 [<ffffffff814b7f05>] xfs_trans_reserve+0x2f5/0x310 ..... The 4 bytes is significant. Brian Foster did all the hard work in tracking down a reproducible leak to inode chunk allocation (it went away with the ikeep mount option). His rough numbers were that creating 50,000 inodes leaked 11 log blocks. This turns out to be roughly 800 inode chunks or 1600 inode cluster buffers. That works out at roughly 4 bytes per cluster buffer logged, and at that I started looking for a 4 byte leak in the buffer logging code. What I found was that a struct xfs_buf_log_format structure for an inode cluster buffer is 28 bytes in length. This gets rounded up to 32 bytes, but the vector length remains 28 bytes. Hence the CIL ticket reservation is decremented by 32 bytes (via lv->lv_buf_len) for that vector rather than 28 bytes which are written into the log. 
The fix for this problem is to separately track the bytes used by the log vectors in the item and use that instead of the buffer length when accounting for the log space that will be used by the formatted log item. Again, thanks to Brian Foster for doing all the hard work and long hours to isolate this leak and make finding the bug relatively simple. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Brian Foster <bfoster@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com>
		
			
				
	
	
		
			193 lines
		
	
	
	
		
			5.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			193 lines
		
	
	
	
		
			5.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
 | |
|  * All Rights Reserved.
 | |
|  *
 | |
|  * This program is free software; you can redistribute it and/or
 | |
|  * modify it under the terms of the GNU General Public License as
 | |
|  * published by the Free Software Foundation.
 | |
|  *
 | |
|  * This program is distributed in the hope that it would be useful,
 | |
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|  * GNU General Public License for more details.
 | |
|  *
 | |
|  * You should have received a copy of the GNU General Public License
 | |
|  * along with this program; if not, write the Free Software Foundation,
 | |
|  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 | |
|  */
 | |
| #ifndef	__XFS_LOG_H__
 | |
| #define __XFS_LOG_H__
 | |
| 
 | |
/*
 * Log vector: an array of iovecs plus the linear buffer they point into.
 *
 * Two sizes are tracked for the buffer.  lv_buf_len advances by each iovec
 * length rounded up to 64-bit alignment and so includes padding; lv_bytes
 * advances by the unpadded iovec length only.
 */
struct xfs_log_vec {
	struct xfs_log_vec	*lv_next;	/* next lv in build list */
	int			lv_niovecs;	/* number of iovecs in lv */
	struct xfs_log_iovec	*lv_iovecp;	/* iovec array */
	struct xfs_log_item	*lv_item;	/* owner */
	char			*lv_buf;	/* formatted buffer */
	int			lv_bytes;	/* accounted space in buffer */
	int			lv_buf_len;	/* aligned size of buffer */
	int			lv_size;	/* size of allocated lv */
};

/* NOTE(review): presumably a sentinel marking an "ordered" lv - confirm at users. */
#define XFS_LOG_VEC_ORDERED	(-1)
 | |
| 
 | |
| static inline void *
 | |
| xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
 | |
| 		uint type)
 | |
| {
 | |
| 	struct xfs_log_iovec *vec = *vecp;
 | |
| 
 | |
| 	if (vec) {
 | |
| 		ASSERT(vec - lv->lv_iovecp < lv->lv_niovecs);
 | |
| 		vec++;
 | |
| 	} else {
 | |
| 		vec = &lv->lv_iovecp[0];
 | |
| 	}
 | |
| 
 | |
| 	vec->i_type = type;
 | |
| 	vec->i_addr = lv->lv_buf + lv->lv_buf_len;
 | |
| 
 | |
| 	ASSERT(IS_ALIGNED((unsigned long)vec->i_addr, sizeof(uint64_t)));
 | |
| 
 | |
| 	*vecp = vec;
 | |
| 	return vec->i_addr;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * We need to make sure the next buffer is naturally aligned for the biggest
 | |
|  * basic data type we put into it.  We already accounted for this padding when
 | |
|  * sizing the buffer.
 | |
|  *
 | |
|  * However, this padding does not get written into the log, and hence we have to
 | |
|  * track the space used by the log vectors separately to prevent log space hangs
 | |
|  * due to inaccurate accounting (i.e. a leak) of the used log space through the
 | |
|  * CIL context ticket.
 | |
|  */
 | |
| static inline void
 | |
| xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len)
 | |
| {
 | |
| 	lv->lv_buf_len += round_up(len, sizeof(uint64_t));
 | |
| 	lv->lv_bytes += len;
 | |
| 	vec->i_len = len;
 | |
| }
 | |
| 
 | |
| static inline void *
 | |
| xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
 | |
| 		uint type, void *data, int len)
 | |
| {
 | |
| 	void *buf;
 | |
| 
 | |
| 	buf = xlog_prepare_iovec(lv, vecp, type);
 | |
| 	memcpy(buf, data, len);
 | |
| 	xlog_finish_iovec(lv, *vecp, len);
 | |
| 	return buf;
 | |
| }
 | |
| 
 | |
/*
 * Structure used to pass callback function and the function's argument
 * to the log manager.  Entries can be chained through cb_next.
 */
typedef struct xfs_log_callback {
	struct xfs_log_callback	*cb_next;	/* next callback entry */
	void			(*cb_func)(void *, int);	/* function to call */
	void			*cb_arg;	/* argument for cb_func */
} xfs_log_callback_t;
 | |
| 
 | |
| /*
 | |
|  * By comparing each component, we don't have to worry about extra
 | |
|  * endian issues in treating two 32 bit numbers as one 64 bit number
 | |
|  */
 | |
| static inline xfs_lsn_t	_lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
 | |
| {
 | |
| 	if (CYCLE_LSN(lsn1) != CYCLE_LSN(lsn2))
 | |
| 		return (CYCLE_LSN(lsn1)<CYCLE_LSN(lsn2))? -999 : 999;
 | |
| 
 | |
| 	if (BLOCK_LSN(lsn1) != BLOCK_LSN(lsn2))
 | |
| 		return (BLOCK_LSN(lsn1)<BLOCK_LSN(lsn2))? -999 : 999;
 | |
| 
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| #define	XFS_LSN_CMP(x,y) _lsn_cmp(x,y)
 | |
| 
 | |
| /*
 | |
|  * Macros, structures, prototypes for interface to the log manager.
 | |
|  */
 | |
| 
 | |
/*
 * Flags to xfs_log_done()
 */
#define XFS_LOG_REL_PERM_RESERV	0x1

/*
 * Flags to xfs_log_force()
 *
 *	XFS_LOG_SYNC:	Synchronous force in-core log to disk
 */
#define XFS_LOG_SYNC		0x1
 | |
| 
 | |
| /* Log manager interfaces */
 | |
| struct xfs_mount;
 | |
| struct xlog_in_core;
 | |
| struct xlog_ticket;
 | |
| struct xfs_log_item;
 | |
| struct xfs_item_ops;
 | |
| struct xfs_trans;
 | |
| struct xfs_log_callback;
 | |
| 
 | |
| xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
 | |
| 		       struct xlog_ticket *ticket,
 | |
| 		       struct xlog_in_core **iclog,
 | |
| 		       uint		flags);
 | |
| int	  _xfs_log_force(struct xfs_mount *mp,
 | |
| 			 uint		flags,
 | |
| 			 int		*log_forced);
 | |
| void	  xfs_log_force(struct xfs_mount	*mp,
 | |
| 			uint			flags);
 | |
| int	  _xfs_log_force_lsn(struct xfs_mount *mp,
 | |
| 			     xfs_lsn_t		lsn,
 | |
| 			     uint		flags,
 | |
| 			     int		*log_forced);
 | |
| void	  xfs_log_force_lsn(struct xfs_mount	*mp,
 | |
| 			    xfs_lsn_t		lsn,
 | |
| 			    uint		flags);
 | |
| int	  xfs_log_mount(struct xfs_mount	*mp,
 | |
| 			struct xfs_buftarg	*log_target,
 | |
| 			xfs_daddr_t		start_block,
 | |
| 			int		 	num_bblocks);
 | |
| int	  xfs_log_mount_finish(struct xfs_mount *mp);
 | |
| xfs_lsn_t xlog_assign_tail_lsn(struct xfs_mount *mp);
 | |
| xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp);
 | |
| void	  xfs_log_space_wake(struct xfs_mount *mp);
 | |
| int	  xfs_log_notify(struct xfs_mount	*mp,
 | |
| 			 struct xlog_in_core	*iclog,
 | |
| 			 struct xfs_log_callback *callback_entry);
 | |
| int	  xfs_log_release_iclog(struct xfs_mount *mp,
 | |
| 			 struct xlog_in_core	 *iclog);
 | |
| int	  xfs_log_reserve(struct xfs_mount *mp,
 | |
| 			  int		   length,
 | |
| 			  int		   count,
 | |
| 			  struct xlog_ticket **ticket,
 | |
| 			  __uint8_t	   clientid,
 | |
| 			  bool		   permanent,
 | |
| 			  uint		   t_type);
 | |
| int	  xfs_log_regrant(struct xfs_mount *mp, struct xlog_ticket *tic);
 | |
| int	  xfs_log_unmount_write(struct xfs_mount *mp);
 | |
| void      xfs_log_unmount(struct xfs_mount *mp);
 | |
| int	  xfs_log_force_umount(struct xfs_mount *mp, int logerror);
 | |
| int	  xfs_log_need_covered(struct xfs_mount *mp);
 | |
| 
 | |
| void	  xlog_iodone(struct xfs_buf *);
 | |
| 
 | |
| struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
 | |
| void	  xfs_log_ticket_put(struct xlog_ticket *ticket);
 | |
| 
 | |
| void	xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
 | |
| 				xfs_lsn_t *commit_lsn, int flags);
 | |
| bool	xfs_log_item_in_current_chkpt(struct xfs_log_item *lip);
 | |
| 
 | |
| void	xfs_log_work_queue(struct xfs_mount *mp);
 | |
| void	xfs_log_worker(struct work_struct *work);
 | |
| void	xfs_log_quiesce(struct xfs_mount *mp);
 | |
| 
 | |
| #endif	/* __XFS_LOG_H__ */
 |