mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-10-31 16:54:21 +00:00 
			
		
		
		
	A fix for the recently discovered misdirected requests bug present in
jewel and later on the server side and all stable kernels, a fixup for -rc1 CRUSH changes and two usability enhancements: osd_request_timeout option and supported_features bus attribute. -----BEGIN PGP SIGNATURE----- Version: GnuPG v2 iQEcBAABCAAGBQJYwsEIAAoJEEp/3jgCEfOL34sH+wbYyT6uXQ3hlIoRt2FQNh5b F6qmvH4jYRI+YyjJHgE7lLEv7cq/PESPej2hrw9U7GAso0KEsazOv+qpj4AcW+u1 arXYTIQQa2w9sCuj7/BrbEzDtnNOVnGyD3Ng0wAfvbxg/37xzqumkbccuWJm6GdH Vjk31G4ZmaOOr38jeo0AkYWgs7kgfthLMFo73TgHTBBO9fkQQQL1xZH5D/Irzf8P 1ytfVyGeTl8D3szdkkOnc4eUFMwJ35wqesL+gAsQntx1/wDnGqa2IabXRs4oqr8F oT88LXSP8w2PaFKI1FrwOuMov6ngg38tir2SMxGDIQ6TdxtK8lW37Cx3eHavqtE= =f4Bs -----END PGP SIGNATURE----- Merge tag 'ceph-for-4.11-rc2' of git://github.com/ceph/ceph-client Pull ceph fixes from Ilya Dryomov: - a fix for the recently discovered misdirected requests bug present in jewel and later on the server side and all stable kernels - a fixup for -rc1 CRUSH changes - two usability enhancements: osd_request_timeout option and supported_features bus attribute. * tag 'ceph-for-4.11-rc2' of git://github.com/ceph/ceph-client: libceph: osd_request_timeout option rbd: supported_features bus attribute libceph: don't set weight to IN when OSD is destroyed libceph: fix crush_decode() for older maps
This commit is contained in:
		
						commit
						24c534bb16
					
				
					 6 changed files with 66 additions and 8 deletions
				
			
		|  | @ -120,10 +120,11 @@ static int atomic_dec_return_safe(atomic_t *v) | |||
| 
 | ||||
| /* Feature bits */ | ||||
| 
 | ||||
| #define RBD_FEATURE_LAYERING	(1<<0) | ||||
| #define RBD_FEATURE_STRIPINGV2	(1<<1) | ||||
| #define RBD_FEATURE_EXCLUSIVE_LOCK (1<<2) | ||||
| #define RBD_FEATURE_DATA_POOL (1<<7) | ||||
| #define RBD_FEATURE_LAYERING		(1ULL<<0) | ||||
| #define RBD_FEATURE_STRIPINGV2		(1ULL<<1) | ||||
| #define RBD_FEATURE_EXCLUSIVE_LOCK	(1ULL<<2) | ||||
| #define RBD_FEATURE_DATA_POOL		(1ULL<<7) | ||||
| 
 | ||||
| #define RBD_FEATURES_ALL	(RBD_FEATURE_LAYERING |		\ | ||||
| 				 RBD_FEATURE_STRIPINGV2 |	\ | ||||
| 				 RBD_FEATURE_EXCLUSIVE_LOCK |	\ | ||||
|  | @ -499,16 +500,23 @@ static bool rbd_is_lock_owner(struct rbd_device *rbd_dev) | |||
| 	return is_lock_owner; | ||||
| } | ||||
| 
 | ||||
| static ssize_t rbd_supported_features_show(struct bus_type *bus, char *buf) | ||||
| { | ||||
| 	return sprintf(buf, "0x%llx\n", RBD_FEATURES_SUPPORTED); | ||||
| } | ||||
| 
 | ||||
| static BUS_ATTR(add, S_IWUSR, NULL, rbd_add); | ||||
| static BUS_ATTR(remove, S_IWUSR, NULL, rbd_remove); | ||||
| static BUS_ATTR(add_single_major, S_IWUSR, NULL, rbd_add_single_major); | ||||
| static BUS_ATTR(remove_single_major, S_IWUSR, NULL, rbd_remove_single_major); | ||||
| static BUS_ATTR(supported_features, S_IRUGO, rbd_supported_features_show, NULL); | ||||
| 
 | ||||
| static struct attribute *rbd_bus_attrs[] = { | ||||
| 	&bus_attr_add.attr, | ||||
| 	&bus_attr_remove.attr, | ||||
| 	&bus_attr_add_single_major.attr, | ||||
| 	&bus_attr_remove_single_major.attr, | ||||
| 	&bus_attr_supported_features.attr, | ||||
| 	NULL, | ||||
| }; | ||||
| 
 | ||||
|  |  | |||
|  | @ -48,6 +48,7 @@ struct ceph_options { | |||
| 	unsigned long mount_timeout;		/* jiffies */ | ||||
| 	unsigned long osd_idle_ttl;		/* jiffies */ | ||||
| 	unsigned long osd_keepalive_timeout;	/* jiffies */ | ||||
| 	unsigned long osd_request_timeout;	/* jiffies */ | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * any type that can't be simply compared or doesn't need | ||||
|  | @ -68,6 +69,7 @@ struct ceph_options { | |||
| #define CEPH_MOUNT_TIMEOUT_DEFAULT	msecs_to_jiffies(60 * 1000) | ||||
| #define CEPH_OSD_KEEPALIVE_DEFAULT	msecs_to_jiffies(5 * 1000) | ||||
| #define CEPH_OSD_IDLE_TTL_DEFAULT	msecs_to_jiffies(60 * 1000) | ||||
| #define CEPH_OSD_REQUEST_TIMEOUT_DEFAULT 0  /* no timeout */ | ||||
| 
 | ||||
| #define CEPH_MONC_HUNT_INTERVAL		msecs_to_jiffies(3 * 1000) | ||||
| #define CEPH_MONC_PING_INTERVAL		msecs_to_jiffies(10 * 1000) | ||||
|  |  | |||
|  | @ -189,6 +189,7 @@ struct ceph_osd_request { | |||
| 
 | ||||
| 	/* internal */ | ||||
| 	unsigned long r_stamp;                /* jiffies, send or check time */ | ||||
| 	unsigned long r_start_stamp;          /* jiffies */ | ||||
| 	int r_attempts; | ||||
| 	struct ceph_eversion r_replay_version; /* aka reassert_version */ | ||||
| 	u32 r_last_force_resend; | ||||
|  |  | |||
|  | @ -230,6 +230,7 @@ enum { | |||
| 	Opt_osdkeepalivetimeout, | ||||
| 	Opt_mount_timeout, | ||||
| 	Opt_osd_idle_ttl, | ||||
| 	Opt_osd_request_timeout, | ||||
| 	Opt_last_int, | ||||
| 	/* int args above */ | ||||
| 	Opt_fsid, | ||||
|  | @ -256,6 +257,7 @@ static match_table_t opt_tokens = { | |||
| 	{Opt_osdkeepalivetimeout, "osdkeepalive=%d"}, | ||||
| 	{Opt_mount_timeout, "mount_timeout=%d"}, | ||||
| 	{Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, | ||||
| 	{Opt_osd_request_timeout, "osd_request_timeout=%d"}, | ||||
| 	/* int args above */ | ||||
| 	{Opt_fsid, "fsid=%s"}, | ||||
| 	{Opt_name, "name=%s"}, | ||||
|  | @ -361,6 +363,7 @@ ceph_parse_options(char *options, const char *dev_name, | |||
| 	opt->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; | ||||
| 	opt->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; | ||||
| 	opt->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; | ||||
| 	opt->osd_request_timeout = CEPH_OSD_REQUEST_TIMEOUT_DEFAULT; | ||||
| 
 | ||||
| 	/* get mon ip(s) */ | ||||
| 	/* ip1[:port1][,ip2[:port2]...] */ | ||||
|  | @ -473,6 +476,15 @@ ceph_parse_options(char *options, const char *dev_name, | |||
| 			} | ||||
| 			opt->mount_timeout = msecs_to_jiffies(intval * 1000); | ||||
| 			break; | ||||
| 		case Opt_osd_request_timeout: | ||||
| 			/* 0 is "wait forever" (i.e. infinite timeout) */ | ||||
| 			if (intval < 0 || intval > INT_MAX / 1000) { | ||||
| 				pr_err("osd_request_timeout out of range\n"); | ||||
| 				err = -EINVAL; | ||||
| 				goto out; | ||||
| 			} | ||||
| 			opt->osd_request_timeout = msecs_to_jiffies(intval * 1000); | ||||
| 			break; | ||||
| 
 | ||||
| 		case Opt_share: | ||||
| 			opt->flags &= ~CEPH_OPT_NOSHARE; | ||||
|  | @ -557,6 +569,9 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client) | |||
| 	if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) | ||||
| 		seq_printf(m, "osdkeepalivetimeout=%d,", | ||||
| 		    jiffies_to_msecs(opt->osd_keepalive_timeout) / 1000); | ||||
| 	if (opt->osd_request_timeout != CEPH_OSD_REQUEST_TIMEOUT_DEFAULT) | ||||
| 		seq_printf(m, "osd_request_timeout=%d,", | ||||
| 			   jiffies_to_msecs(opt->osd_request_timeout) / 1000); | ||||
| 
 | ||||
| 	/* drop redundant comma */ | ||||
| 	if (m->count != pos) | ||||
|  |  | |||
|  | @ -1709,6 +1709,8 @@ static void account_request(struct ceph_osd_request *req) | |||
| 
 | ||||
| 	req->r_flags |= CEPH_OSD_FLAG_ONDISK; | ||||
| 	atomic_inc(&req->r_osdc->num_requests); | ||||
| 
 | ||||
| 	req->r_start_stamp = jiffies; | ||||
| } | ||||
| 
 | ||||
| static void submit_request(struct ceph_osd_request *req, bool wrlocked) | ||||
|  | @ -1789,6 +1791,14 @@ static void cancel_request(struct ceph_osd_request *req) | |||
| 	ceph_osdc_put_request(req); | ||||
| } | ||||
| 
 | ||||
| static void abort_request(struct ceph_osd_request *req, int err) | ||||
| { | ||||
| 	dout("%s req %p tid %llu err %d\n", __func__, req, req->r_tid, err); | ||||
| 
 | ||||
| 	cancel_map_check(req); | ||||
| 	complete_request(req, err); | ||||
| } | ||||
| 
 | ||||
| static void check_pool_dne(struct ceph_osd_request *req) | ||||
| { | ||||
| 	struct ceph_osd_client *osdc = req->r_osdc; | ||||
|  | @ -2487,6 +2497,7 @@ static void handle_timeout(struct work_struct *work) | |||
| 		container_of(work, struct ceph_osd_client, timeout_work.work); | ||||
| 	struct ceph_options *opts = osdc->client->options; | ||||
| 	unsigned long cutoff = jiffies - opts->osd_keepalive_timeout; | ||||
| 	unsigned long expiry_cutoff = jiffies - opts->osd_request_timeout; | ||||
| 	LIST_HEAD(slow_osds); | ||||
| 	struct rb_node *n, *p; | ||||
| 
 | ||||
|  | @ -2502,15 +2513,23 @@ static void handle_timeout(struct work_struct *work) | |||
| 		struct ceph_osd *osd = rb_entry(n, struct ceph_osd, o_node); | ||||
| 		bool found = false; | ||||
| 
 | ||||
| 		for (p = rb_first(&osd->o_requests); p; p = rb_next(p)) { | ||||
| 		for (p = rb_first(&osd->o_requests); p; ) { | ||||
| 			struct ceph_osd_request *req = | ||||
| 			    rb_entry(p, struct ceph_osd_request, r_node); | ||||
| 
 | ||||
| 			p = rb_next(p); /* abort_request() */ | ||||
| 
 | ||||
| 			if (time_before(req->r_stamp, cutoff)) { | ||||
| 				dout(" req %p tid %llu on osd%d is laggy\n", | ||||
| 				     req, req->r_tid, osd->o_osd); | ||||
| 				found = true; | ||||
| 			} | ||||
| 			if (opts->osd_request_timeout && | ||||
| 			    time_before(req->r_start_stamp, expiry_cutoff)) { | ||||
| 				pr_err_ratelimited("tid %llu on osd%d timeout\n", | ||||
| 				       req->r_tid, osd->o_osd); | ||||
| 				abort_request(req, -ETIMEDOUT); | ||||
| 			} | ||||
| 		} | ||||
| 		for (p = rb_first(&osd->o_linger_requests); p; p = rb_next(p)) { | ||||
| 			struct ceph_osd_linger_request *lreq = | ||||
|  | @ -2530,6 +2549,21 @@ static void handle_timeout(struct work_struct *work) | |||
| 			list_move_tail(&osd->o_keepalive_item, &slow_osds); | ||||
| 	} | ||||
| 
 | ||||
| 	if (opts->osd_request_timeout) { | ||||
| 		for (p = rb_first(&osdc->homeless_osd.o_requests); p; ) { | ||||
| 			struct ceph_osd_request *req = | ||||
| 			    rb_entry(p, struct ceph_osd_request, r_node); | ||||
| 
 | ||||
| 			p = rb_next(p); /* abort_request() */ | ||||
| 
 | ||||
| 			if (time_before(req->r_start_stamp, expiry_cutoff)) { | ||||
| 				pr_err_ratelimited("tid %llu on osd%d timeout\n", | ||||
| 				       req->r_tid, osdc->homeless_osd.o_osd); | ||||
| 				abort_request(req, -ETIMEDOUT); | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if (atomic_read(&osdc->num_homeless) || !list_empty(&slow_osds)) | ||||
| 		maybe_request_map(osdc); | ||||
| 
 | ||||
|  |  | |||
|  | @ -390,9 +390,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end) | |||
| 	dout("crush decode tunable chooseleaf_stable = %d\n", | ||||
| 	     c->chooseleaf_stable); | ||||
| 
 | ||||
| 	crush_finalize(c); | ||||
| 
 | ||||
| done: | ||||
| 	crush_finalize(c); | ||||
| 	dout("crush_decode success\n"); | ||||
| 	return c; | ||||
| 
 | ||||
|  | @ -1380,7 +1379,6 @@ static int decode_new_up_state_weight(void **p, void *end, | |||
| 		if ((map->osd_state[osd] & CEPH_OSD_EXISTS) && | ||||
| 		    (xorstate & CEPH_OSD_EXISTS)) { | ||||
| 			pr_info("osd%d does not exist\n", osd); | ||||
| 			map->osd_weight[osd] = CEPH_OSD_IN; | ||||
| 			ret = set_primary_affinity(map, osd, | ||||
| 						   CEPH_OSD_DEFAULT_PRIMARY_AFFINITY); | ||||
| 			if (ret) | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Linus Torvalds
						Linus Torvalds