mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-10-31 16:54:21 +00:00 
			
		
		
		
	netfilter: xt_hashlimit: RCU conversion
xt_hashlimit uses a central lock per hash table and suffers from contention on some workloads (multiqueue NICs, or when RPS is enabled). After the RCU conversion, the central lock is taken only when a writer wants to add or delete an entry. 'Readers', which only update an existing entry, take an individual per-entry lock instead. Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com> Signed-off-by: Patrick McHardy <kaber@trash.net>
This commit is contained in:
		
							parent
							
								
									902a3dd5e6
								
							
						
					
					
						commit
						02e4eb7591
					
				
					 1 changed file with 47 additions and 23 deletions
				
			
		|  | @ -81,12 +81,14 @@ struct dsthash_ent { | |||
| 	struct dsthash_dst dst; | ||||
| 
 | ||||
| 	/* modified structure members in the end */ | ||||
| 	spinlock_t lock; | ||||
| 	unsigned long expires;		/* precalculated expiry time */ | ||||
| 	struct { | ||||
| 		unsigned long prev;	/* last modification */ | ||||
| 		u_int32_t credit; | ||||
| 		u_int32_t credit_cap, cost; | ||||
| 	} rateinfo; | ||||
| 	struct rcu_head rcu; | ||||
| }; | ||||
| 
 | ||||
| struct xt_hashlimit_htable { | ||||
|  | @ -143,9 +145,11 @@ dsthash_find(const struct xt_hashlimit_htable *ht, | |||
| 	u_int32_t hash = hash_dst(ht, dst); | ||||
| 
 | ||||
| 	if (!hlist_empty(&ht->hash[hash])) { | ||||
| 		hlist_for_each_entry(ent, pos, &ht->hash[hash], node) | ||||
| 			if (dst_cmp(ent, dst)) | ||||
| 		hlist_for_each_entry_rcu(ent, pos, &ht->hash[hash], node) | ||||
| 			if (dst_cmp(ent, dst)) { | ||||
| 				spin_lock(&ent->lock); | ||||
| 				return ent; | ||||
| 			} | ||||
| 	} | ||||
| 	return NULL; | ||||
| } | ||||
|  | @ -157,9 +161,10 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht, | |||
| { | ||||
| 	struct dsthash_ent *ent; | ||||
| 
 | ||||
| 	spin_lock(&ht->lock); | ||||
| 	/* initialize hash with random val at the time we allocate
 | ||||
| 	 * the first hashtable entry */ | ||||
| 	if (!ht->rnd_initialized) { | ||||
| 	if (unlikely(!ht->rnd_initialized)) { | ||||
| 		get_random_bytes(&ht->rnd, sizeof(ht->rnd)); | ||||
| 		ht->rnd_initialized = true; | ||||
| 	} | ||||
|  | @ -168,27 +173,36 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht, | |||
| 		/* FIXME: do something. question is what.. */ | ||||
| 		if (net_ratelimit()) | ||||
| 			pr_err("max count of %u reached\n", ht->cfg.max); | ||||
| 		return NULL; | ||||
| 	} | ||||
| 
 | ||||
| 	ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC); | ||||
| 		ent = NULL; | ||||
| 	} else | ||||
| 		ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC); | ||||
| 	if (!ent) { | ||||
| 		if (net_ratelimit()) | ||||
| 			pr_err("cannot allocate dsthash_ent\n"); | ||||
| 		return NULL; | ||||
| 	} | ||||
| 	memcpy(&ent->dst, dst, sizeof(ent->dst)); | ||||
| 	} else { | ||||
| 		memcpy(&ent->dst, dst, sizeof(ent->dst)); | ||||
| 		spin_lock_init(&ent->lock); | ||||
| 
 | ||||
| 	hlist_add_head(&ent->node, &ht->hash[hash_dst(ht, dst)]); | ||||
| 	ht->count++; | ||||
| 		spin_lock(&ent->lock); | ||||
| 		hlist_add_head_rcu(&ent->node, &ht->hash[hash_dst(ht, dst)]); | ||||
| 		ht->count++; | ||||
| 	} | ||||
| 	spin_unlock(&ht->lock); | ||||
| 	return ent; | ||||
| } | ||||
| 
 | ||||
| static void dsthash_free_rcu(struct rcu_head *head) | ||||
| { | ||||
| 	struct dsthash_ent *ent = container_of(head, struct dsthash_ent, rcu); | ||||
| 
 | ||||
| 	kmem_cache_free(hashlimit_cachep, ent); | ||||
| } | ||||
| 
 | ||||
| static inline void | ||||
| dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) | ||||
| { | ||||
| 	hlist_del(&ent->node); | ||||
| 	kmem_cache_free(hashlimit_cachep, ent); | ||||
| 	hlist_del_rcu(&ent->node); | ||||
| 	call_rcu_bh(&ent->rcu, dsthash_free_rcu); | ||||
| 	ht->count--; | ||||
| } | ||||
| static void htable_gc(unsigned long htlong); | ||||
|  | @ -512,15 +526,14 @@ hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) | |||
| 	if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) | ||||
| 		goto hotdrop; | ||||
| 
 | ||||
| 	spin_lock_bh(&hinfo->lock); | ||||
| 	rcu_read_lock_bh(); | ||||
| 	dh = dsthash_find(hinfo, &dst); | ||||
| 	if (dh == NULL) { | ||||
| 		dh = dsthash_alloc_init(hinfo, &dst); | ||||
| 		if (dh == NULL) { | ||||
| 			spin_unlock_bh(&hinfo->lock); | ||||
| 			rcu_read_unlock_bh(); | ||||
| 			goto hotdrop; | ||||
| 		} | ||||
| 
 | ||||
| 		dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); | ||||
| 		dh->rateinfo.prev = jiffies; | ||||
| 		dh->rateinfo.credit = user2credits(hinfo->cfg.avg * | ||||
|  | @ -537,11 +550,13 @@ hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) | |||
| 	if (dh->rateinfo.credit >= dh->rateinfo.cost) { | ||||
| 		/* below the limit */ | ||||
| 		dh->rateinfo.credit -= dh->rateinfo.cost; | ||||
| 		spin_unlock_bh(&hinfo->lock); | ||||
| 		spin_unlock(&dh->lock); | ||||
| 		rcu_read_unlock_bh(); | ||||
| 		return !(info->cfg.mode & XT_HASHLIMIT_INVERT); | ||||
| 	} | ||||
| 
 | ||||
| 	spin_unlock_bh(&hinfo->lock); | ||||
| 	spin_unlock(&dh->lock); | ||||
| 	rcu_read_unlock_bh(); | ||||
| 	/* default match is underlimit - so over the limit, we need to invert */ | ||||
| 	return info->cfg.mode & XT_HASHLIMIT_INVERT; | ||||
| 
 | ||||
|  | @ -666,12 +681,15 @@ static void dl_seq_stop(struct seq_file *s, void *v) | |||
| static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, | ||||
| 				   struct seq_file *s) | ||||
| { | ||||
| 	int res; | ||||
| 
 | ||||
| 	spin_lock(&ent->lock); | ||||
| 	/* recalculate to show accurate numbers */ | ||||
| 	rateinfo_recalc(ent, jiffies); | ||||
| 
 | ||||
| 	switch (family) { | ||||
| 	case NFPROTO_IPV4: | ||||
| 		return seq_printf(s, "%ld %pI4:%u->%pI4:%u %u %u %u\n", | ||||
| 		res = seq_printf(s, "%ld %pI4:%u->%pI4:%u %u %u %u\n", | ||||
| 				 (long)(ent->expires - jiffies)/HZ, | ||||
| 				 &ent->dst.ip.src, | ||||
| 				 ntohs(ent->dst.src_port), | ||||
|  | @ -679,9 +697,10 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, | |||
| 				 ntohs(ent->dst.dst_port), | ||||
| 				 ent->rateinfo.credit, ent->rateinfo.credit_cap, | ||||
| 				 ent->rateinfo.cost); | ||||
| 		break; | ||||
| #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) | ||||
| 	case NFPROTO_IPV6: | ||||
| 		return seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n", | ||||
| 		res = seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n", | ||||
| 				 (long)(ent->expires - jiffies)/HZ, | ||||
| 				 &ent->dst.ip6.src, | ||||
| 				 ntohs(ent->dst.src_port), | ||||
|  | @ -689,11 +708,14 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, | |||
| 				 ntohs(ent->dst.dst_port), | ||||
| 				 ent->rateinfo.credit, ent->rateinfo.credit_cap, | ||||
| 				 ent->rateinfo.cost); | ||||
| 		break; | ||||
| #endif | ||||
| 	default: | ||||
| 		BUG(); | ||||
| 		return 0; | ||||
| 		res = 0; | ||||
| 	} | ||||
| 	spin_unlock(&ent->lock); | ||||
| 	return res; | ||||
| } | ||||
| 
 | ||||
| static int dl_seq_show(struct seq_file *s, void *v) | ||||
|  | @ -817,9 +839,11 @@ err1: | |||
| 
 | ||||
| static void __exit hashlimit_mt_exit(void) | ||||
| { | ||||
| 	kmem_cache_destroy(hashlimit_cachep); | ||||
| 	xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg)); | ||||
| 	unregister_pernet_subsys(&hashlimit_net_ops); | ||||
| 
 | ||||
| 	rcu_barrier_bh(); | ||||
| 	kmem_cache_destroy(hashlimit_cachep); | ||||
| } | ||||
| 
 | ||||
| module_init(hashlimit_mt_init); | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Eric Dumazet
						Eric Dumazet