inet: frags: save a pair of atomic operations in reassembly

As mentioned in commit 648700f76b ("inet: frags:
use rhashtables for reassembly units"):

  A followup patch will even remove the refcount hold/release
  left from prior implementation and save a couple of atomic
  operations.

This patch implements this idea, seven years later.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://patch.msgid.link/20250312082250.1803501-5-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
commit ca0359df45 (parent eb0dfc0ef1)
Author: Eric Dumazet <edumazet@google.com>
Date:   2025-03-12 08:22:50 +00:00 (committed by Paolo Abeni)
5 files changed, 24 insertions(+), 18 deletions(-)
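
The idea, concretely: before this patch, every reassembly lookup took a
reference on the queue under RCU (refcount_inc_not_zero()) and the caller
dropped it again once the fragment was handled, that is, two atomic
operations per packet. Now the lookup relies on the RCU read lock alone to
keep the queue alive, and callers merely tally, in a local "refs", the
references they actually have to drop (timer, hash table), releasing them
with a single inet_frag_putn() call. The following is a toy userspace model
of the two schemes, in plain C11 with <stdatomic.h>; frag_queue,
lookup_old() and lookup_new() are illustrative stand-ins, not kernel API:

  /* Toy model of the refcounting before/after this patch; NOT kernel code. */
  #include <stdatomic.h>
  #include <stdio.h>

  struct frag_queue {
          atomic_int refcnt;
  };

  /* Before: every lookup bumped the refcount (one atomic), and the caller
   * dropped that reference when done (a second atomic).
   */
  static struct frag_queue *lookup_old(struct frag_queue *q)
  {
          int old = atomic_load(&q->refcnt);

          /* models refcount_inc_not_zero() */
          while (old && !atomic_compare_exchange_weak(&q->refcnt, &old, old + 1))
                  ;
          return old ? q : NULL;
  }

  /* After: the lookup runs under rcu_read_lock() (modeled away here), so no
   * reference is taken; "refs" only counts references the caller must drop.
   */
  static struct frag_queue *lookup_new(struct frag_queue *q, int *refs)
  {
          *refs = 0;      /* nothing to put unless the queue gets killed */
          return q;       /* lifetime guaranteed by the RCU read section */
  }

  int main(void)
  {
          struct frag_queue q = { .refcnt = 2 };  /* timer + hash table */
          int refs = 0;

          lookup_old(&q);                         /* refcnt: 2 -> 3 */
          atomic_fetch_sub(&q.refcnt, 1);         /* refcnt: 3 -> 2 */

          lookup_new(&q, &refs);                  /* refcnt untouched */
          if (refs)                               /* models inet_frag_putn() */
                  atomic_fetch_sub(&q.refcnt, refs);
          printf("refcnt = %d\n", atomic_load(&q.refcnt));
          return 0;
  }

On the fast path (queue found, fragment queued, queue still live), "refs"
stays 0 and inet_frag_putn() touches nothing: that is the saved pair of
atomic operations.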

net/ieee802154/6lowpan/reassembly.c

@@ -304,17 +304,20 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
 		goto err;
 	}
 
+	rcu_read_lock();
 	fq = fq_find(net, cb, &hdr.source, &hdr.dest);
 	if (fq != NULL) {
-		int ret, refs = 1;
+		int ret, refs = 0;
 
 		spin_lock(&fq->q.lock);
 		ret = lowpan_frag_queue(fq, skb, frag_type, &refs);
 		spin_unlock(&fq->q.lock);
 
+		rcu_read_unlock();
 		inet_frag_putn(&fq->q, refs);
 		return ret;
 	}
+	rcu_read_unlock();
 
 err:
 	kfree_skb(skb);

net/ipv4/inet_fragment.c

@@ -327,7 +327,8 @@ static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir,
 
 	timer_setup(&q->timer, f->frag_expire, 0);
 	spin_lock_init(&q->lock);
-	refcount_set(&q->refcnt, 3);
+	/* One reference for the timer, one for the hash table. */
+	refcount_set(&q->refcnt, 2);
 
 	return q;
 }
@@ -349,7 +350,11 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
 	*prev = rhashtable_lookup_get_insert_key(&fqdir->rhashtable, &q->key,
 						 &q->node, f->rhash_params);
 	if (*prev) {
-		int refs = 2;
+		/* We could not insert in the hash table,
+		 * we need to cancel what inet_frag_alloc()
+		 * anticipated.
+		 */
+		int refs = 1;
 
 		q->flags |= INET_FRAG_COMPLETE;
 		inet_frag_kill(q, &refs);
@@ -359,7 +364,6 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
 	return q;
 }
 
-/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
 struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
 {
 	/* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */
@@ -369,17 +373,11 @@ struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
 	if (!high_thresh || frag_mem_limit(fqdir) > high_thresh)
 		return NULL;
 
-	rcu_read_lock();
-
 	prev = rhashtable_lookup(&fqdir->rhashtable, key, fqdir->f->rhash_params);
 	if (!prev)
 		fq = inet_frag_create(fqdir, key, &prev);
-	if (!IS_ERR_OR_NULL(prev)) {
+	if (!IS_ERR_OR_NULL(prev))
 		fq = prev;
-		if (!refcount_inc_not_zero(&fq->refcnt))
-			fq = NULL;
-	}
-	rcu_read_unlock();
 	return fq;
 }
 EXPORT_SYMBOL(inet_frag_find);
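
With inet_frag_alloc() now starting queues at refcount 2 (one reference for
the timer, one for the hash table), the "refs" out-parameter threaded
through inet_frag_kill() and the per-protocol queueing functions counts the
puts that have been deferred, and the caller flushes them all at once after
dropping the queue lock. A sketch of that accumulate-then-release pattern,
again as standalone C rather than the kernel implementation; frag_kill()
and frag_putn() are hypothetical stand-ins for inet_frag_kill() and
inet_frag_putn():

  /* Sketch of deferred reference dropping; NOT the kernel implementation. */
  #include <stdatomic.h>
  #include <stdbool.h>
  #include <stdlib.h>

  struct frag_queue {
          atomic_int refcnt;
          bool timer_armed;
          bool hashed;
  };

  /* Models inet_frag_kill(): detach from the timer and the hash table, but
   * only record the references to drop; no atomic operations here.
   */
  static void frag_kill(struct frag_queue *q, int *refs)
  {
          if (q->timer_armed) {
                  q->timer_armed = false;
                  (*refs)++;              /* the timer's reference */
          }
          if (q->hashed) {
                  q->hashed = false;
                  (*refs)++;              /* the hash table's reference */
          }
  }

  /* Models inet_frag_putn(): a single atomic for all accumulated puts. */
  static void frag_putn(struct frag_queue *q, int refs)
  {
          if (refs && atomic_fetch_sub(&q->refcnt, refs) == refs)
                  free(q);                /* that was the last reference */
  }

  int main(void)
  {
          struct frag_queue *q = malloc(sizeof(*q));
          int refs = 0;

          q->refcnt = 2;                  /* as inet_frag_alloc() now sets it */
          q->timer_armed = true;
          q->hashed = true;

          frag_kill(q, &refs);            /* refs == 2, refcnt untouched */
          frag_putn(q, refs);             /* 2 - 2 == 0: queue is freed */
          return 0;
  }

This is also why the failed-insert path in inet_frag_create() starts with
refs = 1 instead of 2: the queue never became visible in the hash table, so
the hash-table reference that inet_frag_alloc() anticipated has to be
cancelled by hand, while inet_frag_kill() accounts for the timer's
reference.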

net/ipv4/ip_fragment.c

@@ -483,18 +483,21 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
 	__IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);
 
 	/* Lookup (or create) queue header */
+	rcu_read_lock();
 	qp = ip_find(net, ip_hdr(skb), user, vif);
 	if (qp) {
-		int ret, refs = 1;
+		int ret, refs = 0;
 
 		spin_lock(&qp->q.lock);
 
 		ret = ip_frag_queue(qp, skb, &refs);
 
 		spin_unlock(&qp->q.lock);
+		rcu_read_unlock();
 		inet_frag_putn(&qp->q, refs);
 		return ret;
 	}
+	rcu_read_unlock();
 
 	__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
 	kfree_skb(skb);

net/ipv6/netfilter/nf_conntrack_reasm.c

@@ -450,7 +450,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
 	struct frag_hdr *fhdr;
 	struct frag_queue *fq;
 	struct ipv6hdr *hdr;
-	int refs = 1;
+	int refs = 0;
 	u8 prevhdr;
 
 	/* Jumbo payload inhibits frag. header */
@@ -477,9 +477,11 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
 	hdr = ipv6_hdr(skb);
 	fhdr = (struct frag_hdr *)skb_transport_header(skb);
 
+	rcu_read_lock();
 	fq = fq_find(net, fhdr->identification, user, hdr,
 		     skb->dev ? skb->dev->ifindex : 0);
 	if (fq == NULL) {
+		rcu_read_unlock();
 		pr_debug("Can't find and can't create new queue\n");
 		return -ENOMEM;
 	}
@@ -493,6 +495,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
 	}
 
 	spin_unlock_bh(&fq->q.lock);
+	rcu_read_unlock();
 	inet_frag_putn(&fq->q, refs);
 	return ret;
 }

net/ipv6/reassembly.c

@@ -305,9 +305,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
 	skb_postpush_rcsum(skb, skb_network_header(skb),
 			   skb_network_header_len(skb));
 
-	rcu_read_lock();
 	__IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMOKS);
-	rcu_read_unlock();
 	fq->q.rb_fragments = RB_ROOT;
 	fq->q.fragments_tail = NULL;
 	fq->q.last_run_head = NULL;
@@ -319,9 +317,7 @@ out_oversize:
 out_oom:
 	net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n");
 out_fail:
-	rcu_read_lock();
 	__IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMFAILS);
-	rcu_read_unlock();
 	inet_frag_kill(&fq->q, refs);
 	return -1;
 }
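
The two deleted rcu_read_lock()/rcu_read_unlock() pairs around the MIB
updates in ip6_frag_reasm() became unnecessary because its caller,
ipv6_frag_rcv() below, now holds the RCU read lock across the whole call;
RCU read-side critical sections nest, so the inner pair only added
overhead. A minimal userspace illustration, assuming liburcu is installed
(build with -lurcu); update_stats() is a made-up helper:

  #include <urcu.h>
  #include <stdio.h>

  static void update_stats(void)
  {
          /* This helper used to take its own rcu_read_lock(). Nesting is
           * legal, but redundant once every caller already holds the lock.
           */
          printf("stats updated under the caller's RCU read lock\n");
  }

  int main(void)
  {
          rcu_register_thread();

          rcu_read_lock();        /* what ipv6_frag_rcv() now provides */
          update_stats();         /* no inner lock/unlock pair needed */
          rcu_read_unlock();

          rcu_unregister_thread();
          return 0;
  }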
@@ -379,10 +375,11 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	}
 
 	iif = skb->dev ? skb->dev->ifindex : 0;
+	rcu_read_lock();
 	fq = fq_find(net, fhdr->identification, hdr, iif);
 	if (fq) {
 		u32 prob_offset = 0;
-		int ret, refs = 1;
+		int ret, refs = 0;
 
 		spin_lock(&fq->q.lock);
 
@@ -391,6 +388,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 				     &prob_offset, &refs);
 
 		spin_unlock(&fq->q.lock);
+		rcu_read_unlock();
 		inet_frag_putn(&fq->q, refs);
 		if (prob_offset) {
 			__IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
@@ -400,6 +398,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 		}
 		return ret;
 	}
+	rcu_read_unlock();
 
 	__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS);
 	kfree_skb(skb);