From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 16 Feb 2023 18:39:04 +0100
Subject: [PATCH] net/core: add optional threading for backlog processing

When dealing with few flows or an imbalance on CPU utilization, static RPS
CPU assignment can be too inflexible. Add support for enabling threaded NAPI
for backlog processing in order to allow the scheduler to better balance
processing. This helps better spread the load across idle CPUs.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
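Toggling the feature from userspace goes through the new "backlog_threaded"
sysctl registered below. A minimal userspace sketch, assuming the standard
procfs mapping of net_core_table (/proc/sys/net/core/backlog_threaded) and
root privileges:

	#include <stdio.h>

	/* Write "1" to enable threaded backlog processing, "0" to disable it. */
	static int set_backlog_threaded(const char *val)
	{
		FILE *f = fopen("/proc/sys/net/core/backlog_threaded", "w");

		if (!f) {
			perror("fopen");
			return -1;
		}
		fputs(val, f);
		return fclose(f);
	}

	int main(void)
	{
		return set_backlog_threaded("1\n") ? 1 : 0;
	}
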
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -558,6 +558,7 @@ static inline bool napi_complete(struct
 }
 
 int dev_set_threaded(struct net_device *dev, bool threaded);
+int backlog_set_threaded(bool threaded);
 
 /**
  *	napi_disable - prevent NAPI from scheduling
@@ -3238,6 +3239,7 @@ struct softnet_data {
 	/* stats */
 	unsigned int		processed;
 	unsigned int		time_squeeze;
+	unsigned int		process_queue_empty;
 #ifdef CONFIG_RPS
 	struct softnet_data	*rps_ipi_list;
 #endif
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4729,7 +4729,7 @@ static void napi_schedule_rps(struct sof
 	struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
 
 #ifdef CONFIG_RPS
-	if (sd != mysd) {
+	if (sd != mysd && !test_bit(NAPI_STATE_THREADED, &sd->backlog.state)) {
 		sd->rps_ipi_next = mysd->rps_ipi_list;
 		mysd->rps_ipi_list = sd;
 
@@ -5848,6 +5848,8 @@ static DEFINE_PER_CPU(struct work_struct
 /* Network device is going away, flush any packets still pending */
 static void flush_backlog(struct work_struct *work)
 {
+	unsigned int process_queue_empty;
+	bool threaded, flush_processq;
 	struct sk_buff *skb, *tmp;
 	struct softnet_data *sd;
 
@@ -5862,8 +5864,17 @@ static void flush_backlog(struct work_st
 			input_queue_head_incr(sd);
 		}
 	}
+
+	threaded = test_bit(NAPI_STATE_THREADED, &sd->backlog.state);
+	flush_processq = threaded &&
+			 !skb_queue_empty_lockless(&sd->process_queue);
+	if (flush_processq)
+		process_queue_empty = sd->process_queue_empty;
 	rps_unlock_irq_enable(sd);
 
+	if (threaded)
+		goto out;
+
 	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
 		if (skb->dev->reg_state == NETREG_UNREGISTERING) {
 			__skb_unlink(skb, &sd->process_queue);
@@ -5871,7 +5882,16 @@ static void flush_backlog(struct work_st
 			input_queue_head_incr(sd);
 		}
 	}
+
+out:
 	local_bh_enable();
+
+	while (flush_processq) {
+		msleep(1);
+		rps_lock_irq_disable(sd);
+		flush_processq = process_queue_empty == sd->process_queue_empty;
+		rps_unlock_irq_enable(sd);
+	}
 }
 
 static bool flush_required(int cpu)
@@ -6003,6 +6023,7 @@ static int process_backlog(struct napi_s
 		}
 
 		rps_lock_irq_disable(sd);
+		sd->process_queue_empty++;
 		if (skb_queue_empty(&sd->input_pkt_queue)) {
 			/*
 			 * Inline a custom version of __napi_complete().
@@ -6012,7 +6033,8 @@ static int process_backlog(struct napi_s
 			 * We can use a plain write instead of clear_bit(),
 			 * and we dont need an smp_mb() memory barrier.
 			 */
-			napi->state = 0;
+			napi->state &= ~(NAPIF_STATE_SCHED |
+					 NAPIF_STATE_SCHED_THREADED);
 			again = false;
 		} else {
 			skb_queue_splice_tail_init(&sd->input_pkt_queue,
@@ -6426,6 +6448,55 @@ int dev_set_threaded(struct net_device *
 }
 EXPORT_SYMBOL(dev_set_threaded);
 
+int backlog_set_threaded(bool threaded)
+{
+	static bool backlog_threaded;
+	int err = 0;
+	int i;
+
+	if (backlog_threaded == threaded)
+		return 0;
+
+	for_each_possible_cpu(i) {
+		struct softnet_data *sd = &per_cpu(softnet_data, i);
+		struct napi_struct *n = &sd->backlog;
+
+		if (n->thread)
+			continue;
+		n->thread = kthread_run(napi_threaded_poll, n, "napi/backlog-%d", i);
+		if (IS_ERR(n->thread)) {
+			err = PTR_ERR(n->thread);
+			pr_err("kthread_run failed with err %d\n", err);
+			n->thread = NULL;
+			threaded = false;
+			break;
+		}
+
+	}
+
+	backlog_threaded = threaded;
+
+	/* Make sure kthread is created before THREADED bit
+	 * is set.
+	 */
+	smp_mb__before_atomic();
+
+	for_each_possible_cpu(i) {
+		struct softnet_data *sd = &per_cpu(softnet_data, i);
+		struct napi_struct *n = &sd->backlog;
+		unsigned long flags;
+
+		rps_lock_irqsave(sd, &flags);
+		if (threaded)
+			n->state |= NAPIF_STATE_THREADED;
+		else
+			n->state &= ~NAPIF_STATE_THREADED;
+		rps_unlock_irq_restore(sd, &flags);
+	}
+
+	return err;
+}
+
 void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
 			   int (*poll)(struct napi_struct *, int), int weight)
 {
@@ -11351,6 +11422,9 @@ static int dev_cpu_dead(unsigned int old
 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
 	local_irq_enable();
 
+	if (test_bit(NAPI_STATE_THREADED, &oldsd->backlog.state))
+		return 0;
+
 #ifdef CONFIG_RPS
 	remsd = oldsd->rps_ipi_list;
 	oldsd->rps_ipi_list = NULL;
@@ -11666,6 +11740,7 @@ static int __init net_dev_init(void)
 		INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd);
 		spin_lock_init(&sd->defer_lock);
 
+		INIT_LIST_HEAD(&sd->backlog.poll_list);
 		init_gro_hash(&sd->backlog);
 		sd->backlog.poll = process_backlog;
 		sd->backlog.weight = weight_p;
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -31,6 +31,7 @@ static int min_sndbuf = SOCK_MIN_SNDBUF;
 static int min_rcvbuf = SOCK_MIN_RCVBUF;
 static int max_skb_frags = MAX_SKB_FRAGS;
 static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE;
+static int backlog_threaded;
 
 static int net_msg_warn; /* Unused, but still a sysctl */
 
@@ -189,6 +190,23 @@ static int rps_sock_flow_sysctl(struct c
 }
 #endif /* CONFIG_RPS */
 
+static int backlog_threaded_sysctl(struct ctl_table *table, int write,
+				   void *buffer, size_t *lenp, loff_t *ppos)
+{
+	static DEFINE_MUTEX(backlog_threaded_mutex);
+	int ret;
+
+	mutex_lock(&backlog_threaded_mutex);
+
+	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+	if (write && !ret)
+		ret = backlog_set_threaded(backlog_threaded);
+
+	mutex_unlock(&backlog_threaded_mutex);
+
+	return ret;
+}
+
 #ifdef CONFIG_NET_FLOW_LIMIT
 static DEFINE_MUTEX(flow_limit_update_mutex);
 
@@ -541,6 +559,15 @@ static struct ctl_table net_core_table[]
 		.proc_handler	= rps_sock_flow_sysctl
 	},
 #endif
+	{
+		.procname	= "backlog_threaded",
+		.data		= &backlog_threaded,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= backlog_threaded_sysctl,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE
+	},
 #ifdef CONFIG_NET_FLOW_LIMIT
 	{
 		.procname	= "flow_limit_cpu_bitmap",