diff --git a/target/linux/ipq40xx/Makefile b/target/linux/ipq40xx/Makefile
index 6cf99f572..552fa4b4f 100644
--- a/target/linux/ipq40xx/Makefile
+++ b/target/linux/ipq40xx/Makefile
@@ -18,6 +18,8 @@ DEFAULT_PACKAGES += \
 	kmod-usb-dwc3-qcom \
 	kmod-leds-gpio kmod-gpio-button-hotplug swconfig \
 	kmod-ath10k-ct wpad-openssl \
-	kmod-usb3 kmod-usb-dwc3 ath10k-firmware-qca4019-ct ethtool
+	kmod-usb3 kmod-usb-dwc3 ath10k-firmware-qca4019-ct \
+	automount autosamba luci-app-ipsec-vpnd luci-app-unblockmusic luci-app-cpufreq luci-app-zerotier luci-app-xlnetacc \
+	htop fdisk e2fsprogs ethtool
 
 $(eval $(call BuildTarget))
diff --git a/target/linux/ipq40xx/base-files/etc/hotplug.d/net/21_adjust_network b/target/linux/ipq40xx/base-files/etc/hotplug.d/net/21_adjust_network
new file mode 100644
index 000000000..7aa4f6f72
--- /dev/null
+++ b/target/linux/ipq40xx/base-files/etc/hotplug.d/net/21_adjust_network
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+[ -f /lib/adjust_network.sh ] && {
+	. /lib/adjust_network.sh
+
+	adjust_eth_queue
+}
diff --git a/target/linux/ipq40xx/base-files/etc/init.d/adjust_network b/target/linux/ipq40xx/base-files/etc/init.d/adjust_network
new file mode 100755
index 000000000..02af81983
--- /dev/null
+++ b/target/linux/ipq40xx/base-files/etc/init.d/adjust_network
@@ -0,0 +1,19 @@
+#!/bin/sh /etc/rc.common
+# Copyright (C) 2006-2011 OpenWrt.org
+
+START=11
+STOP=98
+
+adjust_smp_affinity() {
+	test -f /lib/adjust_network.sh && {
+		. /lib/adjust_network.sh
+
+		adjust_eth_queue
+		adjust_edma_smp_affinity
+		adjust_radio_smp_affinity
+	}
+}
+
+boot() {
+	adjust_smp_affinity
+}
diff --git a/target/linux/ipq40xx/base-files/lib/adjust_network.sh b/target/linux/ipq40xx/base-files/lib/adjust_network.sh
new file mode 100644
index 000000000..99423022c
--- /dev/null
+++ b/target/linux/ipq40xx/base-files/lib/adjust_network.sh
@@ -0,0 +1,89 @@
+#!/bin/sh
+# This script adjusts which CPUs service the edma and ath10k interrupts.
+#
+
+################################################
+# Adjust smp_affinity of edma
+# Globals:
+#   None
+# Arguments:
+#   None
+# Returns:
+#   None
+# Remark:
+#   Executed only once, at start-up.
+################################################
+adjust_edma_smp_affinity() {
+	grep -q edma_eth_ /proc/interrupts || return 0
+	local nr=`cat /proc/cpuinfo | grep processor | wc -l`
+	local cpu=0
+	local tx_irq_num
+
+	for tx_num in `seq 0 1 15` ; do
+		cpu=`printf "%x" $((1<<((tx_num/4+0)%nr)))`
+		tx_irq_num=`grep -m1 edma_eth_tx$tx_num /proc/interrupts | cut -d ':' -f 1 | tail -n1 | tr -d ' '`
+		[ -n "$tx_irq_num" ] && echo $cpu > /proc/irq/$tx_irq_num/smp_affinity
+	done
+
+	for rx_num in `seq 0 1 7` ; do
+		cpu=`printf "%x" $((1<<((rx_num/2)%nr)))`
+		rx_irq_num=`grep -m1 edma_eth_rx$rx_num /proc/interrupts | cut -d ':' -f 1 | tail -n1 | tr -d ' '`
+		[ -n "$rx_irq_num" ] && echo $cpu > /proc/irq/$rx_irq_num/smp_affinity
+	done
+}
+
+################################################
+# Adjust smp_affinity of ath10k for 2G and 5G
+# Globals:
+#   None
+# Arguments:
+#   None
+# Returns:
+#   None
+# Remark:
+#   Executed only once, at start-up.
+################################################
+adjust_radio_smp_affinity() {
+	local irqs="`grep -E 'ath10k' /proc/interrupts | cut -d ':' -f 1 | tr -d ' '`"
+	local nr=`cat /proc/cpuinfo | grep processor | wc -l`
+	local idx=2
+
+	for irq in $irqs; do
+		cpu=`printf "%x" $((1<<((idx)%nr)))`
+		echo $cpu > /proc/irq/$irq/smp_affinity
+		idx=$((idx+1))
+	done
+}
+
+################################################
+# Adjust the RPS settings of the eth RX queues
+# Globals:
+#   None
+# Arguments:
+#   None
+# Returns:
+#   None
+# Remark:
+#   Must be executed again on every network restart.
+################################################
+adjust_eth_queue() {
+	local nr=`cat /proc/cpuinfo | grep processor | wc -l`
+	local idx=0
+
+	for epath in /sys/class/net/eth[0-9]*; do
+		test -e $epath || break
+		echo $epath | grep -q "\." && continue
+		eth=`basename $epath`
+		idx=0
+		for exps in /sys/class/net/$eth/queues/rx-[0-9]*/rps_cpus; do
+			test -e $exps || break
+			cpu=`printf "%x" $((1<<((idx+1)%nr)))`
+			idx=$((idx+1))
+			echo $cpu > $exps
+			echo 256 > `dirname $exps`/rps_flow_cnt
+		done
+		which ethtool >/dev/null 2>&1 && ethtool -K $eth gro off
+	done
+
+	echo 1024 > /proc/sys/net/core/rps_sock_flow_entries
+}
diff --git a/target/linux/ipq40xx/patches-4.19/715-essedma-refine-txq-to-be-adaptive-of-cpus-and-netdev.patch b/target/linux/ipq40xx/patches-4.19/715-essedma-refine-txq-to-be-adaptive-of-cpus-and-netdev.patch
new file mode 100644
index 000000000..d8d09c142
--- /dev/null
+++ b/target/linux/ipq40xx/patches-4.19/715-essedma-refine-txq-to-be-adaptive-of-cpus-and-netdev.patch
@@ -0,0 +1,205 @@
+diff --git a/drivers/net/ethernet/qualcomm/essedma/edma.c b/drivers/net/ethernet/qualcomm/essedma/edma.c
+index 724f355..7a16236 100644
+--- a/drivers/net/ethernet/qualcomm/essedma/edma.c
++++ b/drivers/net/ethernet/qualcomm/essedma/edma.c
+@@ -22,14 +22,6 @@ extern struct net_device *edma_netdev[EDMA_MAX_PORTID_SUPPORTED];
+ bool edma_stp_rstp;
+ u16 edma_ath_eth_type;
+ 
+-/* edma_skb_priority_offset()
+- * get edma skb priority
+- */
+-static unsigned int edma_skb_priority_offset(struct sk_buff *skb)
+-{
+-	return (skb->priority >> 2) & 1;
+-}
+-
+ /* edma_alloc_tx_ring()
+  * Allocate Tx descriptors ring
+  */
+@@ -1014,13 +1006,14 @@ static inline u16 edma_tpd_available(struct edma_common_info *edma_cinfo,
+ /* edma_tx_queue_get()
+  * Get the starting number of the queue
+  */
+-static inline int edma_tx_queue_get(struct edma_adapter *adapter,
++static inline int edma_tx_queue_get(struct edma_common_info *edma_cinfo, struct edma_adapter *adapter,
+ 	struct sk_buff *skb, int txq_id)
+ {
+ 	/* skb->priority is used as an index to skb priority table
+ 	 * and based on packet priority, correspong queue is assigned.
++	 * FIXME: for now we simply use jiffies for time-based balancing
+ 	 */
+-	return adapter->tx_start_offset[txq_id] + edma_skb_priority_offset(skb);
++	return adapter->tx_start_offset[txq_id] + (smp_processor_id() % edma_cinfo->num_txq_per_core_netdev);
+ }
+ 
+ /* edma_tx_update_hw_idx()
+@@ -1389,8 +1382,9 @@ netdev_tx_t edma_xmit(struct sk_buff *skb,
+ 	}
+ 
+ 	/* this will be one of the 4 TX queues exposed to linux kernel */
+-	txq_id = skb_get_queue_mapping(skb);
+-	queue_id = edma_tx_queue_get(adapter, skb, txq_id);
++	/* XXX what if num_online_cpus() > EDMA_CPU_CORES_SUPPORTED */
++	txq_id = ((jiffies >> 5) % (EDMA_CPU_CORES_SUPPORTED - 1) + smp_processor_id() + 1) % EDMA_CPU_CORES_SUPPORTED;
++	queue_id = edma_tx_queue_get(edma_cinfo, adapter, skb, txq_id);
+ 	etdr = edma_cinfo->tpd_ring[queue_id];
+ 	nq = netdev_get_tx_queue(net_dev, txq_id);
+ 
+@@ -1871,8 +1865,8 @@ void edma_free_irqs(struct edma_adapter *adapter)
+ 	int i, j;
+ 	int k = ((edma_cinfo->num_rx_queues == 4) ? 1 : 2);
+ 
+-	for (i = 0; i < CONFIG_NR_CPUS; i++) {
+-		for (j = edma_cinfo->edma_percpu_info[i].tx_start; j < (edma_cinfo->edma_percpu_info[i].tx_start + 4); j++)
++	for (i = 0; i < num_online_cpus() && i < EDMA_CPU_CORES_SUPPORTED; i++) {
++		for (j = edma_cinfo->edma_percpu_info[i].tx_start; j < (edma_cinfo->edma_percpu_info[i].tx_start + edma_cinfo->num_txq_per_core); j++)
+ 			free_irq(edma_cinfo->tx_irq[j], &edma_cinfo->edma_percpu_info[i]);
+ 
+ 		for (j = edma_cinfo->edma_percpu_info[i].rx_start; j < (edma_cinfo->edma_percpu_info[i].rx_start + k); j++)
+diff --git a/drivers/net/ethernet/qualcomm/essedma/edma.h b/drivers/net/ethernet/qualcomm/essedma/edma.h
+index 015e5f5..abb0bd5 100644
+--- a/drivers/net/ethernet/qualcomm/essedma/edma.h
++++ b/drivers/net/ethernet/qualcomm/essedma/edma.h
+@@ -324,6 +324,7 @@ struct edma_common_info {
+ 	u32 from_cpu; /* from CPU TPD field */
+ 	u32 num_rxq_per_core; /* Rx queues per core */
+ 	u32 num_txq_per_core; /* Tx queues per core */
++	u32 num_txq_per_core_netdev; /* Tx queues per core per netdev */
+ 	u16 tx_ring_count; /* Tx ring count */
+ 	u16 rx_ring_count; /* Rx ring*/
+ 	u16 rx_head_buffer_len; /* rx buffer length */
+@@ -331,7 +332,7 @@ struct edma_common_info {
+ 	u32 page_mode; /* Jumbo frame supported flag */
+ 	u32 fraglist_mode; /* fraglist supported flag */
+ 	struct edma_hw hw; /* edma hw specific structure */
+-	struct edma_per_cpu_queues_info edma_percpu_info[CONFIG_NR_CPUS]; /* per cpu information */
++	struct edma_per_cpu_queues_info edma_percpu_info[EDMA_CPU_CORES_SUPPORTED]; /* per cpu information */
+ 	spinlock_t stats_lock; /* protect edma stats area for updation */
+ 	struct timer_list edma_stats_timer;
+ 	bool is_single_phy;
+@@ -401,7 +402,7 @@ struct edma_adapter {
+ 	u32 link_state; /* phy link state */
+ 	u32 phy_mdio_addr; /* PHY device address on MII interface */
+ 	u32 poll_required; /* check if link polling is required */
+-	u32 tx_start_offset[CONFIG_NR_CPUS]; /* tx queue start */
++	u32 tx_start_offset[EDMA_CPU_CORES_SUPPORTED]; /* tx queue start */
+ 	u32 default_vlan_tag; /* vlan tag */
+ 	u32 dp_bitmap;
+ 	uint8_t phy_id[MII_BUS_ID_SIZE + 3];
+diff --git a/drivers/net/ethernet/qualcomm/essedma/edma_axi.c b/drivers/net/ethernet/qualcomm/essedma/edma_axi.c
+index d53c63b..2d4770c 100644
+--- a/drivers/net/ethernet/qualcomm/essedma/edma_axi.c
++++ b/drivers/net/ethernet/qualcomm/essedma/edma_axi.c
+@@ -719,11 +719,7 @@ static int edma_axi_probe(struct platform_device *pdev)
+ 	int i, j, k, err = 0;
+ 	int portid_bmp;
+ 	int idx = 0, idx_mac = 0;
+-
+-	if (CONFIG_NR_CPUS != EDMA_CPU_CORES_SUPPORTED) {
+-		dev_err(&pdev->dev, "Invalid CPU Cores\n");
+-		return -EINVAL;
+-	}
++	int netdev_group = 2;
+ 
+ 	if ((num_rxq != 4) && (num_rxq != 8)) {
+ 		dev_err(&pdev->dev, "Invalid RX queue, edma probe failed\n");
+@@ -747,7 +743,7 @@ static int edma_axi_probe(struct platform_device *pdev)
+ 	/* Initialize the netdev array before allocation
+ 	 * to avoid double free
+ 	 */
+-	for (i = 0 ; i < edma_cinfo->num_gmac ; i++)
++	for (i = 0 ; i < EDMA_MAX_PORTID_SUPPORTED; i++)
+ 		edma_netdev[i] = NULL;
+ 
+ 	for (i = 0 ; i < edma_cinfo->num_gmac ; i++) {
+@@ -768,8 +764,11 @@ static int edma_axi_probe(struct platform_device *pdev)
+ 
+ 	/* Fill ring details */
+ 	edma_cinfo->num_tx_queues = EDMA_MAX_TRANSMIT_QUEUE;
+-	edma_cinfo->num_txq_per_core = (EDMA_MAX_TRANSMIT_QUEUE / 4);
++	edma_cinfo->num_txq_per_core = (EDMA_MAX_TRANSMIT_QUEUE / num_online_cpus());
++	edma_cinfo->num_txq_per_core_netdev = (EDMA_MAX_TRANSMIT_QUEUE / netdev_group / num_online_cpus());
+ 	edma_cinfo->tx_ring_count = EDMA_TX_RING_SIZE;
++	if (edma_cinfo->num_txq_per_core == 0)
++		edma_cinfo->num_txq_per_core = 1;
+ 
+ 	/* Update num rx queues based on module parameter */
+ 	edma_cinfo->num_rx_queues = num_rxq;
+@@ -939,6 +938,13 @@ static int edma_axi_probe(struct platform_device *pdev)
+ 		idx_mac++;
+ 	}
+ 
++	if (edma_cinfo->num_gmac == 1) {
++		netdev_group = 1;
++		edma_cinfo->num_txq_per_core_netdev = (EDMA_MAX_TRANSMIT_QUEUE / netdev_group / num_online_cpus());
++	}
++	if (edma_cinfo->num_txq_per_core_netdev == 0)
++		edma_cinfo->num_txq_per_core_netdev = 1;
++
+ 	/* Populate the adapter structure register the netdevice */
+ 	for (i = 0; i < edma_cinfo->num_gmac; i++) {
+ 		int k, m;
+@@ -946,17 +952,16 @@ static int edma_axi_probe(struct platform_device *pdev)
+ 		adapter[i] = netdev_priv(edma_netdev[i]);
+ 		adapter[i]->netdev = edma_netdev[i];
+ 		adapter[i]->pdev = pdev;
+-		for (j = 0; j < CONFIG_NR_CPUS; j++) {
+-			m = i % 2;
+-			adapter[i]->tx_start_offset[j] =
+-				((j << EDMA_TX_CPU_START_SHIFT) + (m << 1));
++		for (j = 0; j < num_online_cpus() && j < EDMA_CPU_CORES_SUPPORTED; j++) {
++			m = i % netdev_group;
++			adapter[i]->tx_start_offset[j] = j * edma_cinfo->num_txq_per_core + m * edma_cinfo->num_txq_per_core_netdev;
+ 			/* Share the queues with available net-devices.
+ 			 * For instance , with 5 net-devices
+ 			 * eth0/eth2/eth4 will share q0,q1,q4,q5,q8,q9,q12,q13
+ 			 * and eth1/eth3 will get the remaining.
+ 			 */
+ 			for (k = adapter[i]->tx_start_offset[j]; k <
+-				(adapter[i]->tx_start_offset[j] + 2); k++) {
++				(adapter[i]->tx_start_offset[j] + edma_cinfo->num_txq_per_core_netdev); k++) {
+ 				if (edma_fill_netdev(edma_cinfo, k, i, j)) {
+ 					pr_err("Netdev overflow Error\n");
+ 					goto err_register;
+@@ -1109,9 +1114,12 @@ static int edma_axi_probe(struct platform_device *pdev)
+ 	/* populate per_core_info, do a napi_Add, request 16 TX irqs,
+ 	 * 8 RX irqs, do a napi enable
+ 	 */
+-	for (i = 0; i < CONFIG_NR_CPUS; i++) {
++	for (i = 0; i < num_online_cpus() && i < EDMA_CPU_CORES_SUPPORTED; i++) {
+ 		u8 rx_start;
+ 
++		tx_mask[i] = (0xFFFF >> (16 - edma_cinfo->num_txq_per_core)) << (i * edma_cinfo->num_txq_per_core);
++		tx_start[i] = i * edma_cinfo->num_txq_per_core;
++
+ 		edma_cinfo->edma_percpu_info[i].napi.state = 0;
+ 
+ 		netif_napi_add(edma_netdev[0],
+@@ -1131,7 +1139,7 @@ static int edma_axi_probe(struct platform_device *pdev)
+ 
+ 		/* Request irq per core */
+ 		for (j = edma_cinfo->edma_percpu_info[i].tx_start;
+-			j < tx_start[i] + 4; j++) {
++			j < tx_start[i] + edma_cinfo->num_txq_per_core; j++) {
+ 			sprintf(&edma_tx_irq[j][0], "edma_eth_tx%d", j);
+ 			err = request_irq(edma_cinfo->tx_irq[j],
+ 						edma_interrupt,
+@@ -1253,7 +1261,7 @@ static int edma_axi_probe(struct platform_device *pdev)
+ #endif
+ err_rmap_add_fail:
+ 	edma_free_irqs(adapter[0]);
+-	for (i = 0; i < CONFIG_NR_CPUS; i++)
++	for (i = 0; i < num_online_cpus() && i < EDMA_CPU_CORES_SUPPORTED; i++)
+ 		napi_disable(&edma_cinfo->edma_percpu_info[i].napi);
+ err_reset:
+ err_unregister_sysctl_tbl:
+@@ -1301,7 +1309,7 @@ static int edma_axi_remove(struct platform_device *pdev)
+ 		unregister_netdev(edma_netdev[i]);
+ 
+ 	edma_stop_rx_tx(hw);
+-	for (i = 0; i < CONFIG_NR_CPUS; i++)
++	for (i = 0; i < num_online_cpus() && i < EDMA_CPU_CORES_SUPPORTED; i++)
+ 		napi_disable(&edma_cinfo->edma_percpu_info[i].napi);
+ 
+ 	edma_irq_disable(edma_cinfo);
diff --git a/target/linux/ipq40xx/patches-4.19/716-essedma-reduce-write-reg.patch b/target/linux/ipq40xx/patches-4.19/716-essedma-reduce-write-reg.patch
new file mode 100644
index 000000000..58b87467d
--- /dev/null
+++ b/target/linux/ipq40xx/patches-4.19/716-essedma-reduce-write-reg.patch
@@ -0,0 +1,54 @@
+diff --git a/drivers/net/ethernet/qualcomm/essedma/edma.c b/drivers/net/ethernet/qualcomm/essedma/edma.c
+index fc274c8..e9d12a4 100644
+--- a/drivers/net/ethernet/qualcomm/essedma/edma.c
++++ b/drivers/net/ethernet/qualcomm/essedma/edma.c
+@@ -2075,15 +2075,13 @@ int edma_poll(struct napi_struct *napi, int budget)
+ 	int i, work_done = 0;
+ 	u16 rx_pending_fill;
+ 
+-	/* Store the Rx/Tx status by ANDing it with
+-	 * appropriate CPU RX?TX mask
++	/* Store the Tx status by ANDing it with
++	 * appropriate CPU TX mask
+ 	 */
+-	edma_read_reg(EDMA_REG_RX_ISR, &reg_data);
+-	edma_percpu_info->rx_status |= reg_data & edma_percpu_info->rx_mask;
+-	shadow_rx_status = edma_percpu_info->rx_status;
+ 	edma_read_reg(EDMA_REG_TX_ISR, &reg_data);
+ 	edma_percpu_info->tx_status |= reg_data & edma_percpu_info->tx_mask;
+ 	shadow_tx_status = edma_percpu_info->tx_status;
++	edma_write_reg(EDMA_REG_TX_ISR, shadow_tx_status);
+ 
+ 	/* Every core will have a start, which will be computed
+ 	 * in probe and stored in edma_percpu_info->tx_start variable.
+@@ -2098,6 +2096,14 @@
+ 		edma_percpu_info->tx_status &= ~(1 << queue_id);
+ 	}
+ 
++	/* Store the Rx status by ANDing it with
++	 * appropriate CPU RX mask
++	 */
++	edma_read_reg(EDMA_REG_RX_ISR, &reg_data);
++	edma_percpu_info->rx_status |= reg_data & edma_percpu_info->rx_mask;
++	shadow_rx_status = edma_percpu_info->rx_status;
++	edma_write_reg(EDMA_REG_RX_ISR, shadow_rx_status);
++
+ 	/* Every core will have a start, which will be computed
+ 	 * in probe and stored in edma_percpu_info->tx_start variable.
+ 	 * We will shift the status bit by tx_start to obtain
+@@ -2122,15 +2128,6 @@
+ 		}
+ 	}
+ 
+-	/* Clear the status register, to avoid the interrupts to
+-	 * reoccur.This clearing of interrupt status register is
+-	 * done here as writing to status register only takes place
+-	 * once the producer/consumer index has been updated to
+-	 * reflect that the packet transmission/reception went fine.
+-	 */
+-	edma_write_reg(EDMA_REG_RX_ISR, shadow_rx_status);
+-	edma_write_reg(EDMA_REG_TX_ISR, shadow_tx_status);
+-
+ 	/* If budget not fully consumed, exit the polling mode */
+ 	if (likely(work_done < budget)) {
+ 		napi_complete(napi);
diff --git a/target/linux/ipq40xx/patches-5.4/715-essedma-refine-txq-to-be-adaptive-of-cpus-and-netdev.patch b/target/linux/ipq40xx/patches-5.4/715-essedma-refine-txq-to-be-adaptive-of-cpus-and-netdev.patch
new file mode 100644
index 000000000..d8d09c142
--- /dev/null
+++ b/target/linux/ipq40xx/patches-5.4/715-essedma-refine-txq-to-be-adaptive-of-cpus-and-netdev.patch
@@ -0,0 +1,205 @@
+diff --git a/drivers/net/ethernet/qualcomm/essedma/edma.c b/drivers/net/ethernet/qualcomm/essedma/edma.c
+index 724f355..7a16236 100644
+--- a/drivers/net/ethernet/qualcomm/essedma/edma.c
++++ b/drivers/net/ethernet/qualcomm/essedma/edma.c
+@@ -22,14 +22,6 @@ extern struct net_device *edma_netdev[EDMA_MAX_PORTID_SUPPORTED];
+ bool edma_stp_rstp;
+ u16 edma_ath_eth_type;
+ 
+-/* edma_skb_priority_offset()
+- * get edma skb priority
+- */
+-static unsigned int edma_skb_priority_offset(struct sk_buff *skb)
+-{
+-	return (skb->priority >> 2) & 1;
+-}
+-
+ /* edma_alloc_tx_ring()
+  * Allocate Tx descriptors ring
+  */
+@@ -1014,13 +1006,14 @@ static inline u16 edma_tpd_available(struct edma_common_info *edma_cinfo,
+ /* edma_tx_queue_get()
+  * Get the starting number of the queue
+  */
+-static inline int edma_tx_queue_get(struct edma_adapter *adapter,
++static inline int edma_tx_queue_get(struct edma_common_info *edma_cinfo, struct edma_adapter *adapter,
+ 	struct sk_buff *skb, int txq_id)
+ {
+ 	/* skb->priority is used as an index to skb priority table
+ 	 * and based on packet priority, correspong queue is assigned.
++	 * FIXME: for now we simply use jiffies for time-based balancing
+ 	 */
+-	return adapter->tx_start_offset[txq_id] + edma_skb_priority_offset(skb);
++	return adapter->tx_start_offset[txq_id] + (smp_processor_id() % edma_cinfo->num_txq_per_core_netdev);
+ }
+ 
+ /* edma_tx_update_hw_idx()
+@@ -1389,8 +1382,9 @@ netdev_tx_t edma_xmit(struct sk_buff *skb,
+ 	}
+ 
+ 	/* this will be one of the 4 TX queues exposed to linux kernel */
+-	txq_id = skb_get_queue_mapping(skb);
+-	queue_id = edma_tx_queue_get(adapter, skb, txq_id);
++	/* XXX what if num_online_cpus() > EDMA_CPU_CORES_SUPPORTED */
++	txq_id = ((jiffies >> 5) % (EDMA_CPU_CORES_SUPPORTED - 1) + smp_processor_id() + 1) % EDMA_CPU_CORES_SUPPORTED;
++	queue_id = edma_tx_queue_get(edma_cinfo, adapter, skb, txq_id);
+ 	etdr = edma_cinfo->tpd_ring[queue_id];
+ 	nq = netdev_get_tx_queue(net_dev, txq_id);
+ 
+@@ -1871,8 +1865,8 @@ void edma_free_irqs(struct edma_adapter *adapter)
+ 	int i, j;
+ 	int k = ((edma_cinfo->num_rx_queues == 4) ? 1 : 2);
+ 
+-	for (i = 0; i < CONFIG_NR_CPUS; i++) {
+-		for (j = edma_cinfo->edma_percpu_info[i].tx_start; j < (edma_cinfo->edma_percpu_info[i].tx_start + 4); j++)
++	for (i = 0; i < num_online_cpus() && i < EDMA_CPU_CORES_SUPPORTED; i++) {
++		for (j = edma_cinfo->edma_percpu_info[i].tx_start; j < (edma_cinfo->edma_percpu_info[i].tx_start + edma_cinfo->num_txq_per_core); j++)
+ 			free_irq(edma_cinfo->tx_irq[j], &edma_cinfo->edma_percpu_info[i]);
+ 
+ 		for (j = edma_cinfo->edma_percpu_info[i].rx_start; j < (edma_cinfo->edma_percpu_info[i].rx_start + k); j++)
+diff --git a/drivers/net/ethernet/qualcomm/essedma/edma.h b/drivers/net/ethernet/qualcomm/essedma/edma.h
+index 015e5f5..abb0bd5 100644
+--- a/drivers/net/ethernet/qualcomm/essedma/edma.h
++++ b/drivers/net/ethernet/qualcomm/essedma/edma.h
+@@ -324,6 +324,7 @@ struct edma_common_info {
+ 	u32 from_cpu; /* from CPU TPD field */
+ 	u32 num_rxq_per_core; /* Rx queues per core */
+ 	u32 num_txq_per_core; /* Tx queues per core */
++	u32 num_txq_per_core_netdev; /* Tx queues per core per netdev */
+ 	u16 tx_ring_count; /* Tx ring count */
+ 	u16 rx_ring_count; /* Rx ring*/
+ 	u16 rx_head_buffer_len; /* rx buffer length */
+@@ -331,7 +332,7 @@ struct edma_common_info {
+ 	u32 page_mode; /* Jumbo frame supported flag */
+ 	u32 fraglist_mode; /* fraglist supported flag */
+ 	struct edma_hw hw; /* edma hw specific structure */
+-	struct edma_per_cpu_queues_info edma_percpu_info[CONFIG_NR_CPUS]; /* per cpu information */
++	struct edma_per_cpu_queues_info edma_percpu_info[EDMA_CPU_CORES_SUPPORTED]; /* per cpu information */
+ 	spinlock_t stats_lock; /* protect edma stats area for updation */
+ 	struct timer_list edma_stats_timer;
+ 	bool is_single_phy;
+@@ -401,7 +402,7 @@ struct edma_adapter {
+ 	u32 link_state; /* phy link state */
+ 	u32 phy_mdio_addr; /* PHY device address on MII interface */
+ 	u32 poll_required; /* check if link polling is required */
+-	u32 tx_start_offset[CONFIG_NR_CPUS]; /* tx queue start */
++	u32 tx_start_offset[EDMA_CPU_CORES_SUPPORTED]; /* tx queue start */
+ 	u32 default_vlan_tag; /* vlan tag */
+ 	u32 dp_bitmap;
+ 	uint8_t phy_id[MII_BUS_ID_SIZE + 3];
+diff --git a/drivers/net/ethernet/qualcomm/essedma/edma_axi.c b/drivers/net/ethernet/qualcomm/essedma/edma_axi.c
+index d53c63b..2d4770c 100644
+--- a/drivers/net/ethernet/qualcomm/essedma/edma_axi.c
++++ b/drivers/net/ethernet/qualcomm/essedma/edma_axi.c
+@@ -719,11 +719,7 @@ static int edma_axi_probe(struct platform_device *pdev)
+ 	int i, j, k, err = 0;
+ 	int portid_bmp;
+ 	int idx = 0, idx_mac = 0;
+-
+-	if (CONFIG_NR_CPUS != EDMA_CPU_CORES_SUPPORTED) {
+-		dev_err(&pdev->dev, "Invalid CPU Cores\n");
+-		return -EINVAL;
+-	}
++	int netdev_group = 2;
+ 
+ 	if ((num_rxq != 4) && (num_rxq != 8)) {
+ 		dev_err(&pdev->dev, "Invalid RX queue, edma probe failed\n");
+@@ -747,7 +743,7 @@ static int edma_axi_probe(struct platform_device *pdev)
+ 	/* Initialize the netdev array before allocation
+ 	 * to avoid double free
+ 	 */
+-	for (i = 0 ; i < edma_cinfo->num_gmac ; i++)
++	for (i = 0 ; i < EDMA_MAX_PORTID_SUPPORTED; i++)
+ 		edma_netdev[i] = NULL;
+ 
+ 	for (i = 0 ; i < edma_cinfo->num_gmac ; i++) {
+@@ -768,8 +764,11 @@ static int edma_axi_probe(struct platform_device *pdev)
+ 
+ 	/* Fill ring details */
+ 	edma_cinfo->num_tx_queues = EDMA_MAX_TRANSMIT_QUEUE;
+-	edma_cinfo->num_txq_per_core = (EDMA_MAX_TRANSMIT_QUEUE / 4);
++	edma_cinfo->num_txq_per_core = (EDMA_MAX_TRANSMIT_QUEUE / num_online_cpus());
++	edma_cinfo->num_txq_per_core_netdev = (EDMA_MAX_TRANSMIT_QUEUE / netdev_group / num_online_cpus());
+ 	edma_cinfo->tx_ring_count = EDMA_TX_RING_SIZE;
++	if (edma_cinfo->num_txq_per_core == 0)
++		edma_cinfo->num_txq_per_core = 1;
+ 
+ 	/* Update num rx queues based on module parameter */
+ 	edma_cinfo->num_rx_queues = num_rxq;
+@@ -939,6 +938,13 @@ static int edma_axi_probe(struct platform_device *pdev)
+ 		idx_mac++;
+ 	}
+ 
++	if (edma_cinfo->num_gmac == 1) {
++		netdev_group = 1;
++		edma_cinfo->num_txq_per_core_netdev = (EDMA_MAX_TRANSMIT_QUEUE / netdev_group / num_online_cpus());
++	}
++	if (edma_cinfo->num_txq_per_core_netdev == 0)
++		edma_cinfo->num_txq_per_core_netdev = 1;
++
+ 	/* Populate the adapter structure register the netdevice */
+ 	for (i = 0; i < edma_cinfo->num_gmac; i++) {
+ 		int k, m;
+@@ -946,17 +952,16 @@ static int edma_axi_probe(struct platform_device *pdev)
+ 		adapter[i] = netdev_priv(edma_netdev[i]);
+ 		adapter[i]->netdev = edma_netdev[i];
+ 		adapter[i]->pdev = pdev;
+-		for (j = 0; j < CONFIG_NR_CPUS; j++) {
+-			m = i % 2;
+-			adapter[i]->tx_start_offset[j] =
+-				((j << EDMA_TX_CPU_START_SHIFT) + (m << 1));
++		for (j = 0; j < num_online_cpus() && j < EDMA_CPU_CORES_SUPPORTED; j++) {
++			m = i % netdev_group;
++			adapter[i]->tx_start_offset[j] = j * edma_cinfo->num_txq_per_core + m * edma_cinfo->num_txq_per_core_netdev;
+ 			/* Share the queues with available net-devices.
+ 			 * For instance , with 5 net-devices
+ 			 * eth0/eth2/eth4 will share q0,q1,q4,q5,q8,q9,q12,q13
+ 			 * and eth1/eth3 will get the remaining.
+ 			 */
+ 			for (k = adapter[i]->tx_start_offset[j]; k <
+-				(adapter[i]->tx_start_offset[j] + 2); k++) {
++				(adapter[i]->tx_start_offset[j] + edma_cinfo->num_txq_per_core_netdev); k++) {
+ 				if (edma_fill_netdev(edma_cinfo, k, i, j)) {
+ 					pr_err("Netdev overflow Error\n");
+ 					goto err_register;
+@@ -1109,9 +1114,12 @@ static int edma_axi_probe(struct platform_device *pdev)
+ 	/* populate per_core_info, do a napi_Add, request 16 TX irqs,
+ 	 * 8 RX irqs, do a napi enable
+ 	 */
+-	for (i = 0; i < CONFIG_NR_CPUS; i++) {
++	for (i = 0; i < num_online_cpus() && i < EDMA_CPU_CORES_SUPPORTED; i++) {
+ 		u8 rx_start;
+ 
++		tx_mask[i] = (0xFFFF >> (16 - edma_cinfo->num_txq_per_core)) << (i * edma_cinfo->num_txq_per_core);
++		tx_start[i] = i * edma_cinfo->num_txq_per_core;
++
+ 		edma_cinfo->edma_percpu_info[i].napi.state = 0;
+ 
+ 		netif_napi_add(edma_netdev[0],
+@@ -1131,7 +1139,7 @@ static int edma_axi_probe(struct platform_device *pdev)
+ 
+ 		/* Request irq per core */
+ 		for (j = edma_cinfo->edma_percpu_info[i].tx_start;
+-			j < tx_start[i] + 4; j++) {
++			j < tx_start[i] + edma_cinfo->num_txq_per_core; j++) {
+ 			sprintf(&edma_tx_irq[j][0], "edma_eth_tx%d", j);
+ 			err = request_irq(edma_cinfo->tx_irq[j],
+ 						edma_interrupt,
+@@ -1253,7 +1261,7 @@ static int edma_axi_probe(struct platform_device *pdev)
+ #endif
+ err_rmap_add_fail:
+ 	edma_free_irqs(adapter[0]);
+-	for (i = 0; i < CONFIG_NR_CPUS; i++)
++	for (i = 0; i < num_online_cpus() && i < EDMA_CPU_CORES_SUPPORTED; i++)
+ 		napi_disable(&edma_cinfo->edma_percpu_info[i].napi);
+ err_reset:
+ err_unregister_sysctl_tbl:
+@@ -1301,7 +1309,7 @@ static int edma_axi_remove(struct platform_device *pdev)
+ 		unregister_netdev(edma_netdev[i]);
+ 
+ 	edma_stop_rx_tx(hw);
+-	for (i = 0; i < CONFIG_NR_CPUS; i++)
++	for (i = 0; i < num_online_cpus() && i < EDMA_CPU_CORES_SUPPORTED; i++)
+ 		napi_disable(&edma_cinfo->edma_percpu_info[i].napi);
+ 
+ 	edma_irq_disable(edma_cinfo);
diff --git a/target/linux/ipq40xx/patches-5.4/716-essedma-reduce-write-reg.patch b/target/linux/ipq40xx/patches-5.4/716-essedma-reduce-write-reg.patch
new file mode 100644
index 000000000..58b87467d
--- /dev/null
+++ b/target/linux/ipq40xx/patches-5.4/716-essedma-reduce-write-reg.patch
@@ -0,0 +1,54 @@
+diff --git a/drivers/net/ethernet/qualcomm/essedma/edma.c b/drivers/net/ethernet/qualcomm/essedma/edma.c
+index fc274c8..e9d12a4 100644
+--- a/drivers/net/ethernet/qualcomm/essedma/edma.c
++++ b/drivers/net/ethernet/qualcomm/essedma/edma.c
+@@ -2075,15 +2075,13 @@ int edma_poll(struct napi_struct *napi, int budget)
+ 	int i, work_done = 0;
+ 	u16 rx_pending_fill;
+ 
+-	/* Store the Rx/Tx status by ANDing it with
+-	 * appropriate CPU RX?TX mask
++	/* Store the Tx status by ANDing it with
++	 * appropriate CPU TX mask
+ 	 */
+-	edma_read_reg(EDMA_REG_RX_ISR, &reg_data);
+-	edma_percpu_info->rx_status |= reg_data & edma_percpu_info->rx_mask;
+-	shadow_rx_status = edma_percpu_info->rx_status;
+ 	edma_read_reg(EDMA_REG_TX_ISR, &reg_data);
+ 	edma_percpu_info->tx_status |= reg_data & edma_percpu_info->tx_mask;
+ 	shadow_tx_status = edma_percpu_info->tx_status;
++	edma_write_reg(EDMA_REG_TX_ISR, shadow_tx_status);
+ 
+ 	/* Every core will have a start, which will be computed
+ 	 * in probe and stored in edma_percpu_info->tx_start variable.
+@@ -2098,6 +2096,14 @@
+ 		edma_percpu_info->tx_status &= ~(1 << queue_id);
+ 	}
+ 
++	/* Store the Rx status by ANDing it with
++	 * appropriate CPU RX mask
++	 */
++	edma_read_reg(EDMA_REG_RX_ISR, &reg_data);
++	edma_percpu_info->rx_status |= reg_data & edma_percpu_info->rx_mask;
++	shadow_rx_status = edma_percpu_info->rx_status;
++	edma_write_reg(EDMA_REG_RX_ISR, shadow_rx_status);
++
+ 	/* Every core will have a start, which will be computed
+ 	 * in probe and stored in edma_percpu_info->tx_start variable.
+ 	 * We will shift the status bit by tx_start to obtain
+@@ -2122,15 +2128,6 @@
+ 		}
+ 	}
+ 
+-	/* Clear the status register, to avoid the interrupts to
+-	 * reoccur.This clearing of interrupt status register is
+-	 * done here as writing to status register only takes place
+-	 * once the producer/consumer index has been updated to
+-	 * reflect that the packet transmission/reception went fine.
+-	 */
+-	edma_write_reg(EDMA_REG_RX_ISR, shadow_rx_status);
+-	edma_write_reg(EDMA_REG_TX_ISR, shadow_tx_status);
+-
+ 	/* If budget not fully consumed, exit the polling mode */
+ 	if (likely(work_done < budget)) {
+ 		napi_complete(napi);
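
Note: the affinity scheme above is easy to sanity-check on a running device. Per adjust_network.sh, edma_eth_txN should end up pinned to CPU (N/4) mod nr_cpus and edma_eth_rxN to CPU (N/2) mod nr_cpus, and every eth RX queue should carry a non-zero rps_cpus mask. A minimal verification sketch (BusyBox sh; this helper is illustrative only and not part of the diff):

#!/bin/sh
# Show each edma interrupt next to the CPU mask it is pinned to.
awk '/edma_eth_/ { irq = $1; sub(/:/, "", irq); print irq, $NF }' /proc/interrupts |
while read -r irq name; do
	echo "$name (IRQ $irq): smp_affinity=$(cat /proc/irq/$irq/smp_affinity)"
done

# Dump the per-queue RPS masks and flow counts written by adjust_eth_queue.
head /sys/class/net/eth*/queues/rx-*/rps_cpus /sys/class/net/eth*/queues/rx-*/rps_flow_cnt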