lede/target/linux/qualcommbe/patches-6.6/103-41-net-ethernet-qualcomm-Add-Tx-Ethernet-DMA-support.patch
John Audia d989a3256a qualcommb/ipq95xx: refresh patches ahead of 6.6.75
Refreshed patches for qualcommb/ipq95xx by running
make target/linux/refresh after creating a .config containing:
CONFIG_TARGET_qualcommbe=y
CONFIG_TARGET_qualcommbe_ipq95xx=y
CONFIG_TARGET_qualcommbe_ipq95xx_DEVICE_qcom_rdp433=y

Signed-off-by: John Audia <therealgraysky@proton.me>
Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
2025-02-18 11:00:26 +08:00


From 1c2736afc17435d3bca18a84f9ed2620a5b03830 Mon Sep 17 00:00:00 2001
From: Suruchi Agarwal <quic_suruchia@quicinc.com>
Date: Thu, 21 Mar 2024 16:26:29 -0700
Subject: [PATCH 41/50] net: ethernet: qualcomm: Add Tx Ethernet DMA support

Add Tx queues, rings, descriptor configurations and DMA
support for the EDMA.
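
For reference, the Tx fast path introduced here is roughly as
follows (all function names below are added by this patch):

  edma_port_xmit()            .ndo_start_xmit handler
    edma_tx_gso_segment()     SW GSO when the HW TSO segment limit is exceeded
    edma_tx_ring_xmit()       fill Tx descriptors, update the producer index
  edma_tx_handle_irq()        TXCMPL IRQ handler, schedules NAPI
    edma_tx_napi_poll()
      edma_tx_complete()      reap completions, unmap buffers, free skbs
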
Change-Id: Idfb0e1fe5ac494d614097d6c97dd15d63bbce8e6
Co-developed-by: Pavithra R <quic_pavir@quicinc.com>
Signed-off-by: Pavithra R <quic_pavir@quicinc.com>
Signed-off-by: Suruchi Agarwal <quic_suruchia@quicinc.com>
---
drivers/net/ethernet/qualcomm/ppe/Makefile | 2 +-
drivers/net/ethernet/qualcomm/ppe/edma.c | 97 ++-
drivers/net/ethernet/qualcomm/ppe/edma.h | 7 +
.../net/ethernet/qualcomm/ppe/edma_cfg_tx.c | 648 ++++++++++++++
.../net/ethernet/qualcomm/ppe/edma_cfg_tx.h | 28 +
drivers/net/ethernet/qualcomm/ppe/edma_port.c | 136 +++
drivers/net/ethernet/qualcomm/ppe/edma_port.h | 35 +
drivers/net/ethernet/qualcomm/ppe/edma_tx.c | 808 ++++++++++++++++++
drivers/net/ethernet/qualcomm/ppe/edma_tx.h | 302 +++++++
9 files changed, 2055 insertions(+), 8 deletions(-)
create mode 100644 drivers/net/ethernet/qualcomm/ppe/edma_cfg_tx.c
create mode 100644 drivers/net/ethernet/qualcomm/ppe/edma_cfg_tx.h
create mode 100644 drivers/net/ethernet/qualcomm/ppe/edma_tx.c
create mode 100644 drivers/net/ethernet/qualcomm/ppe/edma_tx.h
--- a/drivers/net/ethernet/qualcomm/ppe/Makefile
+++ b/drivers/net/ethernet/qualcomm/ppe/Makefile
@@ -7,4 +7,4 @@ obj-$(CONFIG_QCOM_PPE) += qcom-ppe.o
qcom-ppe-objs := ppe.o ppe_config.o ppe_api.o ppe_debugfs.o ppe_port.o
#EDMA
-qcom-ppe-objs += edma.o edma_cfg_rx.o edma_port.o edma_rx.o
\ No newline at end of file
+qcom-ppe-objs += edma.o edma_cfg_rx.o edma_cfg_tx.o edma_port.o edma_rx.o edma_tx.o
--- a/drivers/net/ethernet/qualcomm/ppe/edma.c
+++ b/drivers/net/ethernet/qualcomm/ppe/edma.c
@@ -18,6 +18,7 @@
#include <linux/reset.h>
#include "edma.h"
+#include "edma_cfg_tx.h"
#include "edma_cfg_rx.h"
#include "ppe_regs.h"
@@ -25,6 +26,7 @@
/* Global EDMA context. */
struct edma_context *edma_ctx;
+static char **edma_txcmpl_irq_name;
static char **edma_rxdesc_irq_name;
/* Module params. */
@@ -192,22 +194,59 @@ static int edma_configure_ucast_prio_map
static int edma_irq_register(void)
{
struct edma_hw_info *hw_info = edma_ctx->hw_info;
+ struct edma_ring_info *txcmpl = hw_info->txcmpl;
struct edma_ring_info *rx = hw_info->rx;
int ret;
u32 i;
+ /* Request IRQ for TXCMPL rings. */
+ edma_txcmpl_irq_name = kzalloc((sizeof(char *) * txcmpl->num_rings), GFP_KERNEL);
+ if (!edma_txcmpl_irq_name)
+ return -ENOMEM;
+
+ for (i = 0; i < txcmpl->num_rings; i++) {
+ edma_txcmpl_irq_name[i] = kzalloc((sizeof(char *) * EDMA_IRQ_NAME_SIZE),
+ GFP_KERNEL);
+ if (!edma_txcmpl_irq_name[i]) {
+ ret = -ENOMEM;
+ goto txcmpl_ring_irq_name_alloc_fail;
+ }
+
+ snprintf(edma_txcmpl_irq_name[i], EDMA_IRQ_NAME_SIZE, "edma_txcmpl_%d",
+ txcmpl->ring_start + i);
+
+ irq_set_status_flags(edma_ctx->intr_info.intr_txcmpl[i], IRQ_DISABLE_UNLAZY);
+
+ ret = request_irq(edma_ctx->intr_info.intr_txcmpl[i],
+ edma_tx_handle_irq, IRQF_SHARED,
+ edma_txcmpl_irq_name[i],
+ (void *)&edma_ctx->txcmpl_rings[i]);
+ if (ret) {
+ pr_err("TXCMPL ring IRQ:%d request %d failed\n",
+ edma_ctx->intr_info.intr_txcmpl[i], i);
+ goto txcmpl_ring_intr_req_fail;
+ }
+
+ pr_debug("TXCMPL ring: %d IRQ:%d request success: %s\n",
+ txcmpl->ring_start + i,
+ edma_ctx->intr_info.intr_txcmpl[i],
+ edma_txcmpl_irq_name[i]);
+ }
+
/* Request IRQ for RXDESC rings. */
edma_rxdesc_irq_name = kzalloc((sizeof(char *) * rx->num_rings),
GFP_KERNEL);
- if (!edma_rxdesc_irq_name)
- return -ENOMEM;
+ if (!edma_rxdesc_irq_name) {
+ ret = -ENOMEM;
+ goto rxdesc_irq_name_alloc_fail;
+ }
for (i = 0; i < rx->num_rings; i++) {
edma_rxdesc_irq_name[i] = kzalloc((sizeof(char *) * EDMA_IRQ_NAME_SIZE),
GFP_KERNEL);
if (!edma_rxdesc_irq_name[i]) {
ret = -ENOMEM;
- goto rxdesc_irq_name_alloc_fail;
+ goto rxdesc_ring_irq_name_alloc_fail;
}
snprintf(edma_rxdesc_irq_name[i], 20, "edma_rxdesc_%d",
@@ -236,8 +275,19 @@ static int edma_irq_register(void)
rx_desc_ring_intr_req_fail:
for (i = 0; i < rx->num_rings; i++)
kfree(edma_rxdesc_irq_name[i]);
-rxdesc_irq_name_alloc_fail:
+rxdesc_ring_irq_name_alloc_fail:
kfree(edma_rxdesc_irq_name);
+rxdesc_irq_name_alloc_fail:
+ for (i = 0; i < txcmpl->num_rings; i++) {
+ synchronize_irq(edma_ctx->intr_info.intr_txcmpl[i]);
+ free_irq(edma_ctx->intr_info.intr_txcmpl[i],
+ (void *)&edma_ctx->txcmpl_rings[i]);
+ }
+txcmpl_ring_intr_req_fail:
+ for (i = 0; i < txcmpl->num_rings; i++)
+ kfree(edma_txcmpl_irq_name[i]);
+txcmpl_ring_irq_name_alloc_fail:
+ kfree(edma_txcmpl_irq_name);
return ret;
}
@@ -326,12 +376,22 @@ static int edma_irq_init(void)
static int edma_alloc_rings(void)
{
+ if (edma_cfg_tx_rings_alloc()) {
+ pr_err("Error in allocating Tx rings\n");
+ return -ENOMEM;
+ }
+
if (edma_cfg_rx_rings_alloc()) {
pr_err("Error in allocating Rx rings\n");
- return -ENOMEM;
+ goto rx_rings_alloc_fail;
}
return 0;
+
+rx_rings_alloc_fail:
+ edma_cfg_tx_rings_cleanup();
+
+ return -ENOMEM;
}
static int edma_hw_reset(void)
@@ -389,7 +449,7 @@ static int edma_hw_configure(void)
struct edma_hw_info *hw_info = edma_ctx->hw_info;
struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
struct regmap *regmap = ppe_dev->regmap;
- u32 data, reg;
+ u32 data, reg, i;
int ret;
reg = EDMA_BASE_OFFSET + EDMA_REG_MAS_CTRL_ADDR;
@@ -439,11 +499,17 @@ static int edma_hw_configure(void)
}
/* Disable interrupts. */
+ for (i = 1; i <= hw_info->max_ports; i++)
+ edma_cfg_tx_disable_interrupts(i);
+
edma_cfg_rx_disable_interrupts();
edma_cfg_rx_rings_disable();
edma_cfg_rx_ring_mappings();
+ edma_cfg_tx_ring_mappings();
+
+ edma_cfg_tx_rings();
ret = edma_cfg_rx_rings();
if (ret) {
@@ -520,6 +586,7 @@ configure_ucast_prio_map_tbl_failed:
edma_cfg_rx_napi_delete();
edma_cfg_rx_rings_disable();
edma_cfg_rx_rings_failed:
+ edma_cfg_tx_rings_cleanup();
edma_cfg_rx_rings_cleanup();
edma_alloc_rings_failed:
free_netdev(edma_ctx->dummy_dev);
@@ -538,13 +605,27 @@ dummy_dev_alloc_failed:
void edma_destroy(struct ppe_device *ppe_dev)
{
struct edma_hw_info *hw_info = edma_ctx->hw_info;
+ struct edma_ring_info *txcmpl = hw_info->txcmpl;
struct edma_ring_info *rx = hw_info->rx;
u32 i;
/* Disable interrupts. */
+ for (i = 1; i <= hw_info->max_ports; i++)
+ edma_cfg_tx_disable_interrupts(i);
+
edma_cfg_rx_disable_interrupts();
- /* Free IRQ for RXDESC rings. */
+ /* Free IRQ for TXCMPL rings. */
+ for (i = 0; i < txcmpl->num_rings; i++) {
+ synchronize_irq(edma_ctx->intr_info.intr_txcmpl[i]);
+
+ free_irq(edma_ctx->intr_info.intr_txcmpl[i],
+ (void *)&edma_ctx->txcmpl_rings[i]);
+ kfree(edma_txcmpl_irq_name[i]);
+ }
+ kfree(edma_txcmpl_irq_name);
+
+ /* Free IRQ for RXDESC rings. */
for (i = 0; i < rx->num_rings; i++) {
synchronize_irq(edma_ctx->intr_info.intr_rx[i]);
free_irq(edma_ctx->intr_info.intr_rx[i],
@@ -560,6 +641,7 @@ void edma_destroy(struct ppe_device *ppe
edma_cfg_rx_napi_delete();
edma_cfg_rx_rings_disable();
edma_cfg_rx_rings_cleanup();
+ edma_cfg_tx_rings_cleanup();
free_netdev(edma_ctx->dummy_dev);
kfree(edma_ctx->netdev_arr);
@@ -585,6 +667,7 @@ int edma_setup(struct ppe_device *ppe_de
edma_ctx->hw_info = &ipq9574_hw_info;
edma_ctx->ppe_dev = ppe_dev;
edma_ctx->rx_buf_size = rx_buff_size;
+ edma_ctx->tx_requeue_stop = false;
/* Configure the EDMA common clocks. */
ret = edma_clock_init();
--- a/drivers/net/ethernet/qualcomm/ppe/edma.h
+++ b/drivers/net/ethernet/qualcomm/ppe/edma.h
@@ -7,6 +7,7 @@
#include "ppe_api.h"
#include "edma_rx.h"
+#include "edma_tx.h"
/* One clock cycle = 1/(EDMA clock frequency in Mhz) micro seconds.
*
@@ -94,8 +95,11 @@ struct edma_intr_info {
* @intr_info: EDMA Interrupt info
* @rxfill_rings: Rx fill Rings, SW is producer
* @rx_rings: Rx Desc Rings, SW is consumer
+ * @tx_rings: Tx Descriptor Rings, SW is producer
+ * @txcmpl_rings: Tx complete Rings, SW is consumer
* @rx_page_mode: Page mode enabled or disabled
* @rx_buf_size: Rx buffer size for Jumbo MRU
+ * @tx_requeue_stop: Tx requeue stop enabled or disabled
*/
struct edma_context {
struct net_device **netdev_arr;
@@ -105,8 +109,11 @@ struct edma_context {
struct edma_intr_info intr_info;
struct edma_rxfill_ring *rxfill_rings;
struct edma_rxdesc_ring *rx_rings;
+ struct edma_txdesc_ring *tx_rings;
+ struct edma_txcmpl_ring *txcmpl_rings;
u32 rx_page_mode;
u32 rx_buf_size;
+ bool tx_requeue_stop;
};
/* Global EDMA context */
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/ppe/edma_cfg_tx.c
@@ -0,0 +1,648 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+/* Configure rings, buffers and NAPI for the transmit path, along with
+ * providing APIs to enable, disable, clean and map the Tx rings.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/printk.h>
+#include <linux/regmap.h>
+#include <linux/skbuff.h>
+
+#include "edma.h"
+#include "edma_cfg_tx.h"
+#include "edma_port.h"
+#include "ppe.h"
+#include "ppe_regs.h"
+
+static void edma_cfg_txcmpl_ring_cleanup(struct edma_txcmpl_ring *txcmpl_ring)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct device *dev = ppe_dev->dev;
+
+ /* Free any buffers assigned to any descriptors. */
+ edma_tx_complete(EDMA_TX_RING_SIZE - 1, txcmpl_ring);
+
+ /* Free TxCmpl ring descriptors. */
+ dma_free_coherent(dev, sizeof(struct edma_txcmpl_desc)
+ * txcmpl_ring->count, txcmpl_ring->desc,
+ txcmpl_ring->dma);
+ txcmpl_ring->desc = NULL;
+ txcmpl_ring->dma = (dma_addr_t)0;
+}
+
+static int edma_cfg_txcmpl_ring_setup(struct edma_txcmpl_ring *txcmpl_ring)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct device *dev = ppe_dev->dev;
+
+ /* Allocate TxCmpl ring descriptors. */
+ txcmpl_ring->desc = dma_alloc_coherent(dev, sizeof(struct edma_txcmpl_desc)
+ * txcmpl_ring->count,
+ &txcmpl_ring->dma,
+ GFP_KERNEL | __GFP_ZERO);
+
+ if (unlikely(!txcmpl_ring->desc))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void edma_cfg_tx_desc_ring_cleanup(struct edma_txdesc_ring *txdesc_ring)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct regmap *regmap = ppe_dev->regmap;
+ struct edma_txdesc_pri *txdesc = NULL;
+ struct device *dev = ppe_dev->dev;
+ u32 prod_idx, cons_idx, data, reg;
+ struct sk_buff *skb = NULL;
+
+ /* Free any buffers assigned to any descriptors. */
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC_PROD_IDX(txdesc_ring->id);
+ regmap_read(regmap, reg, &data);
+ prod_idx = data & EDMA_TXDESC_PROD_IDX_MASK;
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC_CONS_IDX(txdesc_ring->id);
+ regmap_read(regmap, reg, &data);
+ cons_idx = data & EDMA_TXDESC_CONS_IDX_MASK;
+
+ /* Walk active list, obtain skb from descriptor and free it. */
+ while (cons_idx != prod_idx) {
+ txdesc = EDMA_TXDESC_PRI_DESC(txdesc_ring, cons_idx);
+ skb = (struct sk_buff *)EDMA_TXDESC_OPAQUE_GET(txdesc);
+ dev_kfree_skb_any(skb);
+
+ cons_idx = ((cons_idx + 1) & EDMA_TX_RING_SIZE_MASK);
+ }
+
+ /* Free Tx ring descriptors. */
+ dma_free_coherent(dev, (sizeof(struct edma_txdesc_pri)
+ * txdesc_ring->count),
+ txdesc_ring->pdesc,
+ txdesc_ring->pdma);
+ txdesc_ring->pdesc = NULL;
+ txdesc_ring->pdma = (dma_addr_t)0;
+
+ /* Free secondary Tx ring descriptors. */
+ dma_free_coherent(dev, (sizeof(struct edma_txdesc_sec)
+ * txdesc_ring->count),
+ txdesc_ring->sdesc,
+ txdesc_ring->sdma);
+ txdesc_ring->sdesc = NULL;
+ txdesc_ring->sdma = (dma_addr_t)0;
+}
+
+static int edma_cfg_tx_desc_ring_setup(struct edma_txdesc_ring *txdesc_ring)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct device *dev = ppe_dev->dev;
+
+ /* Allocate TxDesc ring descriptors. */
+ txdesc_ring->pdesc = dma_alloc_coherent(dev, sizeof(struct edma_txdesc_pri)
+ * txdesc_ring->count,
+ &txdesc_ring->pdma,
+ GFP_KERNEL | __GFP_ZERO);
+
+ if (unlikely(!txdesc_ring->pdesc))
+ return -ENOMEM;
+
+ txdesc_ring->sdesc = dma_alloc_coherent(dev, sizeof(struct edma_txdesc_sec)
+ * txdesc_ring->count,
+ &txdesc_ring->sdma,
+ GFP_KERNEL | __GFP_ZERO);
+
+ if (unlikely(!txdesc_ring->sdesc)) {
+ dma_free_coherent(dev, (sizeof(struct edma_txdesc_pri)
+ * txdesc_ring->count),
+ txdesc_ring->pdesc,
+ txdesc_ring->pdma);
+ txdesc_ring->pdesc = NULL;
+ txdesc_ring->pdma = (dma_addr_t)0;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void edma_cfg_tx_desc_ring_configure(struct edma_txdesc_ring *txdesc_ring)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct regmap *regmap = ppe_dev->regmap;
+ u32 data, reg;
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC_BA(txdesc_ring->id);
+ regmap_write(regmap, reg, (u32)(txdesc_ring->pdma & EDMA_RING_DMA_MASK));
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC_BA2(txdesc_ring->id);
+ regmap_write(regmap, reg, (u32)(txdesc_ring->sdma & EDMA_RING_DMA_MASK));
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC_RING_SIZE(txdesc_ring->id);
+ regmap_write(regmap, reg, (u32)(txdesc_ring->count & EDMA_TXDESC_RING_SIZE_MASK));
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC_PROD_IDX(txdesc_ring->id);
+ regmap_write(regmap, reg, (u32)EDMA_TX_INITIAL_PROD_IDX);
+
+ data = FIELD_PREP(EDMA_TXDESC_CTRL_FC_GRP_ID_MASK, txdesc_ring->fc_grp_id);
+
+ /* Configure group ID for flow control for this Tx ring. */
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC_CTRL(txdesc_ring->id);
+ regmap_write(regmap, reg, data);
+}
+
+static void edma_cfg_txcmpl_ring_configure(struct edma_txcmpl_ring *txcmpl_ring)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct regmap *regmap = ppe_dev->regmap;
+ u32 data, reg;
+
+ /* Configure TxCmpl ring base address. */
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXCMPL_BA(txcmpl_ring->id);
+ regmap_write(regmap, reg, (u32)(txcmpl_ring->dma & EDMA_RING_DMA_MASK));
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXCMPL_RING_SIZE(txcmpl_ring->id);
+ regmap_write(regmap, reg, (u32)(txcmpl_ring->count & EDMA_TXDESC_RING_SIZE_MASK));
+
+ /* Set TxCmpl ret mode to opaque. */
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXCMPL_CTRL(txcmpl_ring->id);
+ regmap_write(regmap, reg, EDMA_TXCMPL_RETMODE_OPAQUE);
+
+ /* Configure the Mitigation timer. */
+ data = EDMA_MICROSEC_TO_TIMER_UNIT(EDMA_TX_MITIGATION_TIMER_DEF,
+ ppe_dev->clk_rate / MHZ);
+ data = ((data & EDMA_TX_MOD_TIMER_INIT_MASK)
+ << EDMA_TX_MOD_TIMER_INIT_SHIFT);
+ pr_debug("EDMA Tx mitigation timer value: %d\n", data);
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TX_MOD_TIMER(txcmpl_ring->id);
+ regmap_write(regmap, reg, data);
+
+ /* Configure the Mitigation packet count. */
+ data = (EDMA_TX_MITIGATION_PKT_CNT_DEF & EDMA_TXCMPL_LOW_THRE_MASK)
+ << EDMA_TXCMPL_LOW_THRE_SHIFT;
+ pr_debug("EDMA Tx mitigation packet count value: %d\n", data);
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXCMPL_UGT_THRE(txcmpl_ring->id);
+ regmap_write(regmap, reg, data);
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TX_INT_CTRL(txcmpl_ring->id);
+ regmap_write(regmap, reg, EDMA_TX_NE_INT_EN);
+}
+
+/**
+ * edma_cfg_tx_fill_per_port_tx_map - Fill Tx ring mapping.
+ * @netdev: Netdevice.
+ * @port_id: Port ID.
+ *
+ * Fill per-port Tx ring mapping in net device private area.
+ */
+void edma_cfg_tx_fill_per_port_tx_map(struct net_device *netdev, u32 port_id)
+{
+ u32 i;
+
+ /* Ring to core mapping is done in order starting from 0 for port 1. */
+ for_each_possible_cpu(i) {
+ struct edma_port_priv *port_dev = (struct edma_port_priv *)netdev_priv(netdev);
+ struct edma_txdesc_ring *txdesc_ring;
+ u32 txdesc_ring_id;
+
+ txdesc_ring_id = ((port_id - 1) * num_possible_cpus()) + i;
+ txdesc_ring = &edma_ctx->tx_rings[txdesc_ring_id];
+ port_dev->txr_map[i] = txdesc_ring;
+ }
+}
+
+/**
+ * edma_cfg_tx_rings_enable - Enable Tx rings.
+ * @port_id: Port ID.
+ *
+ * Enable the Tx descriptor rings mapped to the given port.
+ */
+void edma_cfg_tx_rings_enable(u32 port_id)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct regmap *regmap = ppe_dev->regmap;
+ struct edma_txdesc_ring *txdesc_ring;
+ u32 i, ring_idx, reg;
+
+ for_each_possible_cpu(i) {
+ u32 data;
+
+ ring_idx = ((port_id - 1) * num_possible_cpus()) + i;
+ txdesc_ring = &edma_ctx->tx_rings[ring_idx];
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC_CTRL(txdesc_ring->id);
+ regmap_read(regmap, reg, &data);
+ data |= FIELD_PREP(EDMA_TXDESC_CTRL_TXEN_MASK, EDMA_TXDESC_TX_ENABLE);
+
+ regmap_write(regmap, reg, data);
+ }
+}
+
+/**
+ * edma_cfg_tx_rings_disable - Disable Tx rings.
+ * @port_id: Port ID.
+ *
+ * Disable the Tx descriptor rings mapped to the given port.
+ */
+void edma_cfg_tx_rings_disable(u32 port_id)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct regmap *regmap = ppe_dev->regmap;
+ struct edma_txdesc_ring *txdesc_ring;
+ u32 i, ring_idx, reg;
+
+ for_each_possible_cpu(i) {
+ u32 data;
+
+ ring_idx = ((port_id - 1) * num_possible_cpus()) + i;
+ txdesc_ring = &edma_ctx->tx_rings[ring_idx];
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC_CTRL(txdesc_ring->id);
+ regmap_read(regmap, reg, &data);
+ data &= ~EDMA_TXDESC_TX_ENABLE;
+ regmap_write(regmap, reg, data);
+ }
+}
+
+/**
+ * edma_cfg_tx_ring_mappings - Map Tx to Tx complete rings.
+ *
+ * Map Tx to Tx complete rings.
+ */
+void edma_cfg_tx_ring_mappings(void)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct edma_hw_info *hw_info = edma_ctx->hw_info;
+ struct edma_ring_info *txcmpl = hw_info->txcmpl;
+ struct regmap *regmap = ppe_dev->regmap;
+ struct edma_ring_info *tx = hw_info->tx;
+ u32 desc_index, i, data, reg;
+
+ /* Clear the TXDESC2CMPL_MAP_xx reg before setting up
+ * the mapping. This register holds the TXDESC to TXCMPL ring
+ * mapping.
+ */
+ regmap_write(regmap, EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_0_ADDR, 0);
+ regmap_write(regmap, EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_1_ADDR, 0);
+ regmap_write(regmap, EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_2_ADDR, 0);
+ regmap_write(regmap, EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_3_ADDR, 0);
+ regmap_write(regmap, EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_4_ADDR, 0);
+ regmap_write(regmap, EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_5_ADDR, 0);
+ desc_index = txcmpl->ring_start;
+
+ /* Six registers hold the completion mapping for a total of 32
+ * Tx desc rings (0-5, 6-11, 12-17, 18-23, 24-29 and the rest).
+ * In each register, 5 bits hold the mapping for a particular Tx desc ring.
+ */
+ for (i = tx->ring_start; i < tx->ring_start + tx->num_rings; i++) {
+ u32 reg, data;
+
+ if (i >= 0 && i <= 5)
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_0_ADDR;
+ else if (i >= 6 && i <= 11)
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_1_ADDR;
+ else if (i >= 12 && i <= 17)
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_2_ADDR;
+ else if (i >= 18 && i <= 23)
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_3_ADDR;
+ else if (i >= 24 && i <= 29)
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_4_ADDR;
+ else
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_5_ADDR;
+
+ pr_debug("Configure Tx desc:%u to use TxCmpl:%u\n", i, desc_index);
+
+ /* Set the Tx complete descriptor ring number in the mapping register.
+ * E.g. If (txcmpl ring)desc_index = 31, (txdesc ring)i = 28.
+ * reg = EDMA_REG_TXDESC2CMPL_MAP_4_ADDR
+ * data |= (desc_index & 0x1F) << ((i % 6) * 5);
+ * data |= (0x1F << 20);
+ * This sets 0b11111 at bits 24:20 of EDMA_REG_TXDESC2CMPL_MAP_4_ADDR.
+ */
+ regmap_read(regmap, reg, &data);
+ data |= (desc_index & EDMA_TXDESC2CMPL_MAP_TXDESC_MASK) << ((i % 6) * 5);
+ regmap_write(regmap, reg, data);
+
+ desc_index++;
+ if (desc_index == txcmpl->ring_start + txcmpl->num_rings)
+ desc_index = txcmpl->ring_start;
+ }
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_0_ADDR;
+ regmap_read(regmap, reg, &data);
+ pr_debug("EDMA_REG_TXDESC2CMPL_MAP_0_ADDR: 0x%x\n", data);
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_1_ADDR;
+ regmap_read(regmap, reg, &data);
+ pr_debug("EDMA_REG_TXDESC2CMPL_MAP_1_ADDR: 0x%x\n", data);
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_2_ADDR;
+ regmap_read(regmap, reg, &data);
+ pr_debug("EDMA_REG_TXDESC2CMPL_MAP_2_ADDR: 0x%x\n", data);
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_3_ADDR;
+ regmap_read(regmap, reg, &data);
+ pr_debug("EDMA_REG_TXDESC2CMPL_MAP_3_ADDR: 0x%x\n", data);
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_4_ADDR;
+ regmap_read(regmap, reg, &data);
+ pr_debug("EDMA_REG_TXDESC2CMPL_MAP_4_ADDR: 0x%x\n", data);
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC2CMPL_MAP_5_ADDR;
+ regmap_read(regmap, reg, &data);
+ pr_debug("EDMA_REG_TXDESC2CMPL_MAP_5_ADDR: 0x%x\n", data);
+}
+
+static int edma_cfg_tx_rings_setup(void)
+{
+ struct edma_hw_info *hw_info = edma_ctx->hw_info;
+ struct edma_ring_info *txcmpl = hw_info->txcmpl;
+ struct edma_ring_info *tx = hw_info->tx;
+ u32 i, j = 0;
+
+ /* Set Txdesc flow control group id, same as port number. */
+ for (i = 0; i < hw_info->max_ports; i++) {
+ for_each_possible_cpu(j) {
+ struct edma_txdesc_ring *txdesc_ring = NULL;
+ u32 txdesc_idx = (i * num_possible_cpus()) + j;
+
+ txdesc_ring = &edma_ctx->tx_rings[txdesc_idx];
+ txdesc_ring->fc_grp_id = i + 1;
+ }
+ }
+
+ /* Allocate TxDesc ring descriptors. */
+ for (i = 0; i < tx->num_rings; i++) {
+ struct edma_txdesc_ring *txdesc_ring = NULL;
+ int ret;
+
+ txdesc_ring = &edma_ctx->tx_rings[i];
+ txdesc_ring->count = EDMA_TX_RING_SIZE;
+ txdesc_ring->id = tx->ring_start + i;
+
+ ret = edma_cfg_tx_desc_ring_setup(txdesc_ring);
+ if (ret) {
+ pr_err("Error in setting up %d txdesc ring. ret: %d",
+ txdesc_ring->id, ret);
+ while (i-- >= 0)
+ edma_cfg_tx_desc_ring_cleanup(&edma_ctx->tx_rings[i]);
+
+ return -ENOMEM;
+ }
+ }
+
+ /* Allocate TxCmpl ring descriptors. */
+ for (i = 0; i < txcmpl->num_rings; i++) {
+ struct edma_txcmpl_ring *txcmpl_ring = NULL;
+ int ret;
+
+ txcmpl_ring = &edma_ctx->txcmpl_rings[i];
+ txcmpl_ring->count = EDMA_TX_RING_SIZE;
+ txcmpl_ring->id = txcmpl->ring_start + i;
+
+ ret = edma_cfg_txcmpl_ring_setup(txcmpl_ring);
+ if (ret != 0) {
+ pr_err("Error in setting up %d TxCmpl ring. ret: %d",
+ txcmpl_ring->id, ret);
+ while (i-- >= 0)
+ edma_cfg_txcmpl_ring_cleanup(&edma_ctx->txcmpl_rings[i]);
+
+ goto txcmpl_mem_alloc_fail;
+ }
+ }
+
+ pr_debug("Tx descriptor count for Tx desc and Tx complete rings: %d\n",
+ EDMA_TX_RING_SIZE);
+
+ return 0;
+
+txcmpl_mem_alloc_fail:
+ for (i = 0; i < tx->num_rings; i++)
+ edma_cfg_tx_desc_ring_cleanup(&edma_ctx->tx_rings[i]);
+
+ return -ENOMEM;
+}
+
+/**
+ * edma_cfg_tx_rings_alloc - Allocate EDMA Tx rings.
+ *
+ * Allocate EDMA Tx rings.
+ */
+int edma_cfg_tx_rings_alloc(void)
+{
+ struct edma_hw_info *hw_info = edma_ctx->hw_info;
+ struct edma_ring_info *txcmpl = hw_info->txcmpl;
+ struct edma_ring_info *tx = hw_info->tx;
+
+ edma_ctx->tx_rings = kzalloc((sizeof(*edma_ctx->tx_rings) * tx->num_rings),
+ GFP_KERNEL);
+ if (!edma_ctx->tx_rings)
+ return -ENOMEM;
+
+ edma_ctx->txcmpl_rings = kzalloc((sizeof(*edma_ctx->txcmpl_rings) * txcmpl->num_rings),
+ GFP_KERNEL);
+ if (!edma_ctx->txcmpl_rings)
+ goto txcmpl_ring_alloc_fail;
+
+ pr_debug("Num rings - TxDesc:%u (%u-%u) TxCmpl:%u (%u-%u)\n",
+ tx->num_rings, tx->ring_start,
+ (tx->ring_start + tx->num_rings - 1),
+ txcmpl->num_rings, txcmpl->ring_start,
+ (txcmpl->ring_start + txcmpl->num_rings - 1));
+
+ if (edma_cfg_tx_rings_setup()) {
+ pr_err("Error in setting up tx rings\n");
+ goto tx_rings_setup_fail;
+ }
+
+ return 0;
+
+tx_rings_setup_fail:
+ kfree(edma_ctx->txcmpl_rings);
+ edma_ctx->txcmpl_rings = NULL;
+
+txcmpl_ring_alloc_fail:
+ kfree(edma_ctx->tx_rings);
+ edma_ctx->tx_rings = NULL;
+
+ return -ENOMEM;
+}
+
+/**
+ * edma_cfg_tx_rings_cleanup - Cleanup EDMA Tx rings.
+ *
+ * Cleanup EDMA Tx rings.
+ */
+void edma_cfg_tx_rings_cleanup(void)
+{
+ struct edma_hw_info *hw_info = edma_ctx->hw_info;
+ struct edma_ring_info *txcmpl = hw_info->txcmpl;
+ struct edma_ring_info *tx = hw_info->tx;
+ u32 i;
+
+ /* Free any buffers assigned to any descriptors. */
+ for (i = 0; i < tx->num_rings; i++)
+ edma_cfg_tx_desc_ring_cleanup(&edma_ctx->tx_rings[i]);
+
+ /* Free Tx completion descriptors. */
+ for (i = 0; i < txcmpl->num_rings; i++)
+ edma_cfg_txcmpl_ring_cleanup(&edma_ctx->txcmpl_rings[i]);
+
+ kfree(edma_ctx->tx_rings);
+ kfree(edma_ctx->txcmpl_rings);
+ edma_ctx->tx_rings = NULL;
+ edma_ctx->txcmpl_rings = NULL;
+}
+
+/**
+ * edma_cfg_tx_rings - Configure EDMA Tx rings.
+ *
+ * Configure EDMA Tx rings.
+ */
+void edma_cfg_tx_rings(void)
+{
+ struct edma_hw_info *hw_info = edma_ctx->hw_info;
+ struct edma_ring_info *txcmpl = hw_info->txcmpl;
+ struct edma_ring_info *tx = hw_info->tx;
+ u32 i;
+
+ /* Configure Tx desc ring. */
+ for (i = 0; i < tx->num_rings; i++)
+ edma_cfg_tx_desc_ring_configure(&edma_ctx->tx_rings[i]);
+
+ /* Configure TxCmpl ring. */
+ for (i = 0; i < txcmpl->num_rings; i++)
+ edma_cfg_txcmpl_ring_configure(&edma_ctx->txcmpl_rings[i]);
+}
+
+/**
+ * edma_cfg_tx_disable_interrupts - Disable EDMA Tx interrupts.
+ * @port_id: Port ID.
+ *
+ * Disable the Tx completion interrupts for the given port.
+ */
+void edma_cfg_tx_disable_interrupts(u32 port_id)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct regmap *regmap = ppe_dev->regmap;
+ struct edma_txcmpl_ring *txcmpl_ring;
+ u32 i, ring_idx, reg;
+
+ for_each_possible_cpu(i) {
+ ring_idx = ((port_id - 1) * num_possible_cpus()) + i;
+ txcmpl_ring = &edma_ctx->txcmpl_rings[ring_idx];
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TX_INT_MASK(txcmpl_ring->id);
+ regmap_write(regmap, reg, EDMA_MASK_INT_CLEAR);
+ }
+}
+
+/**
+ * edma_cfg_tx_enable_interrupts - Enable EDMA Tx interrupts.
+ * @port_id: Port ID.
+ *
+ * Enable the Tx completion interrupts for the given port.
+ */
+void edma_cfg_tx_enable_interrupts(u32 port_id)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct regmap *regmap = ppe_dev->regmap;
+ struct edma_txcmpl_ring *txcmpl_ring;
+ u32 i, ring_idx, reg;
+
+ for_each_possible_cpu(i) {
+ ring_idx = ((port_id - 1) * num_possible_cpus()) + i;
+ txcmpl_ring = &edma_ctx->txcmpl_rings[ring_idx];
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TX_INT_MASK(txcmpl_ring->id);
+ regmap_write(regmap, reg, edma_ctx->intr_info.intr_mask_txcmpl);
+ }
+}
+
+/**
+ * edma_cfg_tx_napi_enable - Enable Tx NAPI.
+ * @port_id: Port ID.
+ *
+ * Enable Tx NAPI.
+ */
+void edma_cfg_tx_napi_enable(u32 port_id)
+{
+ struct edma_txcmpl_ring *txcmpl_ring;
+ u32 i, ring_idx;
+
+ /* Enable Tx NAPI for each queue of the interface. */
+ for_each_possible_cpu(i) {
+ ring_idx = ((port_id - 1) * num_possible_cpus()) + i;
+ txcmpl_ring = &edma_ctx->txcmpl_rings[ring_idx];
+ if (!txcmpl_ring->napi_added)
+ continue;
+
+ napi_enable(&txcmpl_ring->napi);
+ }
+}
+
+/**
+ * edma_cfg_tx_napi_disable - Disable Tx NAPI.
+ * @port_id: Port ID.
+ *
+ * Disable Tx NAPI.
+ */
+void edma_cfg_tx_napi_disable(u32 port_id)
+{
+ struct edma_txcmpl_ring *txcmpl_ring;
+ u32 i, ring_idx;
+
+ /* Disable Tx NAPI for each queue of the interface. */
+ for_each_possible_cpu(i) {
+ ring_idx = ((port_id - 1) * num_possible_cpus()) + i;
+ txcmpl_ring = &edma_ctx->txcmpl_rings[ring_idx];
+ if (!txcmpl_ring->napi_added)
+ continue;
+
+ napi_disable(&txcmpl_ring->napi);
+ }
+}
+
+/**
+ * edma_cfg_tx_napi_delete - Delete Tx NAPI.
+ * @port_id: Port ID.
+ *
+ * Delete Tx NAPI.
+ */
+void edma_cfg_tx_napi_delete(u32 port_id)
+{
+ struct edma_txcmpl_ring *txcmpl_ring;
+ u32 i, ring_idx;
+
+ /* Delete Tx NAPI for each queue of the interface. */
+ for_each_possible_cpu(i) {
+ ring_idx = ((port_id - 1) * num_possible_cpus()) + i;
+ txcmpl_ring = &edma_ctx->txcmpl_rings[ring_idx];
+ if (!txcmpl_ring->napi_added)
+ continue;
+
+ netif_napi_del(&txcmpl_ring->napi);
+ txcmpl_ring->napi_added = false;
+ }
+}
+
+/**
+ * edma_cfg_tx_napi_add - Add Tx NAPI.
+ * @netdev: Netdevice.
+ * @port_id: Port ID.
+ *
+ * Add NAPI for the Tx completion rings of the port.
+ */
+void edma_cfg_tx_napi_add(struct net_device *netdev, u32 port_id)
+{
+ struct edma_hw_info *hw_info = edma_ctx->hw_info;
+ struct edma_txcmpl_ring *txcmpl_ring;
+ u32 i, ring_idx;
+
+ /* Add Tx NAPI for each queue of the interface. */
+ for_each_possible_cpu(i) {
+ ring_idx = ((port_id - 1) * num_possible_cpus()) + i;
+ txcmpl_ring = &edma_ctx->txcmpl_rings[ring_idx];
+ netif_napi_add_weight(netdev, &txcmpl_ring->napi,
+ edma_tx_napi_poll, hw_info->napi_budget_tx);
+ txcmpl_ring->napi_added = true;
+ netdev_dbg(netdev, "Napi added for txcmpl ring: %u\n", txcmpl_ring->id);
+ }
+
+ netdev_dbg(netdev, "Tx NAPI budget: %d\n", hw_info->napi_budget_tx);
+}
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/ppe/edma_cfg_tx.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef __EDMA_CFG_TX__
+#define __EDMA_CFG_TX__
+
+/* Tx mitigation timer's default value. */
+#define EDMA_TX_MITIGATION_TIMER_DEF 250
+
+/* Tx mitigation packet count default value. */
+#define EDMA_TX_MITIGATION_PKT_CNT_DEF 16
+
+void edma_cfg_tx_rings(void);
+int edma_cfg_tx_rings_alloc(void);
+void edma_cfg_tx_rings_cleanup(void);
+void edma_cfg_tx_disable_interrupts(u32 port_id);
+void edma_cfg_tx_enable_interrupts(u32 port_id);
+void edma_cfg_tx_napi_enable(u32 port_id);
+void edma_cfg_tx_napi_disable(u32 port_id);
+void edma_cfg_tx_napi_delete(u32 port_id);
+void edma_cfg_tx_napi_add(struct net_device *netdev, u32 port_id);
+void edma_cfg_tx_ring_mappings(void);
+void edma_cfg_txcmpl_mapping_fill(void);
+void edma_cfg_tx_rings_enable(u32 port_id);
+void edma_cfg_tx_rings_disable(u32 port_id);
+void edma_cfg_tx_fill_per_port_tx_map(struct net_device *netdev, u32 port_id);
+#endif
--- a/drivers/net/ethernet/qualcomm/ppe/edma_port.c
+++ b/drivers/net/ethernet/qualcomm/ppe/edma_port.c
@@ -13,6 +13,7 @@
#include "edma.h"
#include "edma_cfg_rx.h"
+#include "edma_cfg_tx.h"
#include "edma_port.h"
#include "ppe_regs.h"
@@ -35,6 +36,15 @@ static int edma_port_stats_alloc(struct
return -ENOMEM;
}
+ port_priv->pcpu_stats.tx_stats =
+ netdev_alloc_pcpu_stats(struct edma_port_tx_stats);
+ if (!port_priv->pcpu_stats.tx_stats) {
+ netdev_err(netdev, "Per-cpu EDMA Tx stats alloc failed for %s\n",
+ netdev->name);
+ free_percpu(port_priv->pcpu_stats.rx_stats);
+ return -ENOMEM;
+ }
+
return 0;
}
@@ -43,6 +53,28 @@ static void edma_port_stats_free(struct
struct edma_port_priv *port_priv = (struct edma_port_priv *)netdev_priv(netdev);
free_percpu(port_priv->pcpu_stats.rx_stats);
+ free_percpu(port_priv->pcpu_stats.tx_stats);
+}
+
+static void edma_port_configure(struct net_device *netdev)
+{
+ struct edma_port_priv *port_priv = (struct edma_port_priv *)netdev_priv(netdev);
+ struct ppe_port *port = port_priv->ppe_port;
+ int port_id = port->port_id;
+
+ edma_cfg_tx_fill_per_port_tx_map(netdev, port_id);
+ edma_cfg_tx_rings_enable(port_id);
+ edma_cfg_tx_napi_add(netdev, port_id);
+}
+
+static void edma_port_deconfigure(struct net_device *netdev)
+{
+ struct edma_port_priv *port_priv = (struct edma_port_priv *)netdev_priv(netdev);
+ struct ppe_port *port = port_priv->ppe_port;
+ int port_id = port->port_id;
+
+ edma_cfg_tx_napi_delete(port_id);
+ edma_cfg_tx_rings_disable(port_id);
}
static u16 __maybe_unused edma_port_select_queue(__maybe_unused struct net_device *netdev,
@@ -60,6 +92,7 @@ static int edma_port_open(struct net_dev
{
struct edma_port_priv *port_priv = (struct edma_port_priv *)netdev_priv(netdev);
struct ppe_port *ppe_port;
+ int port_id;
if (!port_priv)
return -EINVAL;
@@ -74,10 +107,14 @@ static int edma_port_open(struct net_dev
netdev->wanted_features |= EDMA_NETDEV_FEATURES;
ppe_port = port_priv->ppe_port;
+ port_id = ppe_port->port_id;
if (ppe_port->phylink)
phylink_start(ppe_port->phylink);
+ edma_cfg_tx_napi_enable(port_id);
+ edma_cfg_tx_enable_interrupts(port_id);
+
netif_start_queue(netdev);
return 0;
@@ -87,13 +124,21 @@ static int edma_port_close(struct net_de
{
struct edma_port_priv *port_priv = (struct edma_port_priv *)netdev_priv(netdev);
struct ppe_port *ppe_port;
+ int port_id;
if (!port_priv)
return -EINVAL;
netif_stop_queue(netdev);
+ /* A 20ms delay provides plenty of margin for any in-flight packets. */
+ msleep(20);
+
ppe_port = port_priv->ppe_port;
+ port_id = ppe_port->port_id;
+
+ edma_cfg_tx_disable_interrupts(port_id);
+ edma_cfg_tx_napi_disable(port_id);
/* Phylink close. */
if (ppe_port->phylink)
@@ -137,6 +182,92 @@ static netdev_features_t edma_port_featu
return features;
}
+static netdev_tx_t edma_port_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct edma_port_priv *port_priv = NULL;
+ struct edma_port_pcpu_stats *pcpu_stats;
+ struct edma_txdesc_ring *txdesc_ring;
+ struct edma_port_tx_stats *stats;
+ enum edma_tx_gso_status result;
+ struct sk_buff *segs = NULL;
+ u8 cpu_id;
+ u32 skbq;
+ int ret;
+
+ if (!skb || !dev)
+ return NETDEV_TX_OK;
+
+ port_priv = netdev_priv(dev);
+
+ /* Select a TX ring. */
+ skbq = (skb_get_queue_mapping(skb) & (num_possible_cpus() - 1));
+
+ txdesc_ring = (struct edma_txdesc_ring *)port_priv->txr_map[skbq];
+
+ pcpu_stats = &port_priv->pcpu_stats;
+ stats = this_cpu_ptr(pcpu_stats->tx_stats);
+
+ /* HW does not support TSO for packets with 32 or more
+ * segments. Perform SW GSO for such packets.
+ */
+ result = edma_tx_gso_segment(skb, dev, &segs);
+ if (likely(result == EDMA_TX_GSO_NOT_NEEDED)) {
+ /* Transmit the packet. */
+ ret = edma_tx_ring_xmit(dev, skb, txdesc_ring, stats);
+
+ if (unlikely(ret == EDMA_TX_FAIL_NO_DESC)) {
+ if (likely(!edma_ctx->tx_requeue_stop)) {
+ cpu_id = smp_processor_id();
+ netdev_dbg(dev, "Stopping tx queue due to lack oftx descriptors\n");
+ u64_stats_update_begin(&stats->syncp);
+ ++stats->tx_queue_stopped[cpu_id];
+ u64_stats_update_end(&stats->syncp);
+ netif_tx_stop_queue(netdev_get_tx_queue(dev, skbq));
+ return NETDEV_TX_BUSY;
+ }
+ }
+
+ if (unlikely(ret != EDMA_TX_OK)) {
+ dev_kfree_skb_any(skb);
+ u64_stats_update_begin(&stats->syncp);
+ ++stats->tx_drops;
+ u64_stats_update_end(&stats->syncp);
+ }
+
+ return NETDEV_TX_OK;
+ } else if (unlikely(result == EDMA_TX_GSO_FAIL)) {
+ netdev_dbg(dev, "%p: SW GSO failed for segment size: %d\n",
+ skb, skb_shinfo(skb)->gso_segs);
+ dev_kfree_skb_any(skb);
+ u64_stats_update_begin(&stats->syncp);
+ ++stats->tx_gso_drop_pkts;
+ u64_stats_update_end(&stats->syncp);
+ return NETDEV_TX_OK;
+ }
+
+ u64_stats_update_begin(&stats->syncp);
+ ++stats->tx_gso_pkts;
+ u64_stats_update_end(&stats->syncp);
+
+ dev_kfree_skb_any(skb);
+ while (segs) {
+ skb = segs;
+ segs = segs->next;
+
+ /* Transmit the packet. */
+ ret = edma_tx_ring_xmit(dev, skb, txdesc_ring, stats);
+ if (unlikely(ret != EDMA_TX_OK)) {
+ dev_kfree_skb_any(skb);
+ u64_stats_update_begin(&stats->syncp);
+ ++stats->tx_drops;
+ u64_stats_update_end(&stats->syncp);
+ }
+ }
+
+ return NETDEV_TX_OK;
+}
+
static void edma_port_get_stats64(struct net_device *netdev,
struct rtnl_link_stats64 *stats)
{
@@ -179,6 +310,7 @@ static int edma_port_set_mac_address(str
static const struct net_device_ops edma_port_netdev_ops = {
.ndo_open = edma_port_open,
.ndo_stop = edma_port_close,
+ .ndo_start_xmit = edma_port_xmit,
.ndo_get_stats64 = edma_port_get_stats64,
.ndo_set_mac_address = edma_port_set_mac_address,
.ndo_validate_addr = eth_validate_addr,
@@ -199,6 +331,7 @@ void edma_port_destroy(struct ppe_port *
int port_id = port->port_id;
struct net_device *netdev = edma_ctx->netdev_arr[port_id - 1];
+ edma_port_deconfigure(netdev);
edma_port_stats_free(netdev);
unregister_netdev(netdev);
free_netdev(netdev);
@@ -276,6 +409,8 @@ int edma_port_setup(struct ppe_port *por
*/
edma_ctx->netdev_arr[port_id - 1] = netdev;
+ edma_port_configure(netdev);
+
/* Setup phylink. */
ret = ppe_port_phylink_setup(port, netdev);
if (ret) {
@@ -298,6 +433,7 @@ int edma_port_setup(struct ppe_port *por
register_netdev_fail:
ppe_port_phylink_destroy(port);
port_phylink_setup_fail:
+ edma_port_deconfigure(netdev);
edma_ctx->netdev_arr[port_id - 1] = NULL;
edma_port_stats_free(netdev);
stats_alloc_fail:
--- a/drivers/net/ethernet/qualcomm/ppe/edma_port.h
+++ b/drivers/net/ethernet/qualcomm/ppe/edma_port.h
@@ -7,6 +7,8 @@
#include "ppe_port.h"
+#define EDMA_PORT_MAX_CORE 4
+
#define EDMA_NETDEV_FEATURES (NETIF_F_FRAGLIST \
| NETIF_F_SG \
| NETIF_F_RXCSUM \
@@ -35,11 +37,43 @@ struct edma_port_rx_stats {
};
/**
+ * struct edma_port_tx_stats - EDMA Tx per-CPU statistics for the port.
+ * @tx_pkts: Number of Tx packets
+ * @tx_bytes: Number of Tx bytes
+ * @tx_drops: Number of Tx drops
+ * @tx_nr_frag_pkts: Number of Tx nr_frag packets
+ * @tx_fraglist_pkts: Number of Tx fraglist packets
+ * @tx_fraglist_with_nr_frags_pkts: Number of Tx packets with fraglist and nr_frags
+ * @tx_tso_pkts: Number of Tx TSO packets
+ * @tx_tso_drop_pkts: Number of Tx TSO drop packets
+ * @tx_gso_pkts: Number of Tx GSO packets
+ * @tx_gso_drop_pkts: Number of Tx GSO drop packets
+ * @tx_queue_stopped: Number of times the Tx queue was stopped, per core
+ * @syncp: Synchronization point for the per-CPU statistics
+ */
+struct edma_port_tx_stats {
+ u64 tx_pkts;
+ u64 tx_bytes;
+ u64 tx_drops;
+ u64 tx_nr_frag_pkts;
+ u64 tx_fraglist_pkts;
+ u64 tx_fraglist_with_nr_frags_pkts;
+ u64 tx_tso_pkts;
+ u64 tx_tso_drop_pkts;
+ u64 tx_gso_pkts;
+ u64 tx_gso_drop_pkts;
+ u64 tx_queue_stopped[EDMA_PORT_MAX_CORE];
+ struct u64_stats_sync syncp;
+};
+
+/**
* struct edma_port_pcpu_stats - EDMA per cpu stats data structure for the port.
* @rx_stats: Per CPU Rx statistics
+ * @tx_stats: Per CPU Tx statistics
*/
struct edma_port_pcpu_stats {
struct edma_port_rx_stats __percpu *rx_stats;
+ struct edma_port_tx_stats __percpu *tx_stats;
};
/**
@@ -54,6 +88,7 @@ struct edma_port_priv {
struct ppe_port *ppe_port;
struct net_device *netdev;
struct edma_port_pcpu_stats pcpu_stats;
+ struct edma_txdesc_ring *txr_map[EDMA_PORT_MAX_CORE];
unsigned long flags;
};
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/ppe/edma_tx.c
@@ -0,0 +1,808 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+/* Provide APIs to allocate Tx buffers, fill Tx descriptors, transmit both
+ * scatter-gather and linear packets, and reap Tx completions to free the skb.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/platform_device.h>
+#include <linux/printk.h>
+#include <net/gso.h>
+#include <linux/regmap.h>
+
+#include "edma.h"
+#include "edma_cfg_tx.h"
+#include "edma_port.h"
+#include "ppe.h"
+#include "ppe_regs.h"
+
+static u32 edma_tx_num_descs_for_sg(struct sk_buff *skb)
+{
+ u32 nr_frags_first = 0, num_tx_desc_needed = 0;
+
+ /* Check if we have enough Tx descriptors for SG. */
+ if (unlikely(skb_shinfo(skb)->nr_frags)) {
+ nr_frags_first = skb_shinfo(skb)->nr_frags;
+ WARN_ON_ONCE(nr_frags_first > MAX_SKB_FRAGS);
+ num_tx_desc_needed += nr_frags_first;
+ }
+
+ /* Walk through fraglist skbs making a note of nr_frags
+ * One Tx desc for fraglist skb. Fraglist skb may have
+ * further nr_frags.
+ */
+ if (unlikely(skb_has_frag_list(skb))) {
+ struct sk_buff *iter_skb;
+
+ skb_walk_frags(skb, iter_skb) {
+ u32 nr_frags = skb_shinfo(iter_skb)->nr_frags;
+
+ WARN_ON_ONCE(nr_frags > MAX_SKB_FRAGS);
+ num_tx_desc_needed += (1 + nr_frags);
+ }
+ }
+
+ return (num_tx_desc_needed + 1);
+}
+
+/**
+ * edma_tx_gso_segment - Tx GSO.
+ * @skb: Socket Buffer.
+ * @netdev: Netdevice.
+ * @segs: SKB segments from GSO.
+ *
+ * Segment the skb in software when it cannot be handled by HW TSO.
+ *
+ * Return EDMA_TX_GSO_NOT_NEEDED, EDMA_TX_GSO_SUCCEED or EDMA_TX_GSO_FAIL.
+ */
+enum edma_tx_gso_status edma_tx_gso_segment(struct sk_buff *skb,
+ struct net_device *netdev, struct sk_buff **segs)
+{
+ u32 num_tx_desc_needed;
+
+ /* Check if the skb is non-linear before proceeding. */
+ if (likely(!skb_is_nonlinear(skb)))
+ return EDMA_TX_GSO_NOT_NEEDED;
+
+ /* Check if TSO is enabled. If so, return as the skb doesn't
+ * need to be segmented by the kernel.
+ */
+ if (netdev->features & (NETIF_F_TSO | NETIF_F_TSO6)) {
+ num_tx_desc_needed = edma_tx_num_descs_for_sg(skb);
+ if (likely(num_tx_desc_needed <= EDMA_TX_TSO_SEG_MAX))
+ return EDMA_TX_GSO_NOT_NEEDED;
+ }
+
+ /* GSO segmentation of the skb into multiple segments. */
+ *segs = skb_gso_segment(skb, netdev->features
+ & ~(NETIF_F_TSO | NETIF_F_TSO6));
+
+ /* Check for error in GSO segmentation. */
+ if (IS_ERR_OR_NULL(*segs)) {
+ netdev_info(netdev, "Tx gso fail\n");
+ return EDMA_TX_GSO_FAIL;
+ }
+
+ return EDMA_TX_GSO_SUCCEED;
+}
+
+/**
+ * edma_tx_complete - Reap Tx completion descriptors.
+ * @work_to_do: Work to do.
+ * @txcmpl_ring: Tx Completion ring.
+ *
+ * Reap Tx completion descriptors of the transmitted
+ * packets and free the corresponding SKBs.
+ *
+ * Return the number of descriptors for which Tx completion was processed.
+ */
+u32 edma_tx_complete(u32 work_to_do, struct edma_txcmpl_ring *txcmpl_ring)
+{
+ struct edma_txcmpl_stats *txcmpl_stats = &txcmpl_ring->txcmpl_stats;
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct regmap *regmap = ppe_dev->regmap;
+ u32 cons_idx, end_idx, data, cpu_id;
+ struct device *dev = ppe_dev->dev;
+ u32 avail, count, txcmpl_errors;
+ struct edma_txcmpl_desc *txcmpl;
+ u32 prod_idx = 0, more_bit = 0;
+ struct netdev_queue *nq;
+ struct sk_buff *skb;
+ u32 reg;
+
+ cons_idx = txcmpl_ring->cons_idx;
+
+ if (likely(txcmpl_ring->avail_pkt >= work_to_do)) {
+ avail = work_to_do;
+ } else {
+ /* Get TXCMPL ring producer index. */
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXCMPL_PROD_IDX(txcmpl_ring->id);
+ regmap_read(regmap, reg, &data);
+ prod_idx = data & EDMA_TXCMPL_PROD_IDX_MASK;
+
+ avail = EDMA_DESC_AVAIL_COUNT(prod_idx, cons_idx, EDMA_TX_RING_SIZE);
+ txcmpl_ring->avail_pkt = avail;
+
+ if (unlikely(!avail)) {
+ dev_dbg(dev, "No available descriptors are pending for %d txcmpl ring\n",
+ txcmpl_ring->id);
+ u64_stats_update_begin(&txcmpl_stats->syncp);
+ ++txcmpl_stats->no_pending_desc;
+ u64_stats_update_end(&txcmpl_stats->syncp);
+ return 0;
+ }
+
+ avail = min(avail, work_to_do);
+ }
+
+ count = avail;
+
+ end_idx = (cons_idx + avail) & EDMA_TX_RING_SIZE_MASK;
+ txcmpl = EDMA_TXCMPL_DESC(txcmpl_ring, cons_idx);
+
+ /* Instead of freeing the skb, it might be better to save and use
+ * for Rxfill.
+ */
+ while (likely(avail--)) {
+ /* The last descriptor holds the SKB pointer for scattered frames.
+ * So skip the descriptors with more bit set.
+ */
+ more_bit = EDMA_TXCMPL_MORE_BIT_GET(txcmpl);
+ if (unlikely(more_bit)) {
+ u64_stats_update_begin(&txcmpl_stats->syncp);
+ ++txcmpl_stats->desc_with_more_bit;
+ u64_stats_update_end(&txcmpl_stats->syncp);
+ cons_idx = ((cons_idx + 1) & EDMA_TX_RING_SIZE_MASK);
+ txcmpl = EDMA_TXCMPL_DESC(txcmpl_ring, cons_idx);
+ continue;
+ }
+
+ /* Find and free the skb for Tx completion. */
+ skb = (struct sk_buff *)EDMA_TXCMPL_OPAQUE_GET(txcmpl);
+ if (unlikely(!skb)) {
+ if (net_ratelimit())
+ dev_warn(dev, "Invalid cons_idx:%u prod_idx:%u word2:%x word3:%x\n",
+ cons_idx, prod_idx, txcmpl->word2, txcmpl->word3);
+
+ u64_stats_update_begin(&txcmpl_stats->syncp);
+ ++txcmpl_stats->invalid_buffer;
+ u64_stats_update_end(&txcmpl_stats->syncp);
+ } else {
+ dev_dbg(dev, "TXCMPL: skb:%p, skb->len %d, skb->data_len %d, cons_idx:%d prod_idx:%d word2:0x%x word3:0x%x\n",
+ skb, skb->len, skb->data_len, cons_idx, prod_idx,
+ txcmpl->word2, txcmpl->word3);
+
+ txcmpl_errors = EDMA_TXCOMP_RING_ERROR_GET(txcmpl->word3);
+ if (unlikely(txcmpl_errors)) {
+ if (net_ratelimit())
+ dev_err(dev, "Error 0x%0x observed in tx complete %d ring\n",
+ txcmpl_errors, txcmpl_ring->id);
+
+ u64_stats_update_begin(&txcmpl_stats->syncp);
+ ++txcmpl_stats->errors;
+ u64_stats_update_end(&txcmpl_stats->syncp);
+ }
+
+ /* Retrieve pool id for unmapping.
+ * 0 for linear skb and (pool id - 1) represents nr_frag index.
+ */
+ if (!EDMA_TXCOMP_POOL_ID_GET(txcmpl)) {
+ dma_unmap_single(dev, virt_to_phys(skb->data),
+ skb->len, DMA_TO_DEVICE);
+ } else {
+ u8 frag_index = (EDMA_TXCOMP_POOL_ID_GET(txcmpl) - 1);
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[frag_index];
+
+ dma_unmap_page(dev, virt_to_phys(frag),
+ PAGE_SIZE, DMA_TO_DEVICE);
+ }
+
+ dev_kfree_skb(skb);
+ }
+
+ cons_idx = ((cons_idx + 1) & EDMA_TX_RING_SIZE_MASK);
+ txcmpl = EDMA_TXCMPL_DESC(txcmpl_ring, cons_idx);
+ }
+
+ txcmpl_ring->cons_idx = cons_idx;
+ txcmpl_ring->avail_pkt -= count;
+
+ dev_dbg(dev, "TXCMPL:%u count:%u prod_idx:%u cons_idx:%u\n",
+ txcmpl_ring->id, count, prod_idx, cons_idx);
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXCMPL_CONS_IDX(txcmpl_ring->id);
+ regmap_write(regmap, reg, cons_idx);
+
+ /* If tx_requeue_stop is disabled (tx_requeue_stop == 0),
+ * fetch the Tx queue of the interface and check whether it is stopped.
+ * If the queue is stopped and the interface is up, wake up this queue.
+ */
+ if (unlikely(!edma_ctx->tx_requeue_stop)) {
+ cpu_id = smp_processor_id();
+ nq = netdev_get_tx_queue(txcmpl_ring->napi.dev, cpu_id);
+ if (unlikely(netif_tx_queue_stopped(nq)) &&
+ netif_carrier_ok(txcmpl_ring->napi.dev)) {
+ dev_dbg(dev, "Waking queue number %d, for interface %s\n",
+ cpu_id, txcmpl_ring->napi.dev->name);
+ __netif_tx_lock(nq, cpu_id);
+ netif_tx_wake_queue(nq);
+ __netif_tx_unlock(nq);
+ }
+ }
+
+ return count;
+}
+
+/**
+ * edma_tx_napi_poll - EDMA TX NAPI handler.
+ * @napi: NAPI structure.
+ * @budget: Tx NAPI Budget.
+ *
+ * EDMA TX NAPI handler.
+ */
+int edma_tx_napi_poll(struct napi_struct *napi, int budget)
+{
+ struct edma_txcmpl_ring *txcmpl_ring = (struct edma_txcmpl_ring *)napi;
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct regmap *regmap = ppe_dev->regmap;
+ u32 txcmpl_intr_status;
+ int work_done = 0;
+ u32 data, reg;
+
+ do {
+ work_done += edma_tx_complete(budget - work_done, txcmpl_ring);
+ if (work_done >= budget)
+ return work_done;
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TX_INT_STAT(txcmpl_ring->id);
+ regmap_read(regmap, reg, &data);
+ txcmpl_intr_status = data & EDMA_TXCMPL_RING_INT_STATUS_MASK;
+ } while (txcmpl_intr_status);
+
+ /* No more packets to process. Finish NAPI processing. */
+ napi_complete(napi);
+
+ /* Set TXCMPL ring interrupt mask. */
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TX_INT_MASK(txcmpl_ring->id);
+ regmap_write(regmap, reg, edma_ctx->intr_info.intr_mask_txcmpl);
+
+ return work_done;
+}
+
+/**
+ * edma_tx_handle_irq - Tx IRQ Handler.
+ * @irq: Interrupt request.
+ * @ctx: Context.
+ *
+ * Process TX IRQ and schedule NAPI.
+ *
+ * Return IRQ handler code.
+ */
+irqreturn_t edma_tx_handle_irq(int irq, void *ctx)
+{
+ struct edma_txcmpl_ring *txcmpl_ring = (struct edma_txcmpl_ring *)ctx;
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct regmap *regmap = ppe_dev->regmap;
+ u32 reg;
+
+ pr_debug("irq: irq=%d txcmpl_ring_id=%u\n", irq, txcmpl_ring->id);
+ if (likely(napi_schedule_prep(&txcmpl_ring->napi))) {
+ /* Disable TxCmpl intr. */
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TX_INT_MASK(txcmpl_ring->id);
+ regmap_write(regmap, reg, EDMA_MASK_INT_DISABLE);
+ __napi_schedule(&txcmpl_ring->napi);
+ }
+
+ return IRQ_HANDLED;
+}
+
+static void edma_tx_dma_unmap_frags(struct sk_buff *skb, u32 nr_frags)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct device *dev = ppe_dev->dev;
+ u32 buf_len = 0;
+ u8 i = 0;
+
+ for (i = 0; i < skb_shinfo(skb)->nr_frags - nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+ /* DMA mapping was not done for zero size segments. */
+ buf_len = skb_frag_size(frag);
+ if (unlikely(buf_len == 0))
+ continue;
+
+ dma_unmap_page(dev, virt_to_phys(frag), PAGE_SIZE,
+ DMA_TO_DEVICE);
+ }
+}
+
+static u32 edma_tx_skb_nr_frags(struct edma_txdesc_ring *txdesc_ring,
+ struct edma_txdesc_pri **txdesc, struct sk_buff *skb,
+ u32 *hw_next_to_use, u32 *invalid_frag)
+{
+ u32 nr_frags = 0, buf_len = 0, num_descs = 0, start_idx = 0, end_idx = 0;
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ u32 start_hw_next_to_use = *hw_next_to_use;
+ struct edma_txdesc_pri *txd = *txdesc;
+ struct device *dev = ppe_dev->dev;
+ u8 i = 0;
+
+ /* Hold onto the index mapped to *txdesc.
+ * This will be the index previous to that of current *hw_next_to_use.
+ */
+ start_idx = (((*hw_next_to_use) + EDMA_TX_RING_SIZE_MASK)
+ & EDMA_TX_RING_SIZE_MASK);
+
+ /* Handle if the skb has nr_frags. */
+ nr_frags = skb_shinfo(skb)->nr_frags;
+ num_descs = nr_frags;
+ i = 0;
+
+ while (nr_frags--) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ dma_addr_t buff_addr;
+
+ buf_len = skb_frag_size(frag);
+
+ /* A zero size segment can cause the EDMA HW to hang, so we don't want
+ * to process it. Zero size segments can happen during TSO operation
+ * if there is nothing but the header in the primary segment.
+ */
+ if (unlikely(buf_len == 0)) {
+ num_descs--;
+ i++;
+ continue;
+ }
+
+ /* Setting the MORE bit on the previous Tx descriptor.
+ * Note: We will flush this descriptor as well later.
+ */
+ EDMA_TXDESC_MORE_BIT_SET(txd, 1);
+ EDMA_TXDESC_ENDIAN_SET(txd);
+
+ txd = EDMA_TXDESC_PRI_DESC(txdesc_ring, *hw_next_to_use);
+ memset(txd, 0, sizeof(struct edma_txdesc_pri));
+ buff_addr = skb_frag_dma_map(dev, frag, 0, buf_len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, buff_addr)) {
+ dev_dbg(dev, "Unable to dma first descriptor for nr_frags tx\n");
+ *hw_next_to_use = start_hw_next_to_use;
+ *invalid_frag = nr_frags;
+ return 0;
+ }
+
+ EDMA_TXDESC_BUFFER_ADDR_SET(txd, buff_addr);
+ EDMA_TXDESC_DATA_LEN_SET(txd, buf_len);
+ EDMA_TXDESC_POOL_ID_SET(txd, (i + 1));
+
+ *hw_next_to_use = ((*hw_next_to_use + 1) & EDMA_TX_RING_SIZE_MASK);
+ i++;
+ }
+
+ EDMA_TXDESC_ENDIAN_SET(txd);
+
+ /* This will be the index previous to that of current *hw_next_to_use. */
+ end_idx = (((*hw_next_to_use) + EDMA_TX_RING_SIZE_MASK) & EDMA_TX_RING_SIZE_MASK);
+
+ *txdesc = txd;
+
+ return num_descs;
+}
+
+static void edma_tx_fill_pp_desc(struct edma_port_priv *port_priv,
+ struct edma_txdesc_pri *txd, struct sk_buff *skb,
+ struct edma_port_tx_stats *stats)
+{
+ struct ppe_port *port = port_priv->ppe_port;
+ int port_id = port->port_id;
+
+ /* Offload L3/L4 checksum computation. */
+ if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
+ EDMA_TXDESC_ADV_OFFLOAD_SET(txd);
+ EDMA_TXDESC_IP_CSUM_SET(txd);
+ EDMA_TXDESC_L4_CSUM_SET(txd);
+ }
+
+ /* Check if the packet needs TSO.
+ * This will be mostly true for SG packets.
+ */
+ if (unlikely(skb_is_gso(skb))) {
+ if ((skb_shinfo(skb)->gso_type == SKB_GSO_TCPV4) ||
+ (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6)) {
+ u32 mss = skb_shinfo(skb)->gso_size;
+
+ /* If MSS<256, HW will do TSO using MSS=256,
+ * if MSS>10K, HW will do TSO using MSS=10K,
+ * else HW will report error 0x200000 in Tx Cmpl.
+ */
+ if (mss < EDMA_TX_TSO_MSS_MIN)
+ mss = EDMA_TX_TSO_MSS_MIN;
+ else if (mss > EDMA_TX_TSO_MSS_MAX)
+ mss = EDMA_TX_TSO_MSS_MAX;
+
+ EDMA_TXDESC_TSO_ENABLE_SET(txd, 1);
+ EDMA_TXDESC_MSS_SET(txd, mss);
+
+ /* Update tso stats. */
+ u64_stats_update_begin(&stats->syncp);
+ stats->tx_tso_pkts++;
+ u64_stats_update_end(&stats->syncp);
+ }
+ }
+
+ /* Set destination information in the descriptor. */
+ EDMA_TXDESC_SERVICE_CODE_SET(txd, PPE_EDMA_SC_BYPASS_ID);
+ EDMA_DST_INFO_SET(txd, port_id);
+}
+
+static struct edma_txdesc_pri *edma_tx_skb_first_desc(struct edma_port_priv *port_priv,
+ struct edma_txdesc_ring *txdesc_ring,
+ struct sk_buff *skb, u32 *hw_next_to_use,
+ struct edma_port_tx_stats *stats)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct edma_txdesc_pri *txd = NULL;
+ struct device *dev = ppe_dev->dev;
+ dma_addr_t buff_addr;
+ u32 buf_len = 0;
+
+ /* Get the packet length. */
+ buf_len = skb_headlen(skb);
+ txd = EDMA_TXDESC_PRI_DESC(txdesc_ring, *hw_next_to_use);
+ memset(txd, 0, sizeof(struct edma_txdesc_pri));
+
+ /* Set the data pointer as the buffer address in the descriptor. */
+ buff_addr = dma_map_single(dev, skb->data, buf_len, DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, buff_addr)) {
+ dev_dbg(dev, "Unable to dma first descriptor for tx\n");
+ return NULL;
+ }
+
+ EDMA_TXDESC_BUFFER_ADDR_SET(txd, buff_addr);
+ EDMA_TXDESC_POOL_ID_SET(txd, 0);
+ edma_tx_fill_pp_desc(port_priv, txd, skb, stats);
+
+ /* Set packet length in the descriptor. */
+ EDMA_TXDESC_DATA_LEN_SET(txd, buf_len);
+ *hw_next_to_use = (*hw_next_to_use + 1) & EDMA_TX_RING_SIZE_MASK;
+
+ return txd;
+}
+
+static void edma_tx_handle_dma_err(struct sk_buff *skb, u32 num_sg_frag_list)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct device *dev = ppe_dev->dev;
+ struct sk_buff *iter_skb = NULL;
+ u32 cnt_sg_frag_list = 0;
+
+ /* Walk through all fraglist skbs. */
+ skb_walk_frags(skb, iter_skb) {
+ if (skb_headlen(iter_skb)) {
+ dma_unmap_single(dev, virt_to_phys(iter_skb->data),
+ skb_headlen(iter_skb), DMA_TO_DEVICE);
+ cnt_sg_frag_list += 1;
+ }
+
+ if (cnt_sg_frag_list == num_sg_frag_list)
+ return;
+
+ /* skb fraglist skb had nr_frags, unmap that memory. */
+ u32 nr_frags = skb_shinfo(iter_skb)->nr_frags;
+
+ if (nr_frags == 0)
+ continue;
+
+ for (int i = 0; i < nr_frags; i++) {
+ skb_frag_t *frag = &skb_shinfo(iter_skb)->frags[i];
+
+ /* DMA mapping was not done for zero size segments. */
+ if (unlikely(skb_frag_size(frag) == 0))
+ continue;
+
+ dma_unmap_page(dev, virt_to_phys(frag),
+ PAGE_SIZE, DMA_TO_DEVICE);
+ cnt_sg_frag_list += 1;
+ if (cnt_sg_frag_list == num_sg_frag_list)
+ return;
+ }
+ }
+}
+
+static u32 edma_tx_skb_sg_fill_desc(struct edma_txdesc_ring *txdesc_ring,
+ struct edma_txdesc_pri **txdesc,
+ struct sk_buff *skb, u32 *hw_next_to_use,
+ struct edma_port_tx_stats *stats)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ u32 start_hw_next_to_use = 0, invalid_frag = 0;
+ struct edma_txdesc_pri *txd = *txdesc;
+ struct device *dev = ppe_dev->dev;
+ struct sk_buff *iter_skb = NULL;
+ u32 buf_len = 0, num_descs = 0;
+ u32 num_sg_frag_list = 0;
+
+ /* Head skb processed already. */
+ num_descs++;
+
+ if (unlikely(skb_has_frag_list(skb))) {
+ struct edma_txdesc_pri *start_desc = NULL;
+ u32 start_idx = 0, end_idx = 0;
+
+ /* Hold onto the index mapped to txd.
+ * This will be the index previous to that of current *hw_next_to_use.
+ */
+ start_idx = (((*hw_next_to_use) + EDMA_TX_RING_SIZE_MASK)
+ & EDMA_TX_RING_SIZE_MASK);
+ start_desc = txd;
+ start_hw_next_to_use = *hw_next_to_use;
+
+ /* Walk through all fraglist skbs. */
+ skb_walk_frags(skb, iter_skb) {
+ dma_addr_t buff_addr;
+ u32 num_nr_frag = 0;
+
+ /* This case could happen during the packet decapsulation.
+ * All header content might be removed.
+ */
+ buf_len = skb_headlen(iter_skb);
+ if (unlikely(buf_len == 0))
+ goto skip_primary;
+
+ /* We make sure to flush this descriptor later. */
+ EDMA_TXDESC_MORE_BIT_SET(txd, 1);
+ EDMA_TXDESC_ENDIAN_SET(txd);
+
+ txd = EDMA_TXDESC_PRI_DESC(txdesc_ring, *hw_next_to_use);
+ memset(txd, 0, sizeof(struct edma_txdesc_pri));
+ buff_addr = dma_map_single(dev, iter_skb->data,
+ buf_len, DMA_TO_DEVICE);
+ if (dma_mapping_error(dev, buff_addr)) {
+ dev_dbg(dev, "Unable to dma for fraglist\n");
+ goto dma_err;
+ }
+
+ EDMA_TXDESC_BUFFER_ADDR_SET(txd, buff_addr);
+ EDMA_TXDESC_DATA_LEN_SET(txd, buf_len);
+ EDMA_TXDESC_POOL_ID_SET(txd, 0);
+
+ *hw_next_to_use = (*hw_next_to_use + 1) & EDMA_TX_RING_SIZE_MASK;
+ num_descs += 1;
+ num_sg_frag_list += 1;
+
+ /* skb fraglist skb can have nr_frags. */
+skip_primary:
+ if (unlikely(skb_shinfo(iter_skb)->nr_frags)) {
+ num_nr_frag = edma_tx_skb_nr_frags(txdesc_ring, &txd,
+ iter_skb, hw_next_to_use,
+ &invalid_frag);
+ if (unlikely(!num_nr_frag)) {
+ dev_dbg(dev, "No descriptor available for ring %d\n",
+ txdesc_ring->id);
+ edma_tx_dma_unmap_frags(iter_skb, invalid_frag);
+ goto dma_err;
+ }
+
+ num_descs += num_nr_frag;
+ num_sg_frag_list += num_nr_frag;
+
+ /* Update fraglist with nr_frag stats. */
+ u64_stats_update_begin(&stats->syncp);
+ stats->tx_fraglist_with_nr_frags_pkts++;
+ u64_stats_update_end(&stats->syncp);
+ }
+ }
+
+ EDMA_TXDESC_ENDIAN_SET(txd);
+
+ /* This will be the index previous to
+ * that of current *hw_next_to_use.
+ */
+ end_idx = (((*hw_next_to_use) + EDMA_TX_RING_SIZE_MASK) &
+ EDMA_TX_RING_SIZE_MASK);
+
+ /* Update frag_list stats. */
+ u64_stats_update_begin(&stats->syncp);
+ stats->tx_fraglist_pkts++;
+ u64_stats_update_end(&stats->syncp);
+ } else {
+ /* Process skb with nr_frags. */
+ num_descs += edma_tx_skb_nr_frags(txdesc_ring, &txd, skb,
+ hw_next_to_use, &invalid_frag);
+ if (unlikely(!num_descs)) {
+ dev_dbg(dev, "No descriptor available for ring %d\n", txdesc_ring->id);
+ edma_tx_dma_unmap_frags(skb, invalid_frag);
+ *txdesc = NULL;
+ return num_descs;
+ }
+
+ u64_stats_update_begin(&stats->syncp);
+ stats->tx_nr_frag_pkts++;
+ u64_stats_update_end(&stats->syncp);
+ }
+
+ dev_dbg(dev, "skb:%p num_descs_filled: %u, nr_frags %u, frag_list fragments %u\n",
+ skb, num_descs, skb_shinfo(skb)->nr_frags, num_sg_frag_list);
+
+ *txdesc = txd;
+
+ return num_descs;
+
+dma_err:
+ if (!num_sg_frag_list)
+ goto reset_state;
+
+ edma_tx_handle_dma_err(skb, num_sg_frag_list);
+
+reset_state:
+ *hw_next_to_use = start_hw_next_to_use;
+ *txdesc = NULL;
+
+ return 0;
+}
+
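+/* Read the hardware consumer index of the Tx descriptor ring and compute
+ * how many descriptors are free between it and the given producer index.
+ */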
+static u32 edma_tx_avail_desc(struct edma_txdesc_ring *txdesc_ring,
+ u32 hw_next_to_use)
+{
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ u32 data = 0, avail = 0, hw_next_to_clean = 0;
+ struct regmap *regmap = ppe_dev->regmap;
+ u32 reg;
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC_CONS_IDX(txdesc_ring->id);
+ regmap_read(regmap, reg, &data);
+ hw_next_to_clean = data & EDMA_TXDESC_CONS_IDX_MASK;
+
+ avail = EDMA_DESC_AVAIL_COUNT(hw_next_to_clean - 1,
+ hw_next_to_use, EDMA_TX_RING_SIZE);
+
+ return avail;
+}
+
+/**
+ * edma_tx_ring_xmit - Transmit a packet.
+ * @netdev: Netdevice.
+ * @skb: Socket Buffer.
+ * @txdesc_ring: Tx Descriptor ring.
+ * @stats: EDMA Tx Statistics.
+ *
+ * Check for available descriptors, fill the descriptors
+ * and transmit both linear and non-linear packets.
+ *
+ * Return EDMA_TX_OK on success, or an edma_tx_status error code on failure.
+ */
+enum edma_tx_status edma_tx_ring_xmit(struct net_device *netdev,
+ struct sk_buff *skb, struct edma_txdesc_ring *txdesc_ring,
+ struct edma_port_tx_stats *stats)
+{
+ struct edma_txdesc_stats *txdesc_stats = &txdesc_ring->txdesc_stats;
+ struct edma_port_priv *port_priv = netdev_priv(netdev);
+ u32 num_tx_desc_needed = 0, num_desc_filled = 0;
+ struct ppe_device *ppe_dev = edma_ctx->ppe_dev;
+ struct ppe_port *port = port_priv->ppe_port;
+ struct regmap *regmap = ppe_dev->regmap;
+ struct edma_txdesc_pri *txdesc = NULL;
+ struct device *dev = ppe_dev->dev;
+ int port_id = port->port_id;
+ u32 hw_next_to_use = 0;
+ u32 reg;
+
+ hw_next_to_use = txdesc_ring->prod_idx;
+
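+	/* avail_desc caches the free-descriptor count; re-read the hardware
+	 * consumer index only when the cached count has dropped to zero.
+	 */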
+ if (unlikely(!(txdesc_ring->avail_desc))) {
+ txdesc_ring->avail_desc = edma_tx_avail_desc(txdesc_ring,
+ hw_next_to_use);
+ if (unlikely(!txdesc_ring->avail_desc)) {
+			netdev_dbg(netdev, "No descriptors available on ring %d\n",
+				   txdesc_ring->id);
+
+ u64_stats_update_begin(&txdesc_stats->syncp);
+ ++txdesc_stats->no_desc_avail;
+ u64_stats_update_end(&txdesc_stats->syncp);
+ return EDMA_TX_FAIL_NO_DESC;
+ }
+ }
+
+	/* For a linear skb, process only the head skb.
+	 * For a non-linear skb, process the head skb, nr_frags and fraglist.
+	 */
+ if (likely(!skb_is_nonlinear(skb))) {
+ txdesc = edma_tx_skb_first_desc(port_priv, txdesc_ring, skb,
+ &hw_next_to_use, stats);
+ if (unlikely(!txdesc)) {
+ netdev_dbg(netdev, "No descriptor available for ring %d\n",
+ txdesc_ring->id);
+ u64_stats_update_begin(&txdesc_stats->syncp);
+ ++txdesc_stats->no_desc_avail;
+ u64_stats_update_end(&txdesc_stats->syncp);
+ return EDMA_TX_FAIL_NO_DESC;
+ }
+
+ EDMA_TXDESC_ENDIAN_SET(txdesc);
+ num_desc_filled++;
+ } else {
+ num_tx_desc_needed = edma_tx_num_descs_for_sg(skb);
+
+		/* The hardware does not support TSO for packets with more than
+		 * 32 segments and hangs if it sees more. With netdev gso_max_segs
+		 * set to 32, the kernel performs GSO for such packets instead.
+		 */
+ if (unlikely(num_tx_desc_needed > EDMA_TX_TSO_SEG_MAX)) {
+ netdev_dbg(netdev, "Number of segments %u more than %u for %d ring\n",
+ num_tx_desc_needed, EDMA_TX_TSO_SEG_MAX, txdesc_ring->id);
+ u64_stats_update_begin(&txdesc_stats->syncp);
+ ++txdesc_stats->tso_max_seg_exceed;
+ u64_stats_update_end(&txdesc_stats->syncp);
+
+ u64_stats_update_begin(&stats->syncp);
+ stats->tx_tso_drop_pkts++;
+ u64_stats_update_end(&stats->syncp);
+
+ return EDMA_TX_FAIL;
+ }
+
+ if (unlikely(num_tx_desc_needed > txdesc_ring->avail_desc)) {
+ txdesc_ring->avail_desc = edma_tx_avail_desc(txdesc_ring,
+ hw_next_to_use);
+ if (num_tx_desc_needed > txdesc_ring->avail_desc) {
+ u64_stats_update_begin(&txdesc_stats->syncp);
+ ++txdesc_stats->no_desc_avail;
+ u64_stats_update_end(&txdesc_stats->syncp);
+				netdev_dbg(netdev, "Not enough descriptors available on ring %d for SG packet. Needed %d, currently available %d\n",
+ txdesc_ring->id, num_tx_desc_needed,
+ txdesc_ring->avail_desc);
+ return EDMA_TX_FAIL_NO_DESC;
+ }
+ }
+
+ txdesc = edma_tx_skb_first_desc(port_priv, txdesc_ring, skb,
+ &hw_next_to_use, stats);
+ if (unlikely(!txdesc)) {
+ netdev_dbg(netdev, "No non-linear descriptor available for ring %d\n",
+ txdesc_ring->id);
+ u64_stats_update_begin(&txdesc_stats->syncp);
+ ++txdesc_stats->no_desc_avail;
+ u64_stats_update_end(&txdesc_stats->syncp);
+ return EDMA_TX_FAIL_NO_DESC;
+ }
+
+ num_desc_filled = edma_tx_skb_sg_fill_desc(txdesc_ring,
+ &txdesc, skb, &hw_next_to_use, stats);
+ if (unlikely(!txdesc)) {
+ netdev_dbg(netdev, "No descriptor available for ring %d\n",
+ txdesc_ring->id);
+ dma_unmap_single(dev, virt_to_phys(skb->data),
+ skb->len, DMA_TO_DEVICE);
+ u64_stats_update_begin(&txdesc_stats->syncp);
+ ++txdesc_stats->no_desc_avail;
+ u64_stats_update_end(&txdesc_stats->syncp);
+ return EDMA_TX_FAIL_NO_DESC;
+ }
+ }
+
+	/* Store the skb pointer in the opaque field(s) of the last
+	 * descriptor of the packet (or SG packet).
+	 */
+ EDMA_TXDESC_OPAQUE_SET(txdesc, skb);
+
+ /* Update producer index. */
+ txdesc_ring->prod_idx = hw_next_to_use & EDMA_TXDESC_PROD_IDX_MASK;
+ txdesc_ring->avail_desc -= num_desc_filled;
+
+	netdev_dbg(netdev, "%s: skb:%p tx_ring:%u proto:0x%x skb->len:%d port:%u prod_idx:%u ip_summed:0x%x\n",
+ netdev->name, skb, txdesc_ring->id, ntohs(skb->protocol),
+ skb->len, port_id, hw_next_to_use, skb->ip_summed);
+
+ reg = EDMA_BASE_OFFSET + EDMA_REG_TXDESC_PROD_IDX(txdesc_ring->id);
+ regmap_write(regmap, reg, txdesc_ring->prod_idx);
+
+ u64_stats_update_begin(&stats->syncp);
+ stats->tx_pkts++;
+ stats->tx_bytes += skb->len;
+ u64_stats_update_end(&stats->syncp);
+
+ return EDMA_TX_OK;
+}
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/ppe/edma_tx.h
@@ -0,0 +1,302 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved.
+ */
+
+#ifndef __EDMA_TX__
+#define __EDMA_TX__
+
+#include "edma_port.h"
+
+#define EDMA_GET_DESC(R, i, type) (&(((type *)((R)->desc))[(i)]))
+#define EDMA_GET_PDESC(R, i, type) (&(((type *)((R)->pdesc))[(i)]))
+#define EDMA_GET_SDESC(R, i, type) (&(((type *)((R)->sdesc))[(i)]))
+#define EDMA_TXCMPL_DESC(R, i) EDMA_GET_DESC(R, i, \
+ struct edma_txcmpl_desc)
+#define EDMA_TXDESC_PRI_DESC(R, i) EDMA_GET_PDESC(R, i, \
+ struct edma_txdesc_pri)
+#define EDMA_TXDESC_SEC_DESC(R, i) EDMA_GET_SDESC(R, i, \
+ struct edma_txdesc_sec)
+
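+/* Modular distance from @tail to @head in a circular ring of @_max entries;
+ * @_max must be a power of two.
+ */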
+#define EDMA_DESC_AVAIL_COUNT(head, tail, _max) ({ \
+ typeof(_max) (max) = (_max); \
+ ((((head) - (tail)) + \
+ (max)) & ((max) - 1)); })
+
+#define EDMA_TX_RING_SIZE 2048
+#define EDMA_TX_RING_SIZE_MASK (EDMA_TX_RING_SIZE - 1)
+
+/* Max segment processing capacity of HW for TSO. */
+#define EDMA_TX_TSO_SEG_MAX 32
+
+/* Hardware-defined minimum and maximum MSS sizes. */
+#define EDMA_TX_TSO_MSS_MIN 256
+#define EDMA_TX_TSO_MSS_MAX 10240
+
+#define EDMA_DST_PORT_TYPE 2
+#define EDMA_DST_PORT_TYPE_SHIFT 28
+#define EDMA_DST_PORT_TYPE_MASK (0xf << EDMA_DST_PORT_TYPE_SHIFT)
+#define EDMA_DST_PORT_ID_SHIFT 16
+#define EDMA_DST_PORT_ID_MASK (0xfff << EDMA_DST_PORT_ID_SHIFT)
+
+#define EDMA_DST_PORT_TYPE_SET(x) (((x) << EDMA_DST_PORT_TYPE_SHIFT) & \
+ EDMA_DST_PORT_TYPE_MASK)
+#define EDMA_DST_PORT_ID_SET(x) (((x) << EDMA_DST_PORT_ID_SHIFT) & \
+ EDMA_DST_PORT_ID_MASK)
+#define EDMA_DST_INFO_SET(desc, x) ((desc)->word4 |= \
+ (EDMA_DST_PORT_TYPE_SET(EDMA_DST_PORT_TYPE) | EDMA_DST_PORT_ID_SET(x)))
+
+#define EDMA_TXDESC_TSO_ENABLE_MASK BIT(24)
+#define EDMA_TXDESC_TSO_ENABLE_SET(desc, x) ((desc)->word5 |= \
+ FIELD_PREP(EDMA_TXDESC_TSO_ENABLE_MASK, x))
+#define EDMA_TXDESC_MSS_MASK GENMASK(31, 16)
+#define EDMA_TXDESC_MSS_SET(desc, x) ((desc)->word6 |= \
+ FIELD_PREP(EDMA_TXDESC_MSS_MASK, x))
+#define EDMA_TXDESC_MORE_BIT_MASK BIT(30)
+#define EDMA_TXDESC_MORE_BIT_SET(desc, x) ((desc)->word1 |= \
+ FIELD_PREP(EDMA_TXDESC_MORE_BIT_MASK, x))
+
+#define EDMA_TXDESC_ADV_OFFSET_BIT BIT(31)
+#define EDMA_TXDESC_ADV_OFFLOAD_SET(desc) ((desc)->word5 |= \
+ FIELD_PREP(EDMA_TXDESC_ADV_OFFSET_BIT, 1))
+#define EDMA_TXDESC_IP_CSUM_BIT BIT(25)
+#define EDMA_TXDESC_IP_CSUM_SET(desc) ((desc)->word5 |= \
+ FIELD_PREP(EDMA_TXDESC_IP_CSUM_BIT, 1))
+
+#define EDMA_TXDESC_L4_CSUM_SET_MASK GENMASK(27, 26)
+#define EDMA_TXDESC_L4_CSUM_SET(desc) ((desc)->word5 |= \
+ (FIELD_PREP(EDMA_TXDESC_L4_CSUM_SET_MASK, 1)))
+
+#define EDMA_TXDESC_POOL_ID_SET_MASK GENMASK(24, 18)
+#define EDMA_TXDESC_POOL_ID_SET(desc, x) ((desc)->word5 |= \
+ (FIELD_PREP(EDMA_TXDESC_POOL_ID_SET_MASK, x)))
+
+#define EDMA_TXDESC_DATA_LEN_SET(desc, x) ((desc)->word5 |= ((x) & 0x1ffff))
+#define EDMA_TXDESC_SERVICE_CODE_MASK GENMASK(24, 16)
+#define EDMA_TXDESC_SERVICE_CODE_SET(desc, x) ((desc)->word1 |= \
+ (FIELD_PREP(EDMA_TXDESC_SERVICE_CODE_MASK, x)))
+#define EDMA_TXDESC_BUFFER_ADDR_SET(desc, addr) (((desc)->word0) = (addr))
+
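+/* The skb pointer is stored as an opaque value in the descriptor: on 64-bit
+ * kernels it spans word2 (low) and word3 (high), on 32-bit it fits in word2.
+ */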
+#ifdef __LP64__
+#define EDMA_TXDESC_OPAQUE_GET(_desc) ({ \
+ typeof(_desc) (desc) = (_desc); \
+ (((u64)(desc)->word3 << 32) | (desc)->word2); })
+
+#define EDMA_TXCMPL_OPAQUE_GET(_desc) ({ \
+ typeof(_desc) (desc) = (_desc); \
+ (((u64)(desc)->word1 << 32) | \
+ (desc)->word0); })
+
+#define EDMA_TXDESC_OPAQUE_LO_SET(desc, ptr) ((desc)->word2 = \
+ (u32)(uintptr_t)(ptr))
+
+#define EDMA_TXDESC_OPAQUE_HI_SET(desc, ptr) ((desc)->word3 = \
+ (u32)((u64)(ptr) >> 32))
+
+#define EDMA_TXDESC_OPAQUE_SET(_desc, _ptr) do { \
+ typeof(_desc) (desc) = (_desc); \
+ typeof(_ptr) (ptr) = (_ptr); \
+ EDMA_TXDESC_OPAQUE_LO_SET(desc, ptr); \
+ EDMA_TXDESC_OPAQUE_HI_SET(desc, ptr); \
+} while (0)
+#else
+#define EDMA_TXCMPL_OPAQUE_GET(desc) ((desc)->word0)
+#define EDMA_TXDESC_OPAQUE_GET(desc) ((desc)->word2)
+#define EDMA_TXDESC_OPAQUE_LO_SET(desc, ptr) ((desc)->word2 = (u32)(uintptr_t)ptr)
+
+#define EDMA_TXDESC_OPAQUE_SET(desc, ptr) \
+ EDMA_TXDESC_OPAQUE_LO_SET(desc, ptr)
+#endif
+#define EDMA_TXCMPL_MORE_BIT_MASK BIT(30)
+
+#define EDMA_TXCMPL_MORE_BIT_GET(desc) ((le32_to_cpu((__force __le32)((desc)->word2))) & \
+ EDMA_TXCMPL_MORE_BIT_MASK)
+
+#define EDMA_TXCOMP_RING_ERROR_MASK GENMASK(22, 0)
+
+#define EDMA_TXCOMP_RING_ERROR_GET(x) ((le32_to_cpu((__force __le32)x)) & \
+ EDMA_TXCOMP_RING_ERROR_MASK)
+
+#define EDMA_TXCOMP_POOL_ID_MASK GENMASK(5, 0)
+
+#define EDMA_TXCOMP_POOL_ID_GET(desc) ((le32_to_cpu((__force __le32)((desc)->word2))) & \
+ EDMA_TXCOMP_POOL_ID_MASK)
+
+/* Opaque values are set in word2 and word3;
+ * they are not accessed by the EDMA HW,
+ * so endianness conversion is not needed.
+ */
+#define EDMA_TXDESC_ENDIAN_SET(_desc) ({ \
+ typeof(_desc) (desc) = (_desc); \
+ cpu_to_le32s(&((desc)->word0)); \
+ cpu_to_le32s(&((desc)->word1)); \
+ cpu_to_le32s(&((desc)->word4)); \
+ cpu_to_le32s(&((desc)->word5)); \
+ cpu_to_le32s(&((desc)->word6)); \
+ cpu_to_le32s(&((desc)->word7)); \
+})
+
+/* EDMA Tx return status */
+enum edma_tx_status {
+ EDMA_TX_OK = 0, /* Tx success. */
+ EDMA_TX_FAIL_NO_DESC = 1, /* Not enough descriptors. */
+ EDMA_TX_FAIL = 2, /* Tx failure. */
+};
+
+/* EDMA Tx GSO status */
+enum edma_tx_gso_status {
+	/* GSO not needed, segment count is within EDMA_TX_TSO_SEG_MAX. */
+	EDMA_TX_GSO_NOT_NEEDED = 0,
+	/* GSO succeeded. */
+	EDMA_TX_GSO_SUCCEED = 1,
+	/* GSO failed, drop the packet. */
+	EDMA_TX_GSO_FAIL = 2,
+};
+
+/**
+ * struct edma_txcmpl_stats - EDMA TX complete ring statistics.
+ * @invalid_buffer: Invalid buffer address received.
+ * @errors: Other Tx complete descriptor errors indicated by the hardware.
+ * @desc_with_more_bit: Completion descriptors seen with the more bit set.
+ * @no_pending_desc: Times no descriptor was pending for processing.
+ * @syncp: Synchronization point for the 64-bit statistics.
+ */
+struct edma_txcmpl_stats {
+ u64 invalid_buffer;
+ u64 errors;
+ u64 desc_with_more_bit;
+ u64 no_pending_desc;
+ struct u64_stats_sync syncp;
+};
+
+/**
+ * struct edma_txdesc_stats - EDMA Tx descriptor ring statistics.
+ * @no_desc_avail: No descriptor available to transmit.
+ * @tso_max_seg_exceed: Packets exceeding EDMA_TX_TSO_SEG_MAX segments.
+ * @syncp: Synchronization point for the 64-bit statistics.
+ */
+struct edma_txdesc_stats {
+ u64 no_desc_avail;
+ u64 tso_max_seg_exceed;
+ struct u64_stats_sync syncp;
+};
+
+/**
+ * struct edma_txdesc_pri - EDMA primary TX descriptor.
+ * @word0: Low 32-bit of buffer address.
+ * @word1: Buffer recycling, PTP tag flag, PRI valid flag.
+ * @word2: Low 32-bit of opaque value.
+ * @word3: High 32-bit of opaque value.
+ * @word4: Source/Destination port info.
+ * @word5: VLAN offload, csum mode, ip_csum_en, tso_en, data len.
+ * @word6: MSS/hash_value/PTP tag, data offset.
+ * @word7: L4/L3 offset, PROT type, L2 type, CVLAN/SVLAN tag, service code.
+ */
+struct edma_txdesc_pri {
+ u32 word0;
+ u32 word1;
+ u32 word2;
+ u32 word3;
+ u32 word4;
+ u32 word5;
+ u32 word6;
+ u32 word7;
+};
+
+/**
+ * struct edma_txdesc_sec - EDMA secondary TX descriptor.
+ * @word0: Reserved.
+ * @word1: Custom csum offset, payload offset, TTL/NAT action.
+ * @word2: NAPT translated port, DSCP value, TTL value.
+ * @word3: Flow index value and valid flag.
+ * @word4: Reserved.
+ * @word5: Reserved.
+ * @word6: CVLAN/SVLAN command.
+ * @word7: CVLAN/SVLAN tag value.
+ */
+struct edma_txdesc_sec {
+ u32 word0;
+ u32 word1;
+ u32 word2;
+ u32 word3;
+ u32 word4;
+ u32 word5;
+ u32 word6;
+ u32 word7;
+};
+
+/**
+ * struct edma_txcmpl_desc - EDMA TX complete descriptor.
+ * @word0: Low 32-bit opaque value.
+ * @word1: High 32-bit opaque value.
+ * @word2: More fragment, transmit ring id, pool id.
+ * @word3: Error indications.
+ */
+struct edma_txcmpl_desc {
+ u32 word0;
+ u32 word1;
+ u32 word2;
+ u32 word3;
+};
+
+/**
+ * struct edma_txdesc_ring - EDMA TX descriptor ring
+ * @prod_idx: Producer index
+ * @id: Tx ring number
+ * @avail_desc: Number of available descriptor to process
+ * @pdesc: Primary descriptor ring virtual address
+ * @pdma: Primary descriptor ring physical address
+ * @sdesc: Secondary descriptor ring virtual address
+ * @txdesc_stats: Tx descriptor ring statistics
+ * @sdma: Secondary descriptor ring physical address
+ * @count: Number of descriptors
+ * @fc_grp_id: Flow control group ID
+ */
+struct edma_txdesc_ring {
+ u32 prod_idx;
+ u32 id;
+ u32 avail_desc;
+ struct edma_txdesc_pri *pdesc;
+ dma_addr_t pdma;
+ struct edma_txdesc_sec *sdesc;
+ struct edma_txdesc_stats txdesc_stats;
+ dma_addr_t sdma;
+ u32 count;
+ u8 fc_grp_id;
+};
+
+/**
+ * struct edma_txcmpl_ring - EDMA TX complete ring
+ * @napi: NAPI
+ * @cons_idx: Consumer index
+ * @avail_pkt: Number of available packets to process
+ * @desc: Descriptor ring virtual address
+ * @id: Txcmpl ring number
+ * @txcmpl_stats: Tx complete ring statistics
+ * @dma: Descriptor ring physical address
+ * @count: Number of descriptors in the ring
+ * @napi_added: Flag to indicate NAPI add status
+ */
+struct edma_txcmpl_ring {
+ struct napi_struct napi;
+ u32 cons_idx;
+ u32 avail_pkt;
+ struct edma_txcmpl_desc *desc;
+ u32 id;
+ struct edma_txcmpl_stats txcmpl_stats;
+ dma_addr_t dma;
+ u32 count;
+ bool napi_added;
+};
+
+enum edma_tx_status edma_tx_ring_xmit(struct net_device *netdev,
+ struct sk_buff *skb,
+ struct edma_txdesc_ring *txdesc_ring,
+ struct edma_port_tx_stats *stats);
+u32 edma_tx_complete(u32 work_to_do,
+ struct edma_txcmpl_ring *txcmpl_ring);
+irqreturn_t edma_tx_handle_irq(int irq, void *ctx);
+int edma_tx_napi_poll(struct napi_struct *napi, int budget);
+enum edma_tx_gso_status edma_tx_gso_segment(struct sk_buff *skb,
+ struct net_device *netdev, struct sk_buff **segs);
+
+#endif