lede/target/linux/bcm27xx/patches-6.12/950-0985-PCI-brcmstb-Add-link-statistics-debug-features.patch
=?UTF-8?q?=C3=81lvaro=20Fern=C3=A1ndez=20Rojas?= d81c03f05e bcm27xx: add 6.12 patches from RPi repo
These patches were generated from:
https://github.com/raspberrypi/linux/commits/rpi-6.12.y
With the following command:
git format-patch -N v6.12.27..HEAD
(HEAD -> 8d3206ee456a5ecdf9ddbfd8e5e231e4f0cd716e)

Exceptions:
- (def)configs patches
- github workflows patches
- applied & reverted patches
- readme patches
- wireless patches

Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com>
2025-06-20 17:01:06 +08:00

482 lines
14 KiB
Diff

From 95059173ffd6aafe65953bd147d39e1b71d8471f Mon Sep 17 00:00:00 2001
From: Jonathan Bell <jonathan@raspberrypi.com>
Date: Thu, 27 Mar 2025 16:53:58 +0000
Subject: [PATCH] PCI: brcmstb: Add link statistics debug features
Add two features that assist in diagnosing link instability issues.
The debugfs additions allow for snapshots of the Physical Layer
statistics registers to be taken, during either free-running capture or
after a hardware-controlled capture interval.
To arm the capture engine (and reset the stats counters), write an
integer N to:
/sys/kernel/debug/pcie@<addr>/stats_trigger
The engine will run forever with a value of 0, or disarm after N
microseconds.
To snapshot the hardware stats counters, write to:
/sys/kernel/debug/pcie@<addr>/stats_snapshot
Reading this file will return the snapshot. If no writes have occurred
since boot, the snapshot will be of the initial link training period.
The trace_ltssm module parameter printk's the states during initial link
startup, in situations where failure to establish the link is a fatal
error.
Signed-off-by: Jonathan Bell <jonathan@raspberrypi.com>
---
drivers/pci/controller/pcie-brcmstb.c | 369 ++++++++++++++++++++++++++
1 file changed, 369 insertions(+)
--- a/drivers/pci/controller/pcie-brcmstb.c
+++ b/drivers/pci/controller/pcie-brcmstb.c
@@ -5,6 +5,7 @@
#include <linux/bitops.h>
#include <linux/clk.h>
#include <linux/compiler.h>
+#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/interrupt.h>
@@ -62,6 +63,34 @@
#define PCIE_RC_PL_PHY_CTL_15_DIS_PLL_PD_MASK 0x400000
#define PCIE_RC_PL_PHY_CTL_15_PM_CLK_PERIOD_MASK 0xff
+/*
+ * Statistics capture control register. The driver toggles EN 0->1 to clear
+ * the counters and start a capture; LEN takes the capture length in
+ * microseconds, with 0 meaning continuous capture.
+ */
+#define PCIE_RC_PL_STATS_CTRL 0x1940
+#define PCIE_RC_PL_STATS_CTRL_EN_MASK 0x1
+#define PCIE_RC_PL_STATS_CTRL_LEN_MASK 0xfffffff0
+
+/* TLP/DLLP counters are read as LO/HI 32-bit pairs; error counters as single 32-bit reads */
+#define PCIE_RC_PL_STATS_TXTLP_LO 0x1944
+#define PCIE_RC_PL_STATS_TXTLP_HI 0x1948
+#define PCIE_RC_PL_STATS_TXDLLP_LO 0x194c
+#define PCIE_RC_PL_STATS_TXDLLP_HI 0x1950
+#define PCIE_RC_PL_STATS_RXTLP_LO 0x195c
+#define PCIE_RC_PL_STATS_RXTLP_HI 0x1960
+#define PCIE_RC_PL_STATS_RXDLLP_LO 0x1964
+#define PCIE_RC_PL_STATS_RXDLLP_HI 0x1968
+#define PCIE_RC_PL_STATS_RXPL_ERR 0x1974
+#define PCIE_RC_PL_STATS_RXDL_ERR 0x1978
+#define PCIE_RC_PL_STATS_RXTL_ERR 0x197c
+
+/* Two 16-bit time fields (L0s exit, recovery); units are not stated here - TODO confirm */
+#define PCIE_RC_PL_LTSSM_STATS_3 0x19b0
+#define PCIE_RC_PL_LTSSM_STATS_3_TIME_L0S_MASK 0xffff0000
+#define PCIE_RC_PL_LTSSM_STATS_3_TIME_RECOV_MASK 0x0000ffff
+
+/* Two 16-bit event counts (L0s failures, recoveries) */
+#define PCIE_RC_PL_LTSSM_STATS_CNT 0x19b4
+#define PCIE_RC_PL_LTSSM_STATS_CNT_L0S_FAIL_MASK 0xffff0000
+#define PCIE_RC_PL_LTSSM_STATS_CNT_RECOV_MASK 0x0000ffff
+
+/* LTSSM history registers, 4 bytes apart; each 32-bit word covers four one-byte state entries */
+#define PCIE_RC_PL_LTSSM_HIST_0 0x1cec
+#define PCIE_RC_PL_LTSSM_HIST(n) \
+ (PCIE_RC_PL_LTSSM_HIST_0 + ((n) * 4))
+
#define PCIE_MISC_MISC_CTRL 0x4008
#define PCIE_MISC_MISC_CTRL_PCIE_RCB_64B_MODE_MASK 0x80
#define PCIE_MISC_MISC_CTRL_PCIE_RCB_MPS_MODE_MASK 0x400
@@ -296,6 +325,91 @@ struct inbound_win {
u64 cpu_addr;
};
+/* Maximum number of LTSSM trace entries recorded in one capture */
+#define TRACE_BUF_LEN 64
+/* One recorded LTSSM history entry */
+struct trace_entry {
+ u8 st; /* raw state byte taken from the LTSSM history register */
+ ktime_t ts; /* time the entry was recorded, relative to the start of the capture */
+};
+
+/*
+ * Translate a raw LTSSM state byte, as read from the LTSSM history
+ * registers, into a printable state name.
+ *
+ * Returns a pointer to a constant string, or NULL if @state is not one of
+ * the known encodings. Callers also use a NULL return to reject bytes that
+ * do not look like a valid state.
+ */
+static const char *brcm_pcie_decode_ltssm_state(u8 state)
+{
+	switch (state) {
+	case 0x01:
+		return "L0";
+	case 0x02:
+		return "L1";
+	case 0x03:
+		return "L2";
+	case 0x04:
+		return "RxL0s_TxL0s";
+	case 0x05:
+		return "RxL0_TxL0s";
+	case 0x09:
+		return "Polling.Active";
+	case 0x0A:
+		return "Polling.Configuration";
+	case 0x0C:
+		return "Polling.Compliance";
+	case 0x10:
+		return "Configuration.Link";
+	case 0x11:
+		return "Configuration.Linkwidth.Accept";
+	case 0x12:
+		return "Configuration.Lanenum.Wait";
+	case 0x13:
+		return "Configuration.Lanenum.Accept";
+	case 0x14:
+		return "Configuration.Complete";
+	case 0x15:
+		return "Configuration.Idle";
+	case 0x20:
+		return "Recovery.RcvrLock";
+	case 0x21:
+		return "Recovery.RcvrCfg";
+	case 0x22:
+		return "Recovery.Idle";
+	case 0x23:
+		return "Recovery.Speed";
+	case 0x24:
+		return "Recovery.EQ_Phase0";
+	case 0x25:
+		return "Recovery.EQ_Phase1";
+	case 0x26:
+		return "Recovery.EQ_Phase2";
+	case 0x27:
+		return "Recovery.EQ_Phase3";
+	case 0x40:
+		return "Disable";
+	case 0x43:
+		return "Reset";
+	case 0x47:
+		return "Loopback.Entry";
+	case 0x48:
+		return "Loopback.Active";
+	case 0x49:
+		return "Loopback.Exit";
+	case 0x4C:
+		return "Loopback.Master.Entry";
+	case 0x4D:
+		return "Loopback.Master.Active";
+	case 0x4E:
+		return "Loopback.Master.Exit";
+	case 0x70:
+		return "Recovery.Speed_Change";
+	case 0x80:
+		return "Detect.Quiet";
+	case 0x81:
+		return "Detect.Active";
+	case 0x82:
+		return "Detect.Wait";
+	case 0x83:
+		return "Detect.Active2";
+	default:
+		break;
+	}
+	return NULL;
+}
+
+
/*
* The RESCAL block is tied to PCIe controller #1, regardless of the number of
* controllers, and turning off PCIe controller #1 prevents access to the RESCAL
@@ -366,6 +480,21 @@ struct brcm_pcie {
bool ep_wakeup_capable;
const struct pcie_cfg_data *cfg;
u32 tperst_clk_ms;
+ bool trace_ltssm;
+ struct trace_entry *ltssm_trace_buf;
+ /* Statistics exposed in debugfs */
+ struct dentry *debugfs_dir;
+ u64 tx_tlp;
+ u64 rx_tlp;
+ u64 tx_dllp;
+ u64 rx_dllp;
+ u32 pl_rx_err;
+ u32 dl_rx_err;
+ u32 tl_rx_err;
+ u16 l0s_exit_time;
+ u16 recov_time;
+ u16 l0s_fail_cnt;
+ u16 recov_cnt;
};
static inline bool is_bmips(const struct brcm_pcie *pcie)
@@ -1495,6 +1624,133 @@ static void brcm_config_clkreq(struct br
dev_info(pcie->dev, "clkreq-mode set to %s\n", mode);
}
+/*
+ * Reset the statistics counters and (re)arm the capture engine.
+ *
+ * @micros: capture length in microseconds. Values that do not fit the
+ *          28-bit LEN field are clamped to its maximum; 0 requests
+ *          continuous capture.
+ */
+static void brcm_pcie_stats_trigger(struct brcm_pcie *pcie, u32 micros)
+{
+	void __iomem *ctrl_reg = pcie->base + PCIE_RC_PL_STATS_CTRL;
+	const u32 max_len = (1 << 28) - 1U;
+	u32 ctrl;
+
+	/*
+	 * The counters are only cleared, and capture only starts, on a 0->1
+	 * transition of CTRL_EN - so force the bit low first.
+	 */
+	ctrl = readl(ctrl_reg);
+	u32p_replace_bits(&ctrl, 0, PCIE_RC_PL_STATS_CTRL_EN_MASK);
+	writel(ctrl, ctrl_reg);
+
+	if (micros > max_len)
+		micros = max_len;
+
+	/* Program the capture length and raise CTRL_EN in a single write */
+	u32p_replace_bits(&ctrl, micros, PCIE_RC_PL_STATS_CTRL_LEN_MASK);
+	u32p_replace_bits(&ctrl, 1, PCIE_RC_PL_STATS_CTRL_EN_MASK);
+	writel(ctrl, ctrl_reg);
+}
+
+
+/*
+ * Read a 64-bit statistics counter that is exposed as two 32-bit registers.
+ *
+ * The low word is always read before the high word, so the access order no
+ * longer depends on how the compiler evaluates a single expression.
+ * NOTE(review): the two halves are still not read atomically; if the
+ * hardware does not latch the pair, a carry between the reads can tear the
+ * value - confirm against the controller documentation.
+ */
+static u64 brcm_pcie_stats_read64(struct brcm_pcie *pcie, u32 lo_off, u32 hi_off)
+{
+	u64 lo = readl(pcie->base + lo_off);
+	u64 hi = readl(pcie->base + hi_off);
+
+	return lo | (hi << 32);
+}
+
+/*
+ * Copy the current hardware statistics into the software snapshot held in
+ * @pcie. The snapshot is what the stats_snapshot debugfs file prints.
+ */
+static void brcm_pcie_stats_capture(struct brcm_pcie *pcie)
+{
+	u32 tmp;
+
+	/* Snapshot the counters - capture engine may still be running */
+	pcie->tx_tlp = brcm_pcie_stats_read64(pcie, PCIE_RC_PL_STATS_TXTLP_LO,
+					      PCIE_RC_PL_STATS_TXTLP_HI);
+	pcie->rx_tlp = brcm_pcie_stats_read64(pcie, PCIE_RC_PL_STATS_RXTLP_LO,
+					      PCIE_RC_PL_STATS_RXTLP_HI);
+	pcie->tx_dllp = brcm_pcie_stats_read64(pcie, PCIE_RC_PL_STATS_TXDLLP_LO,
+					       PCIE_RC_PL_STATS_TXDLLP_HI);
+	pcie->rx_dllp = brcm_pcie_stats_read64(pcie, PCIE_RC_PL_STATS_RXDLLP_LO,
+					       PCIE_RC_PL_STATS_RXDLLP_HI);
+
+	pcie->pl_rx_err = readl(pcie->base + PCIE_RC_PL_STATS_RXPL_ERR);
+	pcie->dl_rx_err = readl(pcie->base + PCIE_RC_PL_STATS_RXDL_ERR);
+	pcie->tl_rx_err = readl(pcie->base + PCIE_RC_PL_STATS_RXTL_ERR);
+
+	/* Each LTSSM stats register packs two 16-bit fields */
+	tmp = readl(pcie->base + PCIE_RC_PL_LTSSM_STATS_3);
+	pcie->l0s_exit_time = FIELD_GET(PCIE_RC_PL_LTSSM_STATS_3_TIME_L0S_MASK, tmp);
+	pcie->recov_time = FIELD_GET(PCIE_RC_PL_LTSSM_STATS_3_TIME_RECOV_MASK, tmp);
+
+	tmp = readl(pcie->base + PCIE_RC_PL_LTSSM_STATS_CNT);
+	pcie->l0s_fail_cnt = FIELD_GET(PCIE_RC_PL_LTSSM_STATS_CNT_L0S_FAIL_MASK, tmp);
+	pcie->recov_cnt = FIELD_GET(PCIE_RC_PL_LTSSM_STATS_CNT_RECOV_MASK, tmp);
+}
+
+
+/*
+ * Dump the link state machine transitions for the first 100ms after fundamental reset release.
+ * Most link training completes in a far shorter time.
+ *
+ * The CPU-intensive nature of the capture means that this should only be used to
+ * diagnose fatal link startup failures.
+ *
+ * Entries are recorded into pcie->ltssm_trace_buf (at most TRACE_BUF_LEN of
+ * them) and then printed with dev_info(). Only the entries recorded by this
+ * call are printed, so leftovers from an earlier capture in the same buffer
+ * are never shown.
+ */
+static void brcm_pcie_trace_link_start(struct brcm_pcie *pcie)
+{
+	struct device *dev = pcie->dev;
+	struct trace_entry *trace = pcie->ltssm_trace_buf;
+	int i = 0, j = 0, n;
+	u8 cur_state;
+	u32 ltssm_hist0, ltssm_hist1 = 0;
+	ktime_t start, timeout;
+
+	start = ktime_get();
+	timeout = ktime_add(start, ktime_set(0, NSEC_PER_MSEC * 100));
+	/*
+	 * The LTSSM history registers are implemented as an "open FIFO" where register data
+	 * shuffles along with each push - or moves under one's feet, if you prefer.
+	 * We can't atomically read more than one 32bit value (covering 4 entries), so poll
+	 * quickly while guessing the position of the first value we haven't seen yet.
+	 */
+	do {
+		/*
+		 * This delay appears to work around a HW bug where data can temporarily
+		 * appear nibble-shifted.
+		 */
+		ndelay(10);
+		/* Snapshot the FIFO state. Lowest different "byte" is latest data. */
+		ltssm_hist0 = readl(pcie->base + PCIE_RC_PL_LTSSM_HIST(3));
+		if (ltssm_hist0 == ltssm_hist1)
+			continue;
+		ltssm_hist1 = ltssm_hist0;
+
+		/*
+		 * If the "fifo" has changed, we don't know by how much.
+		 * Scan through byte-wise and look for states
+		 */
+		for (j = 24; j >= 0; j -= 8) {
+			cur_state = (ltssm_hist0 >> j) & 0xff;
+			/* Unassigned entry */
+			if (cur_state == 0xff)
+				continue;
+			if (i > 0 && trace[i - 1].st == cur_state) {
+				/*
+				 * This is probably what we last saw.
+				 * Next byte should be a new entry.
+				 */
+				j -= 8;
+				break;
+			} else if (i == 0 && brcm_pcie_decode_ltssm_state(cur_state)) {
+				/* Probably a new valid entry */
+				break;
+			}
+		}
+
+		/* Record every byte from the chosen position down to the newest entry */
+		for (; j >= 0 && i < TRACE_BUF_LEN; j -= 8) {
+			cur_state = (ltssm_hist0 >> j) & 0xff;
+			trace[i].st = cur_state;
+			trace[i].ts = ktime_sub(ktime_get(), start);
+			i++;
+		}
+		if (i == TRACE_BUF_LEN)
+			break;
+	} while (!ktime_after(ktime_get(), timeout));
+
+	dev_info(dev, "LTSSM trace captured %d events (max %d):\n",
+		 i, TRACE_BUF_LEN);
+	/* Bound the dump by what was captured, not by the buffer contents */
+	for (n = 0; n < i; n++) {
+		const char *name = brcm_pcie_decode_ltssm_state(trace[n].st);
+
+		/* ktime_to_us() returns a signed 64-bit value, hence %lld */
+		dev_info(dev, "%lld : %02x - %s\n",
+			 ktime_to_us(trace[n].ts),
+			 trace[n].st,
+			 name ? name : "Unknown");
+	}
+}
+
+
static int brcm_pcie_start_link(struct brcm_pcie *pcie)
{
struct device *dev = pcie->dev;
@@ -1508,6 +1764,8 @@ static int brcm_pcie_start_link(struct b
if (pcie->gen)
brcm_pcie_set_gen(pcie, pcie->gen);
+ brcm_pcie_stats_trigger(pcie, 0);
+
/* Unassert the fundamental reset */
if (pcie->tperst_clk_ms) {
/*
@@ -1531,6 +1789,8 @@ static int brcm_pcie_start_link(struct b
if (ret)
return ret;
+ if (pcie->trace_ltssm)
+ brcm_pcie_trace_link_start(pcie);
/*
* Wait for 100ms after PERST# deassertion; see PCIe CEM specification
* sections 2.2, PCIe r5.0, 6.6.1.
@@ -1567,6 +1827,9 @@ static int brcm_pcie_start_link(struct b
pci_speed_string(pcie_link_speed[cls]), nlw,
ssc_good ? "(SSC)" : "(!SSC)");
+ /* Snapshot the boot-time stats */
+ brcm_pcie_stats_capture(pcie);
+
/*
* RootCtl bits are reset by perst_n, which undoes pci_enable_crs()
* called prior to pci_add_new_bus() during probe. Re-enable here.
@@ -1898,6 +2161,7 @@ err_disable_clk:
static void __brcm_pcie_remove(struct brcm_pcie *pcie)
{
+ debugfs_remove_recursive(pcie->debugfs_dir);
brcm_msi_remove(pcie);
brcm_pcie_turn_off(pcie);
if (brcm_phy_stop(pcie))
@@ -2054,6 +2318,98 @@ static struct pci_ops brcm7425_pcie_ops
.remove_bus = brcm_pcie_remove_bus,
};
+/*
+ * debugfs write handler for "stats_trigger".
+ *
+ * Parses an unsigned integer (any base accepted by kstrtoul() with base 0)
+ * as the capture length in microseconds and arms the capture engine with it.
+ *
+ * Returns @count on success, -EOVERFLOW if the input does not fit the local
+ * buffer, -EFAULT if the user buffer cannot be read, -EINVAL if it is not a
+ * valid unsigned number, or -ERANGE if the value needs more than 28 bits.
+ */
+static ssize_t debugfs_stats_trigger_write(struct file *filp,
+					    const char __user *buf,
+					    size_t count, loff_t *ppos)
+{
+	struct seq_file *m = filp->private_data;
+	struct brcm_pcie *pcie = m->private;
+	char kbuf[16] = {};
+	unsigned long micros;
+
+	/* Keep at least one zero byte so the string stays NUL-terminated */
+	if (count >= sizeof(kbuf))
+		return -EOVERFLOW;
+
+	if (copy_from_user(kbuf, buf, count))
+		return -EFAULT;
+
+	/* micros is unsigned, so use the unsigned parser */
+	if (kstrtoul(kbuf, 0, &micros) < 0)
+		return -EINVAL;
+
+	if (micros >= (1 << 28))
+		return -ERANGE;
+
+	brcm_pcie_stats_trigger(pcie, micros);
+	return count;
+}
+
+
+/*
+ * debugfs read handler for "stats_trigger": prints the current value of the
+ * EN field of the statistics control register, followed by a newline.
+ */
+static int debugfs_stats_trigger_show(struct seq_file *s, void *unused)
+{
+	struct brcm_pcie *pcie = s->private;
+	u32 ctrl = readl(pcie->base + PCIE_RC_PL_STATS_CTRL);
+	u32 enabled = FIELD_GET(PCIE_RC_PL_STATS_CTRL_EN_MASK, ctrl);
+
+	seq_printf(s, "%u\n", enabled);
+	return 0;
+}
+DEFINE_SHOW_STORE_ATTRIBUTE(debugfs_stats_trigger);
+
+/*
+ * debugfs write handler for "stats_snapshot".
+ *
+ * The written bytes are never examined: the write itself is the request to
+ * copy the hardware counters into the software snapshot. Always reports the
+ * whole write as consumed.
+ */
+static ssize_t debugfs_stats_snapshot_write(struct file *filp,
+					     const char __user *buf,
+					     size_t count, loff_t *ppos)
+{
+	struct seq_file *m = filp->private_data;
+
+	brcm_pcie_stats_capture(m->private);
+	return count;
+}
+
+
+/*
+ * debugfs read handler for "stats_snapshot".
+ *
+ * Prints the most recently captured snapshot stored in struct brcm_pcie, one
+ * "name:<tab(s)>value" pair per line. It does not touch the hardware; the
+ * values only change when a new capture is taken.
+ */
+static int debugfs_stats_snapshot_show(struct seq_file *s, void *unused)
+{
+	struct brcm_pcie *pcie = s->private;
+
+	seq_printf(s, "tx_tlp:\t\t%llu\n", pcie->tx_tlp);
+	seq_printf(s, "rx_tlp:\t\t%llu\n", pcie->rx_tlp);
+	seq_printf(s, "tx_dllp:\t%llu\n", pcie->tx_dllp);
+	seq_printf(s, "rx_dllp:\t%llu\n", pcie->rx_dllp);
+	seq_printf(s, "pl_rx_err:\t%u\n", pcie->pl_rx_err);
+	seq_printf(s, "dl_rx_err:\t%u\n", pcie->dl_rx_err);
+	seq_printf(s, "tl_rx_err:\t%u\n", pcie->tl_rx_err);
+	seq_printf(s, "l0s_exit_time:\t%u\n", pcie->l0s_exit_time);
+	seq_printf(s, "recov_time:\t%u\n", pcie->recov_time);
+	/* Label carries the same ':' separator as every other line */
+	seq_printf(s, "l0s_fail_cnt:\t%u\n", pcie->l0s_fail_cnt);
+	seq_printf(s, "recov_cnt:\t%u\n", pcie->recov_cnt);
+
+	return 0;
+}
+DEFINE_SHOW_STORE_ATTRIBUTE(debugfs_stats_snapshot);
+
+/*
+ * Create the per-controller debugfs directory (named after the device tree
+ * node, at the debugfs root) and its "stats_snapshot" and "stats_trigger"
+ * files.
+ *
+ * Failure is not fatal: on any error the function returns without creating
+ * the files and pcie->debugfs_dir is left NULL.
+ */
+static void brcm_pcie_init_debugfs(struct brcm_pcie *pcie)
+{
+	char *name;
+
+	name = devm_kasprintf(pcie->dev, GFP_KERNEL, "%pOFP", pcie->dev->of_node);
+	if (!name)
+		return;
+
+	/* debugfs_create_dir() reports failure with an ERR_PTR, not NULL */
+	pcie->debugfs_dir = debugfs_create_dir(name, NULL);
+	if (IS_ERR(pcie->debugfs_dir)) {
+		pcie->debugfs_dir = NULL;
+		return;
+	}
+
+	debugfs_create_file("stats_snapshot", 0644, pcie->debugfs_dir, pcie,
+			    &debugfs_stats_snapshot_fops);
+	debugfs_create_file("stats_trigger", 0644, pcie->debugfs_dir, pcie,
+			    &debugfs_stats_trigger_fops);
+}
+
+
+/* Read-only (0444) module parameter; probe copies it into each controller's trace_ltssm flag */
+static bool trace_ltssm;
+module_param(trace_ltssm, bool, 0444);
+MODULE_PARM_DESC(trace_ltssm, "Capture and dump link states during link training");
+
static int brcm_pcie_probe(struct platform_device *pdev)
{
struct device_node *np = pdev->dev.of_node;
@@ -2076,6 +2432,19 @@ static int brcm_pcie_probe(struct platfo
pcie->dev = &pdev->dev;
pcie->np = np;
pcie->cfg = data;
+ pcie->trace_ltssm = trace_ltssm;
+
+ brcm_pcie_init_debugfs(pcie);
+
+ if (pcie->trace_ltssm) {
+ pcie->ltssm_trace_buf = devm_kzalloc(&pdev->dev,
+ sizeof(struct trace_entry) * TRACE_BUF_LEN,
+ GFP_KERNEL);
+ if (!pcie->ltssm_trace_buf) {
+ dev_err(&pdev->dev, "could not allocate trace buffer\n");
+ return -ENOMEM;
+ }
+ }
pcie->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(pcie->base))