forked from Openwrt/openwrt
8c405cdccc
The patches were generated from the RPi repo with the following command: `git format-patch v6.6.34..rpi-6.1.y`. Some patches needed rebasing and, as usual, the applied and reverted patches, along with the wireless driver, GitHub workflow, README and defconfig patches, were removed. Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com>
399 lines
9.6 KiB
Diff
399 lines
9.6 KiB
Diff
From 7b5e845f3243afd393ede5ca0e5de310115ccf30 Mon Sep 17 00:00:00 2001
|
|
From: Dom Cobley <popcornmix@gmail.com>
|
|
Date: Thu, 8 Jun 2023 11:33:08 +0100
|
|
Subject: [PATCH 0910/1085] perf/raspberry: Add support for 2712 axi
|
|
performance monitors
|
|
|
|
Also handle 2711 correctly which has a different configuration
|
|
from 2835.
|
|
|
|
Signed-off-by: Dom Cobley <popcornmix@gmail.com>
|
|
---
|
|
drivers/perf/raspberrypi_axi_monitor.c | 257 ++++++++++++++++++++++---
|
|
1 file changed, 225 insertions(+), 32 deletions(-)
|
|
|
|
--- a/drivers/perf/raspberrypi_axi_monitor.c
|
|
+++ b/drivers/perf/raspberrypi_axi_monitor.c
|
|
@@ -33,7 +33,7 @@
|
|
#define MAX_BUSES 16
|
|
#define DEFAULT_SAMPLE_TIME 100
|
|
|
|
-#define NUM_BUS_WATCHER_RESULTS 9
|
|
+#define NUM_BUS_WATCHER_RESULTS 11
|
|
|
|
struct bus_watcher_data {
|
|
union {
|
|
@@ -48,6 +48,8 @@ struct bus_watcher_data {
|
|
u32 rtrans;
|
|
u32 rtwait;
|
|
u32 rmax;
|
|
+ u32 rpend;
|
|
+ u32 ratrans;
|
|
};
|
|
};
|
|
};
|
|
@@ -65,6 +67,9 @@ struct rpi_axiperf {
|
|
/* Sample time spent on for each bus */
|
|
int sample_time;
|
|
|
|
+ /* chip specific bus config */
|
|
+ const struct bwconfig_config *config;
|
|
+
|
|
/* Now storage for the per monitor settings and the resulting
|
|
* performance figures
|
|
*/
|
|
@@ -107,6 +112,7 @@ const int GEN_CTRL;
|
|
|
|
const int GEN_CTL_ENABLE_BIT = BIT(0);
|
|
const int GEN_CTL_RESET_BIT = BIT(1);
|
|
+const int GEN_CTL_WATCH_BIT = BIT(2);
|
|
|
|
/* Bus watcher registers */
|
|
const int BW_PITCH = 0x40;
|
|
@@ -136,7 +142,7 @@ const int BW_CTRL_BUS_WATCH_SHIFT;
|
|
const int BW_CTRL_BUS_WATCH_MASK = GENMASK(5, 0); // 6 bits
|
|
const int BW_CTRL_BUS_FILTER_SHIFT = 8;
|
|
|
|
-const static char *bus_filter_strings[] = {
|
|
+static const char *bus_filter_strings[] = {
|
|
"",
|
|
"CORE0_V",
|
|
"ICACHE0",
|
|
@@ -171,9 +177,96 @@ const static char *bus_filter_strings[]
|
|
"M30"
|
|
};
|
|
|
|
-const int num_bus_filters = ARRAY_SIZE(bus_filter_strings);
|
|
+static const char * const bus_filter_strings_2711[] = {
|
|
+ "AIO",
|
|
+ "CORE0_V",
|
|
+ "ICACHE0",
|
|
+ "DCACHE0",
|
|
+ "CORE1_V",
|
|
+ "ICACHE1",
|
|
+ "DCACHE1",
|
|
+ "L2_MAIN",
|
|
+ "ARGON",
|
|
+ "PCIE",
|
|
+ "HVS",
|
|
+ "ISP",
|
|
+ "VIDEO_DCT",
|
|
+ "VIDEO_SD2AXI",
|
|
+ "CAM0",
|
|
+ "CAM1",
|
|
+ "DMA0",
|
|
+ "DMA1",
|
|
+ "DMA2",
|
|
+ "JPEG",
|
|
+ "VIDEO_CME",
|
|
+ "TRANSPOSER",
|
|
+ "VIDEO_FME",
|
|
+ "GIGE",
|
|
+ "USB",
|
|
+ "V3D0",
|
|
+ "V3D1",
|
|
+ "V3D2",
|
|
+ "GISB_AXI",
|
|
+ "DEBUG",
|
|
+ "ARM",
|
|
+ "EMMCSTB",
|
|
+};
|
|
|
|
-const static char *system_bus_string[] = {
|
|
+static const char * const bus_filter_strings_2712[] = {
|
|
+ "",
|
|
+ "VPU_UC0",
|
|
+ "VPU_IC0",
|
|
+ "VPU_DC0",
|
|
+ "VPU_UC1",
|
|
+ "VPU_IC1",
|
|
+ "VPU_DC1",
|
|
+ "VPU_L2",
|
|
+ "DMA2",
|
|
+ "VPU_DEBUG",
|
|
+ "ARM",
|
|
+ "DMA0",
|
|
+ "DMA1",
|
|
+ "RAAGA",
|
|
+ "BBSI",
|
|
+ "PCIE0",
|
|
+ "PCIE1",
|
|
+ "PCIE2",
|
|
+ "UMR",
|
|
+ "SAGE",
|
|
+ "HVDP",
|
|
+ "BSP",
|
|
+ "HVS",
|
|
+ "HVS_WMK",
|
|
+ "MOP0",
|
|
+ "MOP1",
|
|
+ "MBVN",
|
|
+ "DSI",
|
|
+ "XPT",
|
|
+ "EMMC0",
|
|
+ "GENET",
|
|
+ "USB",
|
|
+ "ARGON",
|
|
+ "UNICAM",
|
|
+ "PISP",
|
|
+ "PISPFE",
|
|
+ "JPEG",
|
|
+ "EMMC1",
|
|
+ "EMMC2",
|
|
+ "TRC",
|
|
+ "BSTM0",
|
|
+ "BSTM1",
|
|
+ "BSTM0_SEC",
|
|
+ "BSTM1_SEC",
|
|
+ "AIO",
|
|
+ "MAP",
|
|
+ "SYS_DMA",
|
|
+ "MMUCACHE0",
|
|
+ "MMUCACHE1",
|
|
+ "MPUCACHE0",
|
|
+ "MPUCACHE1",
|
|
+};
|
|
+
|
|
+static const char *system_bus_string[] = {
|
|
"DMA_L2",
|
|
"TRANS",
|
|
"JPEG",
|
|
@@ -192,9 +285,38 @@ const static char *system_bus_string[] =
|
|
"CPU_L2"
|
|
};
|
|
|
|
-const int num_system_buses = ARRAY_SIZE(system_bus_string);
|
|
+static const char * const system_bus_string_2711[] = {
|
|
+ "DMA_L2",
|
|
+ "TRANS",
|
|
+ "JPEG",
|
|
+ "VPU_UC",
|
|
+ "DMA_UC",
|
|
+ "SYSTEM_L2",
|
|
+ "HVS",
|
|
+ "ARGON",
|
|
+ "H264",
|
|
+ "PERIPHERAL",
|
|
+ "ARM_UC",
|
|
+ "ARM_L2",
|
|
+};
|
|
+
|
|
+static const char * const system_bus_string_2712[] = {
|
|
+ "VPU_UC",
|
|
+ "DISPLAY_TOP",
|
|
+ "V3D",
|
|
+ "ARM",
|
|
+ "XPT",
|
|
+ "BSTM_TOP",
|
|
+ "PCIE_01",
|
|
+ "ARGON_TOP",
|
|
+ "ARB3",
|
|
+ "SRC",
|
|
+ "HVDP",
|
|
+ "PER",
|
|
+ "SYSTEM_L2",
|
|
+};
|
|
|
|
-const static char *vpu_bus_string[] = {
|
|
+static const char *vpu_bus_string[] = {
|
|
"VPU1_D_L2",
|
|
"VPU0_D_L2",
|
|
"VPU1_I_L2",
|
|
@@ -213,7 +335,66 @@ const static char *vpu_bus_string[] = {
|
|
"L2_IN"
|
|
};
|
|
|
|
-const int num_vpu_buses = ARRAY_SIZE(vpu_bus_string);
|
|
+static const char * const vpu_bus_string_2711[] = {
|
|
+ "VPU1_D_L2",
|
|
+ "VPU0_D_L2",
|
|
+ "VPU1_I_L2",
|
|
+ "VPU0_I_L2",
|
|
+ "SYSTEM_L2",
|
|
+ "DMA_L2",
|
|
+ "VPU1_D_UC",
|
|
+ "VPU0_D_UC",
|
|
+ "VPU1_I_UC",
|
|
+ "VPU0_I_UC",
|
|
+ "VPU_UC",
|
|
+ "L2_OUT",
|
|
+ "DMA_UC",
|
|
+ "L2_IN"
|
|
+};
|
|
+
|
|
+static const char * const vpu_bus_string_2712[] = {
|
|
+ "VPU1_D_L2",
|
|
+ "VPU0_D_L2",
|
|
+ "VPU1_I_L2",
|
|
+ "VPU0_I_L2",
|
|
+ "SYSTEM_L2",
|
|
+ "DMA_L2",
|
|
+ "VPU1_D_UC",
|
|
+ "VPU0_D_UC",
|
|
+ "VPU1_I_UC",
|
|
+ "VPU0_I_UC",
|
|
+ "VPU_UC",
|
|
+ "L2_OUT",
|
|
+ "DMA_UC",
|
|
+ "L2_IN"
|
|
+};
|
|
+
|
|
+struct bwconfig_config {
|
|
+ const char * const *bus_filter_strings;
|
|
+ const int num_bus_filters;
|
|
+ const char * const *system_bus_string;
|
|
+ const int num_system_buses;
|
|
+ const char * const *vpu_bus_string;
|
|
+ const int num_vpu_buses;
|
|
+};
|
|
+
|
|
+static const struct bwconfig_config config_2835 = {
|
|
+ bus_filter_strings, ARRAY_SIZE(bus_filter_strings),
|
|
+ system_bus_string, ARRAY_SIZE(system_bus_string),
|
|
+ vpu_bus_string, ARRAY_SIZE(vpu_bus_string),
|
|
+};
|
|
+
|
|
+static const struct bwconfig_config config_2711 = {
|
|
+ bus_filter_strings_2711, ARRAY_SIZE(bus_filter_strings_2711),
|
|
+ system_bus_string_2711, ARRAY_SIZE(system_bus_string_2711),
|
|
+ vpu_bus_string_2711, ARRAY_SIZE(vpu_bus_string_2711),
|
|
+};
|
|
+
|
|
+static const struct bwconfig_config config_2712 = {
|
|
+ bus_filter_strings_2712, ARRAY_SIZE(bus_filter_strings_2712),
|
|
+ system_bus_string_2712, ARRAY_SIZE(system_bus_string_2712),
|
|
+ vpu_bus_string_2712, ARRAY_SIZE(vpu_bus_string_2712),
|
|
+};
|
|
|
|
const static char *monitor_name[] = {
|
|
"System",
|
|
@@ -233,10 +414,10 @@ static inline u32 read_reg(int monitor,
|
|
static void read_bus_watcher(int monitor, int watcher, u32 *results)
|
|
{
|
|
if (state->monitor[monitor].use_mailbox_interface) {
|
|
- /* We have 9 results, plus the overheads of start address and
|
|
- * length So 11 u32 to define
|
|
+ /* We have NUM_BUS_WATCHER_RESULTS results, plus the overheads
|
|
+ * of start address and length
|
|
*/
|
|
- u32 tmp[11];
|
|
+ u32 tmp[NUM_BUS_WATCHER_RESULTS+2];
|
|
int err;
|
|
|
|
tmp[0] = (u32)(uintptr_t)(state->monitor[monitor].base_address + watcher
|
|
@@ -352,7 +533,7 @@ static void monitor(struct rpi_axiperf *
|
|
}
|
|
|
|
/* start monitoring */
|
|
- set_monitor_control(monitor, GEN_CTL_ENABLE_BIT);
|
|
+ set_monitor_control(monitor, GEN_CTL_ENABLE_BIT | GEN_CTL_WATCH_BIT);
|
|
}
|
|
|
|
mutex_unlock(&state->lock);
|
|
@@ -409,11 +590,12 @@ static ssize_t myreader(struct file *fp,
|
|
int buff_size = INIT_BUFF_SIZE;
|
|
char *p;
|
|
typeof(state->monitor[0]) *mon = &(state->monitor[idx]);
|
|
+ const struct bwconfig_config *config = state->config;
|
|
|
|
if (idx < 0 || idx > NUM_MONITORS)
|
|
idx = 0;
|
|
|
|
- num_buses = idx == SYSTEM_MONITOR ? num_system_buses : num_vpu_buses;
|
|
+ num_buses = idx == SYSTEM_MONITOR ? config->num_system_buses : config->num_vpu_buses;
|
|
|
|
string_buffer = kmalloc(buff_size, GFP_KERNEL);
|
|
|
|
@@ -428,17 +610,17 @@ static ssize_t myreader(struct file *fp,
|
|
mutex_lock(&state->lock);
|
|
|
|
if (mon->bus_filter) {
|
|
- int filt = min(mon->bus_filter & 0x1f, num_bus_filters);
|
|
+ int filt = min(mon->bus_filter & 0x1f, config->num_bus_filters);
|
|
|
|
cnt = snprintf(p, buff_size,
|
|
"\nMonitoring transactions from %s only\n",
|
|
- bus_filter_strings[filt]);
|
|
+ config->bus_filter_strings[filt]);
|
|
p += cnt;
|
|
buff_size -= cnt;
|
|
}
|
|
|
|
- cnt = snprintf(p, buff_size, " Bus | Atrans Atwait AMax Wtrans Wtwait WMax Rtrans Rtwait RMax\n"
|
|
- "======================================================================================================\n");
|
|
+ cnt = snprintf(p, buff_size, " Bus | Atrans Atwait AMax Wtrans Wtwait WMax Rtrans Rtwait RMax RPend RAtrans\n"
|
|
+ "===========================================================================================================================\n");
|
|
|
|
if (cnt >= buff_size)
|
|
goto done;
|
|
@@ -446,25 +628,29 @@ static ssize_t myreader(struct file *fp,
|
|
p += cnt;
|
|
buff_size -= cnt;
|
|
|
|
+#define M(x) ((x) >= 1000000000 ? (x)/1000000 : (x) >= 1000 ? (x)/1000 : (x))
|
|
+#define N(x) ((x) >= 1000000000 ? 'M' : (x) >= 1000 ? 'K' : ' ')
|
|
+
|
|
for (i = 0; i < num_buses; i++) {
|
|
if (mon->bus_enabled & (1 << i)) {
|
|
-#define DIVIDER (1024)
|
|
typeof(mon->results[0]) *res = &(mon->results[i]);
|
|
|
|
cnt = snprintf(p, buff_size,
|
|
- "%10s | %8uK %8uK %8uK %8uK %8uK %8uK %8uK %8uK %8uK\n",
|
|
+ "%11s | %8u%c %8u%c %8u%c %8u%c %8u%c %8u%c %8u%c %8u%c %8u%c %8u%c %8u%c\n",
|
|
idx == SYSTEM_MONITOR ?
|
|
- system_bus_string[i] :
|
|
- vpu_bus_string[i],
|
|
- res->atrans/DIVIDER,
|
|
- res->atwait/DIVIDER,
|
|
- res->amax/DIVIDER,
|
|
- res->wtrans/DIVIDER,
|
|
- res->wtwait/DIVIDER,
|
|
- res->wmax/DIVIDER,
|
|
- res->rtrans/DIVIDER,
|
|
- res->rtwait/DIVIDER,
|
|
- res->rmax/DIVIDER
|
|
+ config->system_bus_string[i] :
|
|
+ config->vpu_bus_string[i],
|
|
+ M(res->atrans), N(res->atrans),
|
|
+ M(res->atwait), N(res->atwait),
|
|
+ M(res->amax), N(res->amax),
|
|
+ M(res->wtrans), N(res->wtrans),
|
|
+ M(res->wtwait), N(res->wtwait),
|
|
+ M(res->wmax), N(res->wmax),
|
|
+ M(res->rtrans), N(res->rtrans),
|
|
+ M(res->rtwait), N(res->rtwait),
|
|
+ M(res->rmax), N(res->rmax),
|
|
+ M(res->rpend), N(res->rpend),
|
|
+ M(res->ratrans), N(res->ratrans)
|
|
);
|
|
if (cnt >= buff_size)
|
|
goto done;
|
|
@@ -526,6 +712,10 @@ static int rpi_axiperf_probe(struct plat
|
|
if (!state)
|
|
return -ENOMEM;
|
|
|
|
+ state->config = of_device_get_match_data(dev);
|
|
+ if (!state->config)
|
|
+ return -EINVAL;
|
|
+
|
|
/* Get the firmware handle for future rpi-firmware-xxx calls */
|
|
fw_node = of_parse_phandle(np, "firmware", 0);
|
|
if (!fw_node) {
|
|
@@ -612,9 +802,12 @@ static int rpi_axiperf_remove(struct pla
|
|
}
|
|
|
|
static const struct of_device_id rpi_axiperf_match[] = {
|
|
- {
|
|
- .compatible = "brcm,bcm2835-axiperf",
|
|
- },
|
|
+ { .compatible = "brcm,bcm2835-axiperf",
|
|
+ .data = &config_2835 },
|
|
+ { .compatible = "brcm,bcm2711-axiperf",
|
|
+ .data = &config_2711 },
|
|
+ { .compatible = "brcm,bcm2712-axiperf",
|
|
+ .data = &config_2712 },
|
|
{},
|
|
};
|
|
MODULE_DEVICE_TABLE(of, rpi_axiperf_match);
|