summaryrefslogtreecommitdiffstats
path: root/sys-kernel/boest-v5.0.15/raspberrypi/0099-AXI-performance-monitor-driver-2222.patch
diff options
context:
space:
mode:
Diffstat (limited to 'sys-kernel/boest-v5.0.15/raspberrypi/0099-AXI-performance-monitor-driver-2222.patch')
-rw-r--r--sys-kernel/boest-v5.0.15/raspberrypi/0099-AXI-performance-monitor-driver-2222.patch686
1 files changed, 686 insertions, 0 deletions
diff --git a/sys-kernel/boest-v5.0.15/raspberrypi/0099-AXI-performance-monitor-driver-2222.patch b/sys-kernel/boest-v5.0.15/raspberrypi/0099-AXI-performance-monitor-driver-2222.patch
new file mode 100644
index 00000000..cc9346e3
--- /dev/null
+++ b/sys-kernel/boest-v5.0.15/raspberrypi/0099-AXI-performance-monitor-driver-2222.patch
@@ -0,0 +1,686 @@
+From 75275b55ce345e86d8e9dea654b3bb13296e427c Mon Sep 17 00:00:00 2001
+From: James Hughes <JamesH65@users.noreply.github.com>
+Date: Tue, 14 Nov 2017 15:13:15 +0000
+Subject: [PATCH 099/194] AXI performance monitor driver (#2222)
+
+Uses the debugfs I/F to provide access to the AXI
+bus performance monitors.
+
+Requires the new mailbox peripheral access for access
+to the VPU performance registers, system bus access
+is done using direct register reads.
+
+Signed-off-by: James Hughes <james.hughes@raspberrypi.org>
+---
+ drivers/perf/Kconfig | 7 +
+ drivers/perf/Makefile | 1 +
+ drivers/perf/raspberrypi_axi_monitor.c | 636 +++++++++++++++++++++++++
+ 3 files changed, 644 insertions(+)
+
+diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig
+index af9bc178495d..26f304260bc2 100644
+--- a/drivers/perf/Kconfig
++++ b/drivers/perf/Kconfig
+@@ -111,4 +111,11 @@ config ARM_SPE_PMU
+ Extension, which provides periodic sampling of operations in
+ the CPU pipeline and reports this via the perf AUX interface.
+
++config RPI_AXIPERF
++ depends on ARCH_BCM2835
++ tristate "RaspberryPi AXI Performance monitors"
++ default n
++ help
++ Say y if you want to use Raspberry Pi AXI performance monitors, m if
++ you want to build it as a module.
+ endmenu
+diff --git a/drivers/perf/Makefile b/drivers/perf/Makefile
+index 909f27fd9db3..7d8eab1be3ec 100644
+--- a/drivers/perf/Makefile
++++ b/drivers/perf/Makefile
+@@ -10,3 +10,4 @@ obj-$(CONFIG_QCOM_L3_PMU) += qcom_l3_pmu.o
+ obj-$(CONFIG_THUNDERX2_PMU) += thunderx2_pmu.o
+ obj-$(CONFIG_XGENE_PMU) += xgene_pmu.o
+ obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
++obj-$(CONFIG_RPI_AXIPERF) += raspberrypi_axi_monitor.o
+diff --git a/drivers/perf/raspberrypi_axi_monitor.c b/drivers/perf/raspberrypi_axi_monitor.c
+new file mode 100644
+index 000000000000..fc82e923f910
+--- /dev/null
++++ b/drivers/perf/raspberrypi_axi_monitor.c
+@@ -0,0 +1,636 @@
++/*
++ * raspberrypi_axi_monitor.c
++ *
++ * Author: james.hughes@raspberrypi.org
++ *
++ * Raspberry Pi AXI performance counters.
++ *
++ * Copyright (C) 2017 Raspberry Pi Trading Ltd.
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++
++#include <linux/debugfs.h>
++#include <linux/devcoredump.h>
++#include <linux/device.h>
++#include <linux/kthread.h>
++#include <linux/module.h>
++#include <linux/netdevice.h>
++#include <linux/mutex.h>
++#include <linux/of.h>
++#include <linux/platform_device.h>
++
++#include <soc/bcm2835/raspberrypi-firmware.h>
++
++/* Number of AXI monitors and of bus watchers inside each monitor */
++#define NUM_MONITORS			2
++#define NUM_BUS_WATCHERS_PER_MONITOR	3
++
++/* Indices into the per-monitor state array */
++#define SYSTEM_MONITOR			0
++#define VPU_MONITOR			1
++
++/* Number of buses a monitor can select between (size of the result table) */
++#define MAX_BUSES			16
++/* Default time, in milliseconds, the sampling thread sleeps per pass */
++#define DEFAULT_SAMPLE_TIME		100
++
++/* Number of 32-bit counters read back from one bus watcher */
++#define NUM_BUS_WATCHER_RESULTS		9
++
++/*
++ * Counters captured from a single bus watcher.
++ *
++ * The anonymous union allows the same NUM_BUS_WATCHER_RESULTS words to be
++ * filled in as a flat array (results[]) and then read back by name. The
++ * named fields are declared in the same order as the BW_*_OFFSET registers.
++ */
++struct bus_watcher_data {
++	union {
++		u32 results[NUM_BUS_WATCHER_RESULTS];
++		struct {
++			u32 atrans, atwait, amax;
++			u32 wtrans, wtwait, wmax;
++			u32 rtrans, rtwait, rmax;
++		};
++	};
++};
++
++
++/*
++ * Driver state. A single instance is allocated in probe and reached through
++ * the file-scope 'state' pointer below.
++ */
++struct rpi_axiperf {
++	/* Owning platform device, used for dev_err() reporting */
++	struct platform_device *dev;
++	/* Top level debugfs directory for this driver */
++	struct dentry *root_folder;
++
++	/* Sampling kthread and the lock taken by it and by the "data" reader */
++	struct task_struct *monitor_thread;
++	struct mutex lock;
++
++	/* Handle used for the firmware property (mailbox) calls */
++	struct rpi_firmware *firmware;
++
++	/* Time the sampling thread sleeps between programming the bus
++	 * watchers and reading them back (passed to msleep())
++	 */
++	int sample_time;
++
++	/* Per monitor settings followed by the performance figures
++	 * gathered for that monitor
++	 */
++	struct {
++		/* Bit field of buses we want to monitor */
++		int bus_enabled;
++		/* Bit field of buses to filter by */
++		int bus_filter;
++		/* Bus currently assigned to each bus watcher */
++		int current_bus[NUM_BUS_WATCHERS_PER_MONITOR];
++		/* Most recent bus handed to a watcher on this monitor */
++		int last_monitored;
++
++		/* Non-zero if this monitor must be accessed through the
++		 * firmware mailbox interface rather than by direct register
++		 * access.
++		 */
++		int use_mailbox_interface;
++
++		/* Latest counters, indexed by bus number */
++		struct bus_watcher_data results[MAX_BUSES];
++
++		/* debugfs directory holding this monitor's files */
++		struct dentry *debugfs_entry;
++		/* Register base: an ioremap()ed pointer for direct access,
++		 * or the raw "reg" value from the device tree when the
++		 * mailbox interface is used
++		 */
++		void __iomem *base_address;
++
++	} monitor[NUM_MONITORS];
++
++};
++
++/* The one driver instance; set in rpi_axiperf_probe() */
++static struct rpi_axiperf *state;
++
++/* Two monitors, System and VPU, each with the following register sets.
++ * Each monitor can only monitor one bus at a time, so we time share them,
++ * giving each bus 100ms (default, settable via debugfs) of time on its
++ * associated monitor.
++ * Results from the three bus watchers per monitor are recorded and made
++ * available through debugfs.
++ */
++
++/*
++ * Register map. These constants are private to this file, so they are given
++ * internal linkage to keep their very generic names out of the kernel's
++ * global symbol namespace, and the zero-valued ones are initialised
++ * explicitly rather than relying on implicit zero initialisation.
++ */
++
++/* general registers */
++static const int GEN_CTRL = 0x00;
++
++static const int GEN_CTL_ENABLE_BIT = BIT(0);
++static const int GEN_CTL_RESET_BIT = BIT(1);
++
++/* Bus watcher registers */
++
++/* Distance between the register sets of consecutive bus watchers */
++static const int BW_PITCH = 0x40;
++
++static const int BW0_CTRL = 0x40;
++static const int BW1_CTRL = 0x80;
++static const int BW2_CTRL = 0xc0;
++
++/* Counter registers, as offsets from a bus watcher's control register */
++static const int BW_ATRANS_OFFSET = 0x04;
++static const int BW_ATWAIT_OFFSET = 0x08;
++static const int BW_AMAX_OFFSET = 0x0c;
++static const int BW_WTRANS_OFFSET = 0x10;
++static const int BW_WTWAIT_OFFSET = 0x14;
++static const int BW_WMAX_OFFSET = 0x18;
++static const int BW_RTRANS_OFFSET = 0x1c;
++static const int BW_RTWAIT_OFFSET = 0x20;
++static const int BW_RMAX_OFFSET = 0x24;
++
++/* Bits and fields of a bus watcher control register */
++static const int BW_CTRL_RESET_BIT = BIT(31);
++static const int BW_CTRL_ENABLE_BIT = BIT(30);
++static const int BW_CTRL_ENABLE_ID_FILTER_BIT = BIT(29);
++static const int BW_CTRL_LIMIT_HALT_BIT = BIT(28);
++
++static const int BW_CTRL_SOURCE_SHIFT = 8;
++static const int BW_CTRL_SOURCE_MASK = GENMASK(12, 8); // 5 bits
++static const int BW_CTRL_BUS_WATCH_SHIFT = 0;
++static const int BW_CTRL_BUS_WATCH_MASK = GENMASK(5, 0); // 6 bits
++static const int BW_CTRL_BUS_FILTER_SHIFT = 8;
++
++/*
++ * Names printed by the "data" file for the active ID filter; indexed by the
++ * low five bits of a monitor's bus_filter setting.
++ */
++static const char *bus_filter_strings[] = {
++	"",
++	"CORE0_V",
++	"ICACHE0",
++	"DCACHE0",
++	"CORE1_V",
++	"ICACHE1",
++	"DCACHE1",
++	"L2_MAIN",
++	"HOST_PORT",
++	"HOST_PORT2",
++	"HVS",
++	"ISP",
++	"VIDEO_DCT",
++	"VIDEO_SD2AXI",
++	"CAM0",
++	"CAM1",
++	"DMA0",
++	"DMA1",
++	"DMA2_VPU",
++	"JPEG",
++	"VIDEO_CME",
++	"TRANSPOSER",
++	"VIDEO_FME",
++	"CCP2TX",
++	"USB",
++	"V3D0",
++	"V3D1",
++	"V3D2",
++	"AVE",
++	"DEBUG",
++	"CPU",
++	"M30"
++};
++
++/* Number of entries in bus_filter_strings[] */
++const int num_bus_filters = ARRAY_SIZE(bus_filter_strings);
++
++/* Row labels for the System monitor's "data" file, indexed by bus number */
++static const char *system_bus_string[] = {
++	"DMA_L2",
++	"TRANS",
++	"JPEG",
++	"SYSTEM_UC",
++	"DMA_UC",
++	"SYSTEM_L2",
++	"CCP2TX",
++	"MPHI_RX",
++	"MPHI_TX",
++	"HVS",
++	"H264",
++	"ISP",
++	"V3D",
++	"PERIPHERAL",
++	"CPU_UC",
++	"CPU_L2"
++};
++
++/* Number of entries in system_bus_string[] */
++const int num_system_buses = ARRAY_SIZE(system_bus_string);
++
++/* Row labels for the VPU monitor's "data" file, indexed by bus number */
++static const char *vpu_bus_string[] = {
++	"VPU1_D_L2",
++	"VPU0_D_L2",
++	"VPU1_I_L2",
++	"VPU0_I_L2",
++	"SYSTEM_L2",
++	"L2_FLUSH",
++	"DMA_L2",
++	"VPU1_D_UC",
++	"VPU0_D_UC",
++	"VPU1_I_UC",
++	"VPU0_I_UC",
++	"SYSTEM_UC",
++	"L2_OUT",
++	"DMA_UC",
++	"SDRAM",
++	"L2_IN"
++};
++
++/* Number of entries in vpu_bus_string[] */
++const int num_vpu_buses = ARRAY_SIZE(vpu_bus_string);
++
++/* debugfs directory name for each monitor, indexed by monitor number */
++static const char *monitor_name[] = {
++	"System",
++	"VPU"
++};
++
++/* Write @value to register offset @reg of directly mapped monitor @monitor */
++static inline void write_reg(int monitor, int reg, u32 value)
++{
++	void __iomem *addr = state->monitor[monitor].base_address + reg;
++
++	writel(value, addr);
++}
++
++/* Return register offset @reg of directly mapped monitor @monitor */
++static inline u32 read_reg(int monitor, u32 reg)
++{
++	void __iomem *addr = state->monitor[monitor].base_address + reg;
++
++	return readl(addr);
++}
++
++/*
++ * Read the NUM_BUS_WATCHER_RESULTS counters of one bus watcher.
++ *
++ * @monitor: index of the monitor the watcher belongs to
++ * @watcher: offset of the watcher's control register (BWn_CTRL)
++ * @results: destination for NUM_BUS_WATCHER_RESULTS words
++ *
++ * Monitors flagged use_mailbox_interface are read through a firmware
++ * property call, the others with direct register reads. If the firmware
++ * call fails an error is logged (once) and @results is left untouched.
++ */
++static void read_bus_watcher(int monitor, int watcher, u32 *results)
++{
++	void __iomem *addr = state->monitor[monitor].base_address
++			     + watcher + BW_ATRANS_OFFSET;
++
++	if (state->monitor[monitor].use_mailbox_interface) {
++		/* Start address and length, followed by room for the
++		 * results. Zeroed so no stale stack contents are handed
++		 * to the firmware.
++		 */
++		u32 tmp[2 + NUM_BUS_WATCHER_RESULTS] = { 0 };
++		int err;
++
++		/* Go through unsigned long so the narrowing to the 32-bit
++		 * address is explicit and warning free on 64-bit builds.
++		 */
++		tmp[0] = (u32)(unsigned long)addr;
++		tmp[1] = NUM_BUS_WATCHER_RESULTS;
++
++		err = rpi_firmware_property(state->firmware,
++					    RPI_FIRMWARE_GET_PERIPH_REG,
++					    tmp, sizeof(tmp));
++
++		if (err < 0 || tmp[1] != NUM_BUS_WATCHER_RESULTS) {
++			dev_err_once(&state->dev->dev,
++				     "Failed to read bus watcher\n");
++		} else {
++			memcpy(results, &tmp[2],
++			       NUM_BUS_WATCHER_RESULTS * sizeof(u32));
++		}
++	} else {
++		int i;
++
++		for (i = 0; i < NUM_BUS_WATCHER_RESULTS; i++, addr += 4)
++			*results++ = readl(addr);
++	}
++}
++
++/*
++ * Write @set to the general control register (GEN_CTRL) of @monitor, using
++ * a firmware property call for monitors flagged use_mailbox_interface and a
++ * direct register write otherwise. A failed firmware call is logged once.
++ */
++static void set_monitor_control(int monitor, u32 set)
++{
++	if (state->monitor[monitor].use_mailbox_interface) {
++		/* Register address, register count, value. The address goes
++		 * through unsigned long so the narrowing to 32 bits is
++		 * explicit and warning free on 64-bit builds.
++		 */
++		u32 tmp[3] = {
++			(u32)(unsigned long)
++				(state->monitor[monitor].base_address +
++				 GEN_CTRL),
++			1,
++			set
++		};
++		int err = rpi_firmware_property(state->firmware,
++						RPI_FIRMWARE_SET_PERIPH_REG,
++						tmp, sizeof(tmp));
++
++		if (err < 0 || tmp[1] != 1)
++			dev_err_once(&state->dev->dev,
++				     "Failed to set monitor control\n");
++	} else {
++		write_reg(monitor, GEN_CTRL, set);
++	}
++}
++
++/*
++ * Write @set to a bus watcher control register of @monitor.
++ *
++ * @watcher is the offset of the watcher's control register (BWn_CTRL).
++ * Monitors flagged use_mailbox_interface are written through a firmware
++ * property call, the others directly. A failed firmware call is logged once.
++ */
++static void set_bus_watcher_control(int monitor, int watcher, u32 set)
++{
++	if (state->monitor[monitor].use_mailbox_interface) {
++		/* Register address, register count, value. The address goes
++		 * through unsigned long so the narrowing to 32 bits is
++		 * explicit and warning free on 64-bit builds.
++		 */
++		u32 tmp[3] = {
++			(u32)(unsigned long)
++				(state->monitor[monitor].base_address +
++				 watcher),
++			1,
++			set
++		};
++		int err = rpi_firmware_property(state->firmware,
++						RPI_FIRMWARE_SET_PERIPH_REG,
++						tmp, sizeof(tmp));
++
++		if (err < 0 || tmp[1] != 1)
++			dev_err_once(&state->dev->dev,
++				     "Failed to set bus watcher control\n");
++	} else {
++		write_reg(monitor, watcher, set);
++	}
++}
++
++/*
++ * Run one sampling pass over every monitor.
++ *
++ * Under the lock, each monitor with enabled buses gets up to
++ * NUM_BUS_WATCHERS_PER_MONITOR bus watchers programmed with the next
++ * enabled buses (round robin from last_monitored). The lock is dropped
++ * while sleeping for sample_time, then retaken to copy the counters into
++ * results[] for the buses that were being watched.
++ *
++ * bus_enabled can be changed through debugfs at any time, so it is read
++ * once per phase and restricted to the MAX_BUSES buses that exist. Without
++ * that, a value with only high bits set (or one cleared mid-loop) would
++ * make the bus selection loop spin forever with the lock held.
++ */
++static void monitor(struct rpi_axiperf *state)
++{
++	const u32 valid_buses = GENMASK(MAX_BUSES - 1, 0);
++	/* Watchers programmed per monitor in this pass. Zeroed so the read
++	 * back phase never uses an unset count if a monitor is enabled
++	 * while we sleep.
++	 */
++	int num_buses[NUM_MONITORS] = { 0 };
++	int monitor;
++
++	mutex_lock(&state->lock);
++
++	for (monitor = 0; monitor < NUM_MONITORS; monitor++) {
++		typeof(state->monitor[0]) *mon = &(state->monitor[monitor]);
++		u32 enabled = mon->bus_enabled & valid_buses;
++
++		/* Anything enabled? */
++		if (!enabled) {
++			/* No, reset this monitor */
++			set_monitor_control(monitor, GEN_CTL_RESET_BIT);
++		} else {
++			int i;
++
++			/* Find out how many buses we want to monitor, and
++			 * spread our 3 actual bus watchers over them
++			 */
++			num_buses[monitor] = hweight32(enabled);
++			num_buses[monitor] = min(num_buses[monitor],
++						 NUM_BUS_WATCHERS_PER_MONITOR);
++
++			for (i = 0; i < num_buses[monitor]; i++) {
++				int ctrl_reg = BW0_CTRL + i * BW_PITCH;
++				int bus_control;
++
++				/* Advance to the next enabled bus. This
++				 * terminates because 'enabled' is non-zero
++				 * and only has bits below MAX_BUSES set.
++				 */
++				do {
++					mon->last_monitored++;
++					mon->last_monitored &= MAX_BUSES - 1;
++				} while ((enabled &
++					  (1 << mon->last_monitored)) == 0);
++
++				mon->current_bus[i] = mon->last_monitored;
++
++				/* Reset the counters */
++				set_bus_watcher_control(monitor, ctrl_reg,
++							BW_CTRL_RESET_BIT);
++
++				bus_control = BW_CTRL_ENABLE_BIT |
++					      mon->current_bus[i];
++
++				if (mon->bus_filter) {
++					bus_control |=
++						BW_CTRL_ENABLE_ID_FILTER_BIT;
++					bus_control |=
++						((mon->bus_filter & 0x1f)
++						 << BW_CTRL_BUS_FILTER_SHIFT);
++				}
++
++				/* Start capture */
++				set_bus_watcher_control(monitor, ctrl_reg,
++							bus_control);
++			}
++		}
++
++		/* start monitoring
++		 * NOTE(review): this is also written for a monitor that was
++		 * just reset because nothing is enabled; kept as in the
++		 * original, confirm that is intended.
++		 */
++		set_monitor_control(monitor, GEN_CTL_ENABLE_BIT);
++	}
++
++	mutex_unlock(&state->lock);
++
++	msleep(state->sample_time);
++
++	/* Now read the results */
++
++	mutex_lock(&state->lock);
++	for (monitor = 0; monitor < NUM_MONITORS; monitor++) {
++		typeof(state->monitor[0]) *mon = &(state->monitor[monitor]);
++		u32 enabled = mon->bus_enabled & valid_buses;
++		int i;
++
++		/* Anything enabled? */
++		if (!enabled) {
++			/* No, disable all monitoring for this monitor */
++			set_monitor_control(monitor, 0);
++			continue;
++		}
++
++		/* Only the watchers programmed above hold fresh data */
++		for (i = 0; i < num_buses[monitor]; i++) {
++			int bus = mon->current_bus[i];
++
++			read_bus_watcher(monitor, BW0_CTRL + i * BW_PITCH,
++					 mon->results[bus].results);
++		}
++	}
++	mutex_unlock(&state->lock);
++}
++
++/*
++ * Body of the sampling kthread: run sampling passes back to back until the
++ * thread is asked to stop. At least one pass is always executed.
++ */
++static int monitor_thread(void *data)
++{
++	struct rpi_axiperf *state = data;
++
++	do {
++		monitor(state);
++	} while (!kthread_should_stop());
++
++	return 0;
++}
++
++/*
++ * Read handler for a monitor's "data" debugfs file.
++ *
++ * The monitor index is carried in fp->private_data. Each call formats a
++ * fresh table with one row per enabled bus into a temporary buffer (under
++ * the state lock) and hands the requested window of it to user space.
++ *
++ * Returns the number of bytes copied, 0 at end of data, or a negative
++ * errno (-ENOMEM if the temporary buffer cannot be allocated, or whatever
++ * simple_read_from_buffer() reports).
++ */
++static ssize_t myreader(struct file *fp, char __user *user_buffer,
++			size_t count, loff_t *position)
++{
++#define INIT_BUFF_SIZE 2048
++#define DIVIDER (1024)
++
++	int idx = (int)(long)(fp->private_data);
++	int num_buses, i;
++	/* Characters formatted so far; scnprintf() keeps this below
++	 * INIT_BUFF_SIZE, so output that does not fit is truncated.
++	 */
++	int len = 0;
++	ssize_t ret;
++	char *string_buffer;
++	typeof(state->monitor[0]) *mon;
++
++	/* Validate before indexing: valid monitors are 0..NUM_MONITORS-1 */
++	if (idx < 0 || idx >= NUM_MONITORS)
++		idx = 0;
++
++	mon = &(state->monitor[idx]);
++	num_buses = idx == SYSTEM_MONITOR ? num_system_buses : num_vpu_buses;
++
++	string_buffer = kmalloc(INIT_BUFF_SIZE, GFP_KERNEL);
++	if (!string_buffer) {
++		dev_err(&state->dev->dev,
++			"Failed temporary string allocation\n");
++		return -ENOMEM;
++	}
++
++	mutex_lock(&state->lock);
++
++	if (mon->bus_filter) {
++		/* Clamp to the last valid table entry */
++		int filt = min(mon->bus_filter & 0x1f, num_bus_filters - 1);
++
++		len += scnprintf(string_buffer + len, INIT_BUFF_SIZE - len,
++				 "\nMonitoring transactions from %s only\n",
++				 bus_filter_strings[filt]);
++	}
++
++	len += scnprintf(string_buffer + len, INIT_BUFF_SIZE - len,
++			 " Bus | Atrans Atwait AMax Wtrans Wtwait WMax Rtrans Rtwait RMax\n"
++			 "======================================================================================================\n");
++
++	for (i = 0; i < num_buses; i++) {
++		typeof(mon->results[0]) *res = &(mon->results[i]);
++
++		if (!(mon->bus_enabled & (1 << i)))
++			continue;
++
++		/* Counters are reported in units of DIVIDER */
++		len += scnprintf(string_buffer + len, INIT_BUFF_SIZE - len,
++				 "%10s | %8uK %8uK %8uK %8uK %8uK %8uK %8uK %8uK %8uK\n",
++				 idx == SYSTEM_MONITOR ?
++					system_bus_string[i] :
++					vpu_bus_string[i],
++				 res->atrans/DIVIDER,
++				 res->atwait/DIVIDER,
++				 res->amax/DIVIDER,
++				 res->wtrans/DIVIDER,
++				 res->wtwait/DIVIDER,
++				 res->wmax/DIVIDER,
++				 res->rtrans/DIVIDER,
++				 res->rtwait/DIVIDER,
++				 res->rmax/DIVIDER);
++	}
++
++	/* Single exit from the locked region, so the lock is always dropped */
++	mutex_unlock(&state->lock);
++
++	ret = simple_read_from_buffer(user_buffer, count, position,
++				      string_buffer, len);
++
++	kfree(string_buffer);
++
++	return ret;
++}
++
++/*
++ * Write handler for a monitor's "data" debugfs file.
++ *
++ * Currently a no-op that accepts and discards everything written; returns
++ * @count so the caller sees the whole write as consumed.
++ */
++static ssize_t mywriter(struct file *fp, const char __user *user_buffer,
++			size_t count, loff_t *position)
++{
++	int idx = (int)(long)(fp->private_data);
++
++	/* Valid monitor indices are 0..NUM_MONITORS-1 */
++	if (idx < 0 || idx >= NUM_MONITORS)
++		idx = 0;
++
++	/* At the moment, this does nothing, but in the future it could be
++	 * used to reset counters etc
++	 */
++	return count;
++}
++
++/* File operations for each monitor's "data" debugfs file */
++static const struct file_operations fops_debug = {
++	.open	= simple_open,
++	.read	= myreader,
++	.write	= mywriter,
++};
++
++/*
++ * Bind to an AXI performance monitor device.
++ *
++ * Allocates the driver state, looks up the firmware handle named by the
++ * "firmware" phandle, obtains a register base for each monitor, creates the
++ * debugfs tree and starts the sampling thread.
++ *
++ * Returns 0 on success or a negative errno: -ENOMEM, -ENOENT when the
++ * firmware phandle is missing, -EPROBE_DEFER when the firmware is not yet
++ * available, or the error from the register lookup or thread creation.
++ */
++static int rpi_axiperf_probe(struct platform_device *pdev)
++{
++	int i;
++	struct device *dev = &pdev->dev;
++	struct device_node *np = dev->of_node;
++	struct device_node *fw_node;
++
++	/* Device managed, so every error return below (including probe
++	 * deferral) releases the allocation instead of leaking it.
++	 */
++	state = devm_kzalloc(dev, sizeof(*state), GFP_KERNEL);
++	if (!state)
++		return -ENOMEM;
++
++	/* Must be usable before any debugfs file or the thread can take it */
++	mutex_init(&state->lock);
++
++	/* Get the firmware handle for future rpi-firmware-xxx calls */
++	fw_node = of_parse_phandle(np, "firmware", 0);
++	if (!fw_node) {
++		dev_err(dev, "Missing firmware node\n");
++		return -ENOENT;
++	}
++
++	state->firmware = rpi_firmware_get(fw_node);
++	/* Drop the reference taken by of_parse_phandle() */
++	of_node_put(fw_node);
++	if (!state->firmware)
++		return -EPROBE_DEFER;
++
++	/* Special case for the VPU monitor, we must use the mailbox interface
++	 * as it is not accessible from the ARM address space.
++	 */
++	state->monitor[VPU_MONITOR].use_mailbox_interface = 1;
++	state->monitor[SYSTEM_MONITOR].use_mailbox_interface = 0;
++
++	for (i = 0; i < NUM_MONITORS; i++) {
++		if (state->monitor[i].use_mailbox_interface) {
++			u32 bus_addr;
++			int err;
++
++			/* The firmware is given the raw address from the
++			 * device tree. Index i*2 presumably relies on "reg"
++			 * holding one address cell and one size cell per
++			 * entry - confirm against the binding.
++			 */
++			err = of_property_read_u32_index(np, "reg", i * 2,
++							 &bus_addr);
++			if (err) {
++				dev_err(dev,
++					"Missing reg entry for %s monitor\n",
++					monitor_name[i]);
++				return err;
++			}
++
++			state->monitor[i].base_address =
++				(void __iomem *)(unsigned long)bus_addr;
++		} else {
++			struct resource *resource =
++				platform_get_resource(pdev, IORESOURCE_MEM, i);
++
++			state->monitor[i].base_address =
++				devm_ioremap_resource(&pdev->dev, resource);
++
++			if (IS_ERR(state->monitor[i].base_address))
++				return PTR_ERR(state->monitor[i].base_address);
++		}
++
++		/* Enable all buses by default */
++		state->monitor[i].bus_enabled = 0xffff;
++	}
++
++	state->dev = pdev;
++	platform_set_drvdata(pdev, state);
++
++	state->sample_time = DEFAULT_SAMPLE_TIME;
++
++	/* Set up all the debugfs stuff */
++	state->root_folder = debugfs_create_dir(KBUILD_MODNAME, NULL);
++
++	for (i = 0; i < NUM_MONITORS; i++) {
++		state->monitor[i].debugfs_entry =
++			debugfs_create_dir(monitor_name[i], state->root_folder);
++		if (IS_ERR(state->monitor[i].debugfs_entry))
++			state->monitor[i].debugfs_entry = NULL;
++
++		/* The monitor index travels in the file's private data */
++		debugfs_create_file("data", 0444,
++				    state->monitor[i].debugfs_entry,
++				    (void *)(long)i, &fops_debug);
++		debugfs_create_u32("enable", 0644,
++				   state->monitor[i].debugfs_entry,
++				   &state->monitor[i].bus_enabled);
++		debugfs_create_u32("filter", 0644,
++				   state->monitor[i].debugfs_entry,
++				   &state->monitor[i].bus_filter);
++		/* One shared setting, exposed in every monitor directory */
++		debugfs_create_u32("sample_time", 0644,
++				   state->monitor[i].debugfs_entry,
++				   &state->sample_time);
++	}
++
++	state->monitor_thread = kthread_run(monitor_thread, state,
++					    "rpi-axiperfmon");
++	if (IS_ERR(state->monitor_thread)) {
++		int err = PTR_ERR(state->monitor_thread);
++
++		/* Do not leave files behind that point at state which is
++		 * about to be released
++		 */
++		dev_err(dev, "Failed to start monitor thread\n");
++		debugfs_remove_recursive(state->root_folder);
++		state->root_folder = NULL;
++		return err;
++	}
++
++	return 0;
++}
++
++/*
++ * Unbind: stop the sampling thread, then remove the debugfs tree.
++ * Always returns 0.
++ */
++static int rpi_axiperf_remove(struct platform_device *dev)
++{
++	kthread_stop(state->monitor_thread);
++
++	debugfs_remove_recursive(state->root_folder);
++	state->root_folder = NULL;
++
++	return 0;
++}
++
++/* Device tree compatible strings this driver binds to */
++static const struct of_device_id rpi_axiperf_match[] = {
++	{ .compatible = "brcm,bcm2835-axiperf" },
++	{ /* sentinel */ }
++};
++MODULE_DEVICE_TABLE(of, rpi_axiperf_match);
++
++/* Platform driver glue; registration is handled by module_platform_driver() */
++static struct platform_driver rpi_axiperf_driver = {
++	.driver = {
++		.name = "rpi-bcm2835-axiperf",
++		.of_match_table = of_match_ptr(rpi_axiperf_match),
++	},
++	.probe = rpi_axiperf_probe,
++	.remove = rpi_axiperf_remove,
++};
++
++module_platform_driver(rpi_axiperf_driver);
++
++/* Module information */
++MODULE_AUTHOR("James Hughes <james.hughes@raspberrypi.org>");
++MODULE_DESCRIPTION("RPI AXI Performance monitor driver");
++MODULE_LICENSE("GPL");