diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-11 08:27:49 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-11 08:27:49 +0000 |
commit | ace9429bb58fd418f0c81d4c2835699bddf6bde6 (patch) | |
tree | b2d64bc10158fdd5497876388cd68142ca374ed3 /drivers/gpu/host1x | |
parent | Initial commit. (diff) | |
download | linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.tar.xz linux-ace9429bb58fd418f0c81d4c2835699bddf6bde6.zip |
Adding upstream version 6.6.15.upstream/6.6.15
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/gpu/host1x')
78 files changed, 10975 insertions, 0 deletions
diff --git a/drivers/gpu/host1x/Kconfig b/drivers/gpu/host1x/Kconfig new file mode 100644 index 0000000000..e6c78ae200 --- /dev/null +++ b/drivers/gpu/host1x/Kconfig @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config TEGRA_HOST1X_CONTEXT_BUS + bool + +config TEGRA_HOST1X + tristate "NVIDIA Tegra host1x driver" + depends on ARCH_TEGRA || COMPILE_TEST + select DMA_SHARED_BUFFER + select TEGRA_HOST1X_CONTEXT_BUS + select IOMMU_IOVA + help + Driver for the NVIDIA Tegra host1x hardware. + + The Tegra host1x module is the DMA engine for register access to + Tegra's graphics- and multimedia-related modules. The modules served + by host1x are referred to as clients. host1x includes some other + functionality, such as synchronization. + +if TEGRA_HOST1X + +config TEGRA_HOST1X_FIREWALL + bool "Enable HOST1X security firewall" + default y + help + Say yes if kernel should protect command streams from tampering. + + If unsure, choose Y. + +endif diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile new file mode 100644 index 0000000000..ee5286ffe0 --- /dev/null +++ b/drivers/gpu/host1x/Makefile @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: GPL-2.0 +host1x-y = \ + bus.o \ + syncpt.o \ + dev.o \ + intr.o \ + cdma.o \ + channel.o \ + job.o \ + debug.o \ + mipi.o \ + fence.o \ + hw/host1x01.o \ + hw/host1x02.o \ + hw/host1x04.o \ + hw/host1x05.o \ + hw/host1x06.o \ + hw/host1x07.o \ + hw/host1x08.o + +host1x-$(CONFIG_IOMMU_API) += \ + context.o + +obj-$(CONFIG_TEGRA_HOST1X) += host1x.o +obj-$(CONFIG_TEGRA_HOST1X_CONTEXT_BUS) += context_bus.o diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c new file mode 100644 index 0000000000..84d042796d --- /dev/null +++ b/drivers/gpu/host1x/bus.c @@ -0,0 +1,965 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 Avionic Design GmbH + * Copyright (C) 2012-2013, NVIDIA Corporation + */ + +#include <linux/debugfs.h> +#include <linux/dma-mapping.h> +#include <linux/host1x.h> +#include <linux/of.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/of_device.h> + +#include "bus.h" +#include "dev.h" + +static DEFINE_MUTEX(clients_lock); +static LIST_HEAD(clients); + +static DEFINE_MUTEX(drivers_lock); +static LIST_HEAD(drivers); + +static DEFINE_MUTEX(devices_lock); +static LIST_HEAD(devices); + +struct host1x_subdev { + struct host1x_client *client; + struct device_node *np; + struct list_head list; +}; + +/** + * host1x_subdev_add() - add a new subdevice with an associated device node + * @device: host1x device to add the subdevice to + * @driver: host1x driver containing the subdevices + * @np: device node + */ +static int host1x_subdev_add(struct host1x_device *device, + struct host1x_driver *driver, + struct device_node *np) +{ + struct host1x_subdev *subdev; + struct device_node *child; + int err; + + subdev = kzalloc(sizeof(*subdev), GFP_KERNEL); + if (!subdev) + return -ENOMEM; + + INIT_LIST_HEAD(&subdev->list); + subdev->np = of_node_get(np); + + mutex_lock(&device->subdevs_lock); + list_add_tail(&subdev->list, &device->subdevs); + mutex_unlock(&device->subdevs_lock); + + /* recursively add children */ + for_each_child_of_node(np, child) { + if (of_match_node(driver->subdevs, child) && + of_device_is_available(child)) { + err = host1x_subdev_add(device, driver, child); + if (err < 0) { + /* XXX cleanup? */ + of_node_put(child); + return err; + } + } + } + + return 0; +} + +/** + * host1x_subdev_del() - remove subdevice + * @subdev: subdevice to remove + */ +static void host1x_subdev_del(struct host1x_subdev *subdev) +{ + list_del(&subdev->list); + of_node_put(subdev->np); + kfree(subdev); +} + +/** + * host1x_device_parse_dt() - scan device tree and add matching subdevices + * @device: host1x logical device + * @driver: host1x driver + */ +static int host1x_device_parse_dt(struct host1x_device *device, + struct host1x_driver *driver) +{ + struct device_node *np; + int err; + + for_each_child_of_node(device->dev.parent->of_node, np) { + if (of_match_node(driver->subdevs, np) && + of_device_is_available(np)) { + err = host1x_subdev_add(device, driver, np); + if (err < 0) { + of_node_put(np); + return err; + } + } + } + + return 0; +} + +static void host1x_subdev_register(struct host1x_device *device, + struct host1x_subdev *subdev, + struct host1x_client *client) +{ + int err; + + /* + * Move the subdevice to the list of active (registered) subdevices + * and associate it with a client. At the same time, associate the + * client with its parent device. + */ + mutex_lock(&device->subdevs_lock); + mutex_lock(&device->clients_lock); + list_move_tail(&client->list, &device->clients); + list_move_tail(&subdev->list, &device->active); + client->host = &device->dev; + subdev->client = client; + mutex_unlock(&device->clients_lock); + mutex_unlock(&device->subdevs_lock); + + if (list_empty(&device->subdevs)) { + err = device_add(&device->dev); + if (err < 0) + dev_err(&device->dev, "failed to add: %d\n", err); + else + device->registered = true; + } +} + +static void __host1x_subdev_unregister(struct host1x_device *device, + struct host1x_subdev *subdev) +{ + struct host1x_client *client = subdev->client; + + /* + * If all subdevices have been activated, we're about to remove the + * first active subdevice, so unload the driver first. + */ + if (list_empty(&device->subdevs)) { + if (device->registered) { + device->registered = false; + device_del(&device->dev); + } + } + + /* + * Move the subdevice back to the list of idle subdevices and remove + * it from list of clients. + */ + mutex_lock(&device->clients_lock); + subdev->client = NULL; + client->host = NULL; + list_move_tail(&subdev->list, &device->subdevs); + /* + * XXX: Perhaps don't do this here, but rather explicitly remove it + * when the device is about to be deleted. + * + * This is somewhat complicated by the fact that this function is + * used to remove the subdevice when a client is unregistered but + * also when the composite device is about to be removed. + */ + list_del_init(&client->list); + mutex_unlock(&device->clients_lock); +} + +static void host1x_subdev_unregister(struct host1x_device *device, + struct host1x_subdev *subdev) +{ + mutex_lock(&device->subdevs_lock); + __host1x_subdev_unregister(device, subdev); + mutex_unlock(&device->subdevs_lock); +} + +/** + * host1x_device_init() - initialize a host1x logical device + * @device: host1x logical device + * + * The driver for the host1x logical device can call this during execution of + * its &host1x_driver.probe implementation to initialize each of its clients. + * The client drivers access the subsystem specific driver data using the + * &host1x_client.parent field and driver data associated with it (usually by + * calling dev_get_drvdata()). + */ +int host1x_device_init(struct host1x_device *device) +{ + struct host1x_client *client; + int err; + + mutex_lock(&device->clients_lock); + + list_for_each_entry(client, &device->clients, list) { + if (client->ops && client->ops->early_init) { + err = client->ops->early_init(client); + if (err < 0) { + dev_err(&device->dev, "failed to early initialize %s: %d\n", + dev_name(client->dev), err); + goto teardown_late; + } + } + } + + list_for_each_entry(client, &device->clients, list) { + if (client->ops && client->ops->init) { + err = client->ops->init(client); + if (err < 0) { + dev_err(&device->dev, + "failed to initialize %s: %d\n", + dev_name(client->dev), err); + goto teardown; + } + } + } + + mutex_unlock(&device->clients_lock); + + return 0; + +teardown: + list_for_each_entry_continue_reverse(client, &device->clients, list) + if (client->ops->exit) + client->ops->exit(client); + + /* reset client to end of list for late teardown */ + client = list_entry(&device->clients, struct host1x_client, list); + +teardown_late: + list_for_each_entry_continue_reverse(client, &device->clients, list) + if (client->ops->late_exit) + client->ops->late_exit(client); + + mutex_unlock(&device->clients_lock); + return err; +} +EXPORT_SYMBOL(host1x_device_init); + +/** + * host1x_device_exit() - uninitialize host1x logical device + * @device: host1x logical device + * + * When the driver for a host1x logical device is unloaded, it can call this + * function to tear down each of its clients. Typically this is done after a + * subsystem-specific data structure is removed and the functionality can no + * longer be used. + */ +int host1x_device_exit(struct host1x_device *device) +{ + struct host1x_client *client; + int err; + + mutex_lock(&device->clients_lock); + + list_for_each_entry_reverse(client, &device->clients, list) { + if (client->ops && client->ops->exit) { + err = client->ops->exit(client); + if (err < 0) { + dev_err(&device->dev, + "failed to cleanup %s: %d\n", + dev_name(client->dev), err); + mutex_unlock(&device->clients_lock); + return err; + } + } + } + + list_for_each_entry_reverse(client, &device->clients, list) { + if (client->ops && client->ops->late_exit) { + err = client->ops->late_exit(client); + if (err < 0) { + dev_err(&device->dev, "failed to late cleanup %s: %d\n", + dev_name(client->dev), err); + mutex_unlock(&device->clients_lock); + return err; + } + } + } + + mutex_unlock(&device->clients_lock); + + return 0; +} +EXPORT_SYMBOL(host1x_device_exit); + +static int host1x_add_client(struct host1x *host1x, + struct host1x_client *client) +{ + struct host1x_device *device; + struct host1x_subdev *subdev; + + mutex_lock(&host1x->devices_lock); + + list_for_each_entry(device, &host1x->devices, list) { + list_for_each_entry(subdev, &device->subdevs, list) { + if (subdev->np == client->dev->of_node) { + host1x_subdev_register(device, subdev, client); + mutex_unlock(&host1x->devices_lock); + return 0; + } + } + } + + mutex_unlock(&host1x->devices_lock); + return -ENODEV; +} + +static int host1x_del_client(struct host1x *host1x, + struct host1x_client *client) +{ + struct host1x_device *device, *dt; + struct host1x_subdev *subdev; + + mutex_lock(&host1x->devices_lock); + + list_for_each_entry_safe(device, dt, &host1x->devices, list) { + list_for_each_entry(subdev, &device->active, list) { + if (subdev->client == client) { + host1x_subdev_unregister(device, subdev); + mutex_unlock(&host1x->devices_lock); + return 0; + } + } + } + + mutex_unlock(&host1x->devices_lock); + return -ENODEV; +} + +static int host1x_device_match(struct device *dev, struct device_driver *drv) +{ + return strcmp(dev_name(dev), drv->name) == 0; +} + +/* + * Note that this is really only needed for backwards compatibility + * with libdrm, which parses this information from sysfs and will + * fail if it can't find the OF_FULLNAME, specifically. + */ +static int host1x_device_uevent(const struct device *dev, + struct kobj_uevent_env *env) +{ + of_device_uevent(dev->parent, env); + + return 0; +} + +static int host1x_dma_configure(struct device *dev) +{ + return of_dma_configure(dev, dev->of_node, true); +} + +static const struct dev_pm_ops host1x_device_pm_ops = { + .suspend = pm_generic_suspend, + .resume = pm_generic_resume, + .freeze = pm_generic_freeze, + .thaw = pm_generic_thaw, + .poweroff = pm_generic_poweroff, + .restore = pm_generic_restore, +}; + +struct bus_type host1x_bus_type = { + .name = "host1x", + .match = host1x_device_match, + .uevent = host1x_device_uevent, + .dma_configure = host1x_dma_configure, + .pm = &host1x_device_pm_ops, +}; + +static void __host1x_device_del(struct host1x_device *device) +{ + struct host1x_subdev *subdev, *sd; + struct host1x_client *client, *cl; + + mutex_lock(&device->subdevs_lock); + + /* unregister subdevices */ + list_for_each_entry_safe(subdev, sd, &device->active, list) { + /* + * host1x_subdev_unregister() will remove the client from + * any lists, so we'll need to manually add it back to the + * list of idle clients. + * + * XXX: Alternatively, perhaps don't remove the client from + * any lists in host1x_subdev_unregister() and instead do + * that explicitly from host1x_unregister_client()? + */ + client = subdev->client; + + __host1x_subdev_unregister(device, subdev); + + /* add the client to the list of idle clients */ + mutex_lock(&clients_lock); + list_add_tail(&client->list, &clients); + mutex_unlock(&clients_lock); + } + + /* remove subdevices */ + list_for_each_entry_safe(subdev, sd, &device->subdevs, list) + host1x_subdev_del(subdev); + + mutex_unlock(&device->subdevs_lock); + + /* move clients to idle list */ + mutex_lock(&clients_lock); + mutex_lock(&device->clients_lock); + + list_for_each_entry_safe(client, cl, &device->clients, list) + list_move_tail(&client->list, &clients); + + mutex_unlock(&device->clients_lock); + mutex_unlock(&clients_lock); + + /* finally remove the device */ + list_del_init(&device->list); +} + +static void host1x_device_release(struct device *dev) +{ + struct host1x_device *device = to_host1x_device(dev); + + __host1x_device_del(device); + kfree(device); +} + +static int host1x_device_add(struct host1x *host1x, + struct host1x_driver *driver) +{ + struct host1x_client *client, *tmp; + struct host1x_subdev *subdev; + struct host1x_device *device; + int err; + + device = kzalloc(sizeof(*device), GFP_KERNEL); + if (!device) + return -ENOMEM; + + device_initialize(&device->dev); + + mutex_init(&device->subdevs_lock); + INIT_LIST_HEAD(&device->subdevs); + INIT_LIST_HEAD(&device->active); + mutex_init(&device->clients_lock); + INIT_LIST_HEAD(&device->clients); + INIT_LIST_HEAD(&device->list); + device->driver = driver; + + device->dev.coherent_dma_mask = host1x->dev->coherent_dma_mask; + device->dev.dma_mask = &device->dev.coherent_dma_mask; + dev_set_name(&device->dev, "%s", driver->driver.name); + device->dev.release = host1x_device_release; + device->dev.bus = &host1x_bus_type; + device->dev.parent = host1x->dev; + + of_dma_configure(&device->dev, host1x->dev->of_node, true); + + device->dev.dma_parms = &device->dma_parms; + dma_set_max_seg_size(&device->dev, UINT_MAX); + + err = host1x_device_parse_dt(device, driver); + if (err < 0) { + kfree(device); + return err; + } + + list_add_tail(&device->list, &host1x->devices); + + mutex_lock(&clients_lock); + + list_for_each_entry_safe(client, tmp, &clients, list) { + list_for_each_entry(subdev, &device->subdevs, list) { + if (subdev->np == client->dev->of_node) { + host1x_subdev_register(device, subdev, client); + break; + } + } + } + + mutex_unlock(&clients_lock); + + return 0; +} + +/* + * Removes a device by first unregistering any subdevices and then removing + * itself from the list of devices. + * + * This function must be called with the host1x->devices_lock held. + */ +static void host1x_device_del(struct host1x *host1x, + struct host1x_device *device) +{ + if (device->registered) { + device->registered = false; + device_del(&device->dev); + } + + put_device(&device->dev); +} + +static void host1x_attach_driver(struct host1x *host1x, + struct host1x_driver *driver) +{ + struct host1x_device *device; + int err; + + mutex_lock(&host1x->devices_lock); + + list_for_each_entry(device, &host1x->devices, list) { + if (device->driver == driver) { + mutex_unlock(&host1x->devices_lock); + return; + } + } + + err = host1x_device_add(host1x, driver); + if (err < 0) + dev_err(host1x->dev, "failed to allocate device: %d\n", err); + + mutex_unlock(&host1x->devices_lock); +} + +static void host1x_detach_driver(struct host1x *host1x, + struct host1x_driver *driver) +{ + struct host1x_device *device, *tmp; + + mutex_lock(&host1x->devices_lock); + + list_for_each_entry_safe(device, tmp, &host1x->devices, list) + if (device->driver == driver) + host1x_device_del(host1x, device); + + mutex_unlock(&host1x->devices_lock); +} + +static int host1x_devices_show(struct seq_file *s, void *data) +{ + struct host1x *host1x = s->private; + struct host1x_device *device; + + mutex_lock(&host1x->devices_lock); + + list_for_each_entry(device, &host1x->devices, list) { + struct host1x_subdev *subdev; + + seq_printf(s, "%s\n", dev_name(&device->dev)); + + mutex_lock(&device->subdevs_lock); + + list_for_each_entry(subdev, &device->active, list) + seq_printf(s, " %pOFf: %s\n", subdev->np, + dev_name(subdev->client->dev)); + + list_for_each_entry(subdev, &device->subdevs, list) + seq_printf(s, " %pOFf:\n", subdev->np); + + mutex_unlock(&device->subdevs_lock); + } + + mutex_unlock(&host1x->devices_lock); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(host1x_devices); + +/** + * host1x_register() - register a host1x controller + * @host1x: host1x controller + * + * The host1x controller driver uses this to register a host1x controller with + * the infrastructure. Note that all Tegra SoC generations have only ever come + * with a single host1x instance, so this function is somewhat academic. + */ +int host1x_register(struct host1x *host1x) +{ + struct host1x_driver *driver; + + mutex_lock(&devices_lock); + list_add_tail(&host1x->list, &devices); + mutex_unlock(&devices_lock); + + mutex_lock(&drivers_lock); + + list_for_each_entry(driver, &drivers, list) + host1x_attach_driver(host1x, driver); + + mutex_unlock(&drivers_lock); + + debugfs_create_file("devices", S_IRUGO, host1x->debugfs, host1x, + &host1x_devices_fops); + + return 0; +} + +/** + * host1x_unregister() - unregister a host1x controller + * @host1x: host1x controller + * + * The host1x controller driver uses this to remove a host1x controller from + * the infrastructure. + */ +int host1x_unregister(struct host1x *host1x) +{ + struct host1x_driver *driver; + + mutex_lock(&drivers_lock); + + list_for_each_entry(driver, &drivers, list) + host1x_detach_driver(host1x, driver); + + mutex_unlock(&drivers_lock); + + mutex_lock(&devices_lock); + list_del_init(&host1x->list); + mutex_unlock(&devices_lock); + + return 0; +} + +static int host1x_device_probe(struct device *dev) +{ + struct host1x_driver *driver = to_host1x_driver(dev->driver); + struct host1x_device *device = to_host1x_device(dev); + + if (driver->probe) + return driver->probe(device); + + return 0; +} + +static int host1x_device_remove(struct device *dev) +{ + struct host1x_driver *driver = to_host1x_driver(dev->driver); + struct host1x_device *device = to_host1x_device(dev); + + if (driver->remove) + return driver->remove(device); + + return 0; +} + +static void host1x_device_shutdown(struct device *dev) +{ + struct host1x_driver *driver = to_host1x_driver(dev->driver); + struct host1x_device *device = to_host1x_device(dev); + + if (driver->shutdown) + driver->shutdown(device); +} + +/** + * host1x_driver_register_full() - register a host1x driver + * @driver: host1x driver + * @owner: owner module + * + * Drivers for host1x logical devices call this function to register a driver + * with the infrastructure. Note that since these drive logical devices, the + * registration of the driver actually triggers tho logical device creation. + * A logical device will be created for each host1x instance. + */ +int host1x_driver_register_full(struct host1x_driver *driver, + struct module *owner) +{ + struct host1x *host1x; + + INIT_LIST_HEAD(&driver->list); + + mutex_lock(&drivers_lock); + list_add_tail(&driver->list, &drivers); + mutex_unlock(&drivers_lock); + + mutex_lock(&devices_lock); + + list_for_each_entry(host1x, &devices, list) + host1x_attach_driver(host1x, driver); + + mutex_unlock(&devices_lock); + + driver->driver.bus = &host1x_bus_type; + driver->driver.owner = owner; + driver->driver.probe = host1x_device_probe; + driver->driver.remove = host1x_device_remove; + driver->driver.shutdown = host1x_device_shutdown; + + return driver_register(&driver->driver); +} +EXPORT_SYMBOL(host1x_driver_register_full); + +/** + * host1x_driver_unregister() - unregister a host1x driver + * @driver: host1x driver + * + * Unbinds the driver from each of the host1x logical devices that it is + * bound to, effectively removing the subsystem devices that they represent. + */ +void host1x_driver_unregister(struct host1x_driver *driver) +{ + struct host1x *host1x; + + driver_unregister(&driver->driver); + + mutex_lock(&devices_lock); + + list_for_each_entry(host1x, &devices, list) + host1x_detach_driver(host1x, driver); + + mutex_unlock(&devices_lock); + + mutex_lock(&drivers_lock); + list_del_init(&driver->list); + mutex_unlock(&drivers_lock); +} +EXPORT_SYMBOL(host1x_driver_unregister); + +/** + * __host1x_client_init() - initialize a host1x client + * @client: host1x client + * @key: lock class key for the client-specific mutex + */ +void __host1x_client_init(struct host1x_client *client, struct lock_class_key *key) +{ + host1x_bo_cache_init(&client->cache); + INIT_LIST_HEAD(&client->list); + __mutex_init(&client->lock, "host1x client lock", key); + client->usecount = 0; +} +EXPORT_SYMBOL(__host1x_client_init); + +/** + * host1x_client_exit() - uninitialize a host1x client + * @client: host1x client + */ +void host1x_client_exit(struct host1x_client *client) +{ + mutex_destroy(&client->lock); +} +EXPORT_SYMBOL(host1x_client_exit); + +/** + * __host1x_client_register() - register a host1x client + * @client: host1x client + * + * Registers a host1x client with each host1x controller instance. Note that + * each client will only match their parent host1x controller and will only be + * associated with that instance. Once all clients have been registered with + * their parent host1x controller, the infrastructure will set up the logical + * device and call host1x_device_init(), which will in turn call each client's + * &host1x_client_ops.init implementation. + */ +int __host1x_client_register(struct host1x_client *client) +{ + struct host1x *host1x; + int err; + + mutex_lock(&devices_lock); + + list_for_each_entry(host1x, &devices, list) { + err = host1x_add_client(host1x, client); + if (!err) { + mutex_unlock(&devices_lock); + return 0; + } + } + + mutex_unlock(&devices_lock); + + mutex_lock(&clients_lock); + list_add_tail(&client->list, &clients); + mutex_unlock(&clients_lock); + + return 0; +} +EXPORT_SYMBOL(__host1x_client_register); + +/** + * host1x_client_unregister() - unregister a host1x client + * @client: host1x client + * + * Removes a host1x client from its host1x controller instance. If a logical + * device has already been initialized, it will be torn down. + */ +void host1x_client_unregister(struct host1x_client *client) +{ + struct host1x_client *c; + struct host1x *host1x; + int err; + + mutex_lock(&devices_lock); + + list_for_each_entry(host1x, &devices, list) { + err = host1x_del_client(host1x, client); + if (!err) { + mutex_unlock(&devices_lock); + return; + } + } + + mutex_unlock(&devices_lock); + mutex_lock(&clients_lock); + + list_for_each_entry(c, &clients, list) { + if (c == client) { + list_del_init(&c->list); + break; + } + } + + mutex_unlock(&clients_lock); + + host1x_bo_cache_destroy(&client->cache); +} +EXPORT_SYMBOL(host1x_client_unregister); + +int host1x_client_suspend(struct host1x_client *client) +{ + int err = 0; + + mutex_lock(&client->lock); + + if (client->usecount == 1) { + if (client->ops && client->ops->suspend) { + err = client->ops->suspend(client); + if (err < 0) + goto unlock; + } + } + + client->usecount--; + dev_dbg(client->dev, "use count: %u\n", client->usecount); + + if (client->parent) { + err = host1x_client_suspend(client->parent); + if (err < 0) + goto resume; + } + + goto unlock; + +resume: + if (client->usecount == 0) + if (client->ops && client->ops->resume) + client->ops->resume(client); + + client->usecount++; +unlock: + mutex_unlock(&client->lock); + return err; +} +EXPORT_SYMBOL(host1x_client_suspend); + +int host1x_client_resume(struct host1x_client *client) +{ + int err = 0; + + mutex_lock(&client->lock); + + if (client->parent) { + err = host1x_client_resume(client->parent); + if (err < 0) + goto unlock; + } + + if (client->usecount == 0) { + if (client->ops && client->ops->resume) { + err = client->ops->resume(client); + if (err < 0) + goto suspend; + } + } + + client->usecount++; + dev_dbg(client->dev, "use count: %u\n", client->usecount); + + goto unlock; + +suspend: + if (client->parent) + host1x_client_suspend(client->parent); +unlock: + mutex_unlock(&client->lock); + return err; +} +EXPORT_SYMBOL(host1x_client_resume); + +struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo, + enum dma_data_direction dir, + struct host1x_bo_cache *cache) +{ + struct host1x_bo_mapping *mapping; + + if (cache) { + mutex_lock(&cache->lock); + + list_for_each_entry(mapping, &cache->mappings, entry) { + if (mapping->bo == bo && mapping->direction == dir) { + kref_get(&mapping->ref); + goto unlock; + } + } + } + + mapping = bo->ops->pin(dev, bo, dir); + if (IS_ERR(mapping)) + goto unlock; + + spin_lock(&mapping->bo->lock); + list_add_tail(&mapping->list, &bo->mappings); + spin_unlock(&mapping->bo->lock); + + if (cache) { + INIT_LIST_HEAD(&mapping->entry); + mapping->cache = cache; + + list_add_tail(&mapping->entry, &cache->mappings); + + /* bump reference count to track the copy in the cache */ + kref_get(&mapping->ref); + } + +unlock: + if (cache) + mutex_unlock(&cache->lock); + + return mapping; +} +EXPORT_SYMBOL(host1x_bo_pin); + +static void __host1x_bo_unpin(struct kref *ref) +{ + struct host1x_bo_mapping *mapping = to_host1x_bo_mapping(ref); + + /* + * When the last reference of the mapping goes away, make sure to remove the mapping from + * the cache. + */ + if (mapping->cache) + list_del(&mapping->entry); + + spin_lock(&mapping->bo->lock); + list_del(&mapping->list); + spin_unlock(&mapping->bo->lock); + + mapping->bo->ops->unpin(mapping); +} + +void host1x_bo_unpin(struct host1x_bo_mapping *mapping) +{ + struct host1x_bo_cache *cache = mapping->cache; + + if (cache) + mutex_lock(&cache->lock); + + kref_put(&mapping->ref, __host1x_bo_unpin); + + if (cache) + mutex_unlock(&cache->lock); +} +EXPORT_SYMBOL(host1x_bo_unpin); diff --git a/drivers/gpu/host1x/bus.h b/drivers/gpu/host1x/bus.h new file mode 100644 index 0000000000..a4adf9abc3 --- /dev/null +++ b/drivers/gpu/host1x/bus.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012 Avionic Design GmbH + * Copyright (C) 2012-2013, NVIDIA Corporation + */ + +#ifndef HOST1X_BUS_H +#define HOST1X_BUS_H + +struct bus_type; +struct host1x; + +extern struct bus_type host1x_bus_type; + +int host1x_register(struct host1x *host1x); +int host1x_unregister(struct host1x *host1x); + +#endif diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c new file mode 100644 index 0000000000..d1336e438f --- /dev/null +++ b/drivers/gpu/host1x/cdma.c @@ -0,0 +1,693 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Tegra host1x Command DMA + * + * Copyright (c) 2010-2013, NVIDIA Corporation. + */ + + +#include <asm/cacheflush.h> +#include <linux/device.h> +#include <linux/dma-mapping.h> +#include <linux/host1x.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/kfifo.h> +#include <linux/slab.h> +#include <trace/events/host1x.h> + +#include "cdma.h" +#include "channel.h" +#include "dev.h" +#include "debug.h" +#include "job.h" + +/* + * push_buffer + * + * The push buffer is a circular array of words to be fetched by command DMA. + * Note that it works slightly differently to the sync queue; fence == pos + * means that the push buffer is full, not empty. + */ + +/* + * Typically the commands written into the push buffer are a pair of words. We + * use slots to represent each of these pairs and to simplify things. Note the + * strange number of slots allocated here. 512 slots will fit exactly within a + * single memory page. We also need one additional word at the end of the push + * buffer for the RESTART opcode that will instruct the CDMA to jump back to + * the beginning of the push buffer. With 512 slots, this means that we'll use + * 2 memory pages and waste 4092 bytes of the second page that will never be + * used. + */ +#define HOST1X_PUSHBUFFER_SLOTS 511 + +/* + * Clean up push buffer resources + */ +static void host1x_pushbuffer_destroy(struct push_buffer *pb) +{ + struct host1x_cdma *cdma = pb_to_cdma(pb); + struct host1x *host1x = cdma_to_host1x(cdma); + + if (!pb->mapped) + return; + + if (host1x->domain) { + iommu_unmap(host1x->domain, pb->dma, pb->alloc_size); + free_iova(&host1x->iova, iova_pfn(&host1x->iova, pb->dma)); + } + + dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->phys); + + pb->mapped = NULL; + pb->phys = 0; +} + +/* + * Init push buffer resources + */ +static int host1x_pushbuffer_init(struct push_buffer *pb) +{ + struct host1x_cdma *cdma = pb_to_cdma(pb); + struct host1x *host1x = cdma_to_host1x(cdma); + struct iova *alloc; + u32 size; + int err; + + pb->mapped = NULL; + pb->phys = 0; + pb->size = HOST1X_PUSHBUFFER_SLOTS * 8; + + size = pb->size + 4; + + /* initialize buffer pointers */ + pb->fence = pb->size - 8; + pb->pos = 0; + + if (host1x->domain) { + unsigned long shift; + + size = iova_align(&host1x->iova, size); + + pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys, + GFP_KERNEL); + if (!pb->mapped) + return -ENOMEM; + + shift = iova_shift(&host1x->iova); + alloc = alloc_iova(&host1x->iova, size >> shift, + host1x->iova_end >> shift, true); + if (!alloc) { + err = -ENOMEM; + goto iommu_free_mem; + } + + pb->dma = iova_dma_addr(&host1x->iova, alloc); + err = iommu_map(host1x->domain, pb->dma, pb->phys, size, + IOMMU_READ, GFP_KERNEL); + if (err) + goto iommu_free_iova; + } else { + pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys, + GFP_KERNEL); + if (!pb->mapped) + return -ENOMEM; + + pb->dma = pb->phys; + } + + pb->alloc_size = size; + + host1x_hw_pushbuffer_init(host1x, pb); + + return 0; + +iommu_free_iova: + __free_iova(&host1x->iova, alloc); +iommu_free_mem: + dma_free_wc(host1x->dev, size, pb->mapped, pb->phys); + + return err; +} + +/* + * Push two words to the push buffer + * Caller must ensure push buffer is not full + */ +static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2) +{ + u32 *p = (u32 *)((void *)pb->mapped + pb->pos); + + WARN_ON(pb->pos == pb->fence); + *(p++) = op1; + *(p++) = op2; + pb->pos += 8; + + if (pb->pos >= pb->size) + pb->pos -= pb->size; +} + +/* + * Pop a number of two word slots from the push buffer + * Caller must ensure push buffer is not empty + */ +static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots) +{ + /* Advance the next write position */ + pb->fence += slots * 8; + + if (pb->fence >= pb->size) + pb->fence -= pb->size; +} + +/* + * Return the number of two word slots free in the push buffer + */ +static u32 host1x_pushbuffer_space(struct push_buffer *pb) +{ + unsigned int fence = pb->fence; + + if (pb->fence < pb->pos) + fence += pb->size; + + return (fence - pb->pos) / 8; +} + +/* + * Sleep (if necessary) until the requested event happens + * - CDMA_EVENT_SYNC_QUEUE_EMPTY : sync queue is completely empty. + * - Returns 1 + * - CDMA_EVENT_PUSH_BUFFER_SPACE : there is space in the push buffer + * - Return the amount of space (> 0) + * Must be called with the cdma lock held. + */ +unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma, + enum cdma_event event) +{ + for (;;) { + struct push_buffer *pb = &cdma->push_buffer; + unsigned int space; + + switch (event) { + case CDMA_EVENT_SYNC_QUEUE_EMPTY: + space = list_empty(&cdma->sync_queue) ? 1 : 0; + break; + + case CDMA_EVENT_PUSH_BUFFER_SPACE: + space = host1x_pushbuffer_space(pb); + break; + + default: + WARN_ON(1); + return -EINVAL; + } + + if (space) + return space; + + trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma)->dev), + event); + + /* If somebody has managed to already start waiting, yield */ + if (cdma->event != CDMA_EVENT_NONE) { + mutex_unlock(&cdma->lock); + schedule(); + mutex_lock(&cdma->lock); + continue; + } + + cdma->event = event; + + mutex_unlock(&cdma->lock); + wait_for_completion(&cdma->complete); + mutex_lock(&cdma->lock); + } + + return 0; +} + +/* + * Sleep (if necessary) until the push buffer has enough free space. + * + * Must be called with the cdma lock held. + */ +static int host1x_cdma_wait_pushbuffer_space(struct host1x *host1x, + struct host1x_cdma *cdma, + unsigned int needed) +{ + while (true) { + struct push_buffer *pb = &cdma->push_buffer; + unsigned int space; + + space = host1x_pushbuffer_space(pb); + if (space >= needed) + break; + + trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma)->dev), + CDMA_EVENT_PUSH_BUFFER_SPACE); + + host1x_hw_cdma_flush(host1x, cdma); + + /* If somebody has managed to already start waiting, yield */ + if (cdma->event != CDMA_EVENT_NONE) { + mutex_unlock(&cdma->lock); + schedule(); + mutex_lock(&cdma->lock); + continue; + } + + cdma->event = CDMA_EVENT_PUSH_BUFFER_SPACE; + + mutex_unlock(&cdma->lock); + wait_for_completion(&cdma->complete); + mutex_lock(&cdma->lock); + } + + return 0; +} +/* + * Start timer that tracks the time spent by the job. + * Must be called with the cdma lock held. + */ +static void cdma_start_timer_locked(struct host1x_cdma *cdma, + struct host1x_job *job) +{ + if (cdma->timeout.client) { + /* timer already started */ + return; + } + + cdma->timeout.client = job->client; + cdma->timeout.syncpt = job->syncpt; + cdma->timeout.syncpt_val = job->syncpt_end; + cdma->timeout.start_ktime = ktime_get(); + + schedule_delayed_work(&cdma->timeout.wq, + msecs_to_jiffies(job->timeout)); +} + +/* + * Stop timer when a buffer submission completes. + * Must be called with the cdma lock held. + */ +static void stop_cdma_timer_locked(struct host1x_cdma *cdma) +{ + cancel_delayed_work(&cdma->timeout.wq); + cdma->timeout.client = NULL; +} + +/* + * For all sync queue entries that have already finished according to the + * current sync point registers: + * - unpin & unref their mems + * - pop their push buffer slots + * - remove them from the sync queue + * This is normally called from the host code's worker thread, but can be + * called manually if necessary. + * Must be called with the cdma lock held. + */ +static void update_cdma_locked(struct host1x_cdma *cdma) +{ + bool signal = false; + struct host1x_job *job, *n; + + /* + * Walk the sync queue, reading the sync point registers as necessary, + * to consume as many sync queue entries as possible without blocking + */ + list_for_each_entry_safe(job, n, &cdma->sync_queue, list) { + struct host1x_syncpt *sp = job->syncpt; + + /* Check whether this syncpt has completed, and bail if not */ + if (!host1x_syncpt_is_expired(sp, job->syncpt_end) && + !job->cancelled) { + /* Start timer on next pending syncpt */ + if (job->timeout) + cdma_start_timer_locked(cdma, job); + + break; + } + + /* Cancel timeout, when a buffer completes */ + if (cdma->timeout.client) + stop_cdma_timer_locked(cdma); + + /* Unpin the memory */ + host1x_job_unpin(job); + + /* Pop push buffer slots */ + if (job->num_slots) { + struct push_buffer *pb = &cdma->push_buffer; + + host1x_pushbuffer_pop(pb, job->num_slots); + + if (cdma->event == CDMA_EVENT_PUSH_BUFFER_SPACE) + signal = true; + } + + list_del(&job->list); + host1x_job_put(job); + } + + if (cdma->event == CDMA_EVENT_SYNC_QUEUE_EMPTY && + list_empty(&cdma->sync_queue)) + signal = true; + + if (signal) { + cdma->event = CDMA_EVENT_NONE; + complete(&cdma->complete); + } +} + +void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, + struct device *dev) +{ + struct host1x *host1x = cdma_to_host1x(cdma); + u32 restart_addr, syncpt_incrs, syncpt_val; + struct host1x_job *job, *next_job = NULL; + + syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt); + + dev_dbg(dev, "%s: starting cleanup (thresh %d)\n", + __func__, syncpt_val); + + /* + * Move the sync_queue read pointer to the first entry that hasn't + * completed based on the current HW syncpt value. It's likely there + * won't be any (i.e. we're still at the head), but covers the case + * where a syncpt incr happens just prior/during the teardown. + */ + + dev_dbg(dev, "%s: skip completed buffers still in sync_queue\n", + __func__); + + list_for_each_entry(job, &cdma->sync_queue, list) { + if (syncpt_val < job->syncpt_end) { + + if (!list_is_last(&job->list, &cdma->sync_queue)) + next_job = list_next_entry(job, list); + + goto syncpt_incr; + } + + host1x_job_dump(dev, job); + } + + /* all jobs have been completed */ + job = NULL; + +syncpt_incr: + + /* + * Increment with CPU the remaining syncpts of a partially executed job. + * + * CDMA will continue execution starting with the next job or will get + * into idle state. + */ + if (next_job) + restart_addr = next_job->first_get; + else + restart_addr = cdma->last_pos; + + if (!job) + goto resume; + + /* do CPU increments for the remaining syncpts */ + if (job->syncpt_recovery) { + dev_dbg(dev, "%s: perform CPU incr on pending buffers\n", + __func__); + + /* won't need a timeout when replayed */ + job->timeout = 0; + + syncpt_incrs = job->syncpt_end - syncpt_val; + dev_dbg(dev, "%s: CPU incr (%d)\n", __func__, syncpt_incrs); + + host1x_job_dump(dev, job); + + /* safe to use CPU to incr syncpts */ + host1x_hw_cdma_timeout_cpu_incr(host1x, cdma, job->first_get, + syncpt_incrs, job->syncpt_end, + job->num_slots); + + dev_dbg(dev, "%s: finished sync_queue modification\n", + __func__); + } else { + struct host1x_job *failed_job = job; + + host1x_job_dump(dev, job); + + host1x_syncpt_set_locked(job->syncpt); + failed_job->cancelled = true; + + list_for_each_entry_continue(job, &cdma->sync_queue, list) { + unsigned int i; + + if (job->syncpt != failed_job->syncpt) + continue; + + for (i = 0; i < job->num_slots; i++) { + unsigned int slot = (job->first_get/8 + i) % + HOST1X_PUSHBUFFER_SLOTS; + u32 *mapped = cdma->push_buffer.mapped; + + /* + * Overwrite opcodes with 0 word writes + * to offset 0xbad. This does nothing but + * has a easily detected signature in debug + * traces. + * + * On systems with MLOCK enforcement enabled, + * the above 0 word writes would fall foul of + * the enforcement. As such, in the first slot + * put a RESTART_W opcode to the beginning + * of the next job. We don't use this for older + * chips since those only support the RESTART + * opcode with inconvenient alignment requirements. + */ + if (i == 0 && host1x->info->has_wide_gather) { + unsigned int next_job = (job->first_get/8 + job->num_slots) + % HOST1X_PUSHBUFFER_SLOTS; + mapped[2*slot+0] = (0xd << 28) | (next_job * 2); + mapped[2*slot+1] = 0x0; + } else { + mapped[2*slot+0] = 0x1bad0000; + mapped[2*slot+1] = 0x1bad0000; + } + } + + job->cancelled = true; + } + + wmb(); + + update_cdma_locked(cdma); + } + +resume: + /* roll back DMAGET and start up channel again */ + host1x_hw_cdma_resume(host1x, cdma, restart_addr); +} + +static void cdma_update_work(struct work_struct *work) +{ + struct host1x_cdma *cdma = container_of(work, struct host1x_cdma, update_work); + + mutex_lock(&cdma->lock); + update_cdma_locked(cdma); + mutex_unlock(&cdma->lock); +} + +/* + * Create a cdma + */ +int host1x_cdma_init(struct host1x_cdma *cdma) +{ + int err; + + mutex_init(&cdma->lock); + init_completion(&cdma->complete); + INIT_WORK(&cdma->update_work, cdma_update_work); + + INIT_LIST_HEAD(&cdma->sync_queue); + + cdma->event = CDMA_EVENT_NONE; + cdma->running = false; + cdma->torndown = false; + + err = host1x_pushbuffer_init(&cdma->push_buffer); + if (err) + return err; + + return 0; +} + +/* + * Destroy a cdma + */ +int host1x_cdma_deinit(struct host1x_cdma *cdma) +{ + struct push_buffer *pb = &cdma->push_buffer; + struct host1x *host1x = cdma_to_host1x(cdma); + + if (cdma->running) { + pr_warn("%s: CDMA still running\n", __func__); + return -EBUSY; + } + + host1x_pushbuffer_destroy(pb); + host1x_hw_cdma_timeout_destroy(host1x, cdma); + + return 0; +} + +/* + * Begin a cdma submit + */ +int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job) +{ + struct host1x *host1x = cdma_to_host1x(cdma); + + mutex_lock(&cdma->lock); + + /* + * Check if syncpoint was locked due to previous job timeout. + * This needs to be done within the cdma lock to avoid a race + * with the timeout handler. + */ + if (job->syncpt->locked) { + mutex_unlock(&cdma->lock); + return -EPERM; + } + + if (job->timeout) { + /* init state on first submit with timeout value */ + if (!cdma->timeout.initialized) { + int err; + + err = host1x_hw_cdma_timeout_init(host1x, cdma); + if (err) { + mutex_unlock(&cdma->lock); + return err; + } + } + } + + if (!cdma->running) + host1x_hw_cdma_start(host1x, cdma); + + cdma->slots_free = 0; + cdma->slots_used = 0; + cdma->first_get = cdma->push_buffer.pos; + + trace_host1x_cdma_begin(dev_name(job->channel->dev)); + return 0; +} + +/* + * Push two words into a push buffer slot + * Blocks as necessary if the push buffer is full. + */ +void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2) +{ + struct host1x *host1x = cdma_to_host1x(cdma); + struct push_buffer *pb = &cdma->push_buffer; + u32 slots_free = cdma->slots_free; + + if (host1x_debug_trace_cmdbuf) + trace_host1x_cdma_push(dev_name(cdma_to_channel(cdma)->dev), + op1, op2); + + if (slots_free == 0) { + host1x_hw_cdma_flush(host1x, cdma); + slots_free = host1x_cdma_wait_locked(cdma, + CDMA_EVENT_PUSH_BUFFER_SPACE); + } + + cdma->slots_free = slots_free - 1; + cdma->slots_used++; + host1x_pushbuffer_push(pb, op1, op2); +} + +/* + * Push four words into two consecutive push buffer slots. Note that extra + * care needs to be taken not to split the two slots across the end of the + * push buffer. Otherwise the RESTART opcode at the end of the push buffer + * that ensures processing will restart at the beginning will break up the + * four words. + * + * Blocks as necessary if the push buffer is full. + */ +void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2, + u32 op3, u32 op4) +{ + struct host1x_channel *channel = cdma_to_channel(cdma); + struct host1x *host1x = cdma_to_host1x(cdma); + struct push_buffer *pb = &cdma->push_buffer; + unsigned int space = cdma->slots_free; + unsigned int needed = 2, extra = 0; + + if (host1x_debug_trace_cmdbuf) + trace_host1x_cdma_push_wide(dev_name(channel->dev), op1, op2, + op3, op4); + + /* compute number of extra slots needed for padding */ + if (pb->pos + 16 > pb->size) { + extra = (pb->size - pb->pos) / 8; + needed += extra; + } + + host1x_cdma_wait_pushbuffer_space(host1x, cdma, needed); + space = host1x_pushbuffer_space(pb); + + cdma->slots_free = space - needed; + cdma->slots_used += needed; + + if (extra > 0) { + /* + * If there isn't enough space at the tail of the pushbuffer, + * insert a RESTART(0) here to go back to the beginning. + * The code above adjusted the indexes appropriately. + */ + host1x_pushbuffer_push(pb, (0x5 << 28), 0xdead0000); + } + + host1x_pushbuffer_push(pb, op1, op2); + host1x_pushbuffer_push(pb, op3, op4); +} + +/* + * End a cdma submit + * Kick off DMA, add job to the sync queue, and a number of slots to be freed + * from the pushbuffer. The handles for a submit must all be pinned at the same + * time, but they can be unpinned in smaller chunks. + */ +void host1x_cdma_end(struct host1x_cdma *cdma, + struct host1x_job *job) +{ + struct host1x *host1x = cdma_to_host1x(cdma); + bool idle = list_empty(&cdma->sync_queue); + + host1x_hw_cdma_flush(host1x, cdma); + + job->first_get = cdma->first_get; + job->num_slots = cdma->slots_used; + host1x_job_get(job); + list_add_tail(&job->list, &cdma->sync_queue); + + /* start timer on idle -> active transitions */ + if (job->timeout && idle) + cdma_start_timer_locked(cdma, job); + + trace_host1x_cdma_end(dev_name(job->channel->dev)); + mutex_unlock(&cdma->lock); +} + +/* + * Update cdma state according to current sync point values + */ +void host1x_cdma_update(struct host1x_cdma *cdma) +{ + schedule_work(&cdma->update_work); +} diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h new file mode 100644 index 0000000000..7fd8168af4 --- /dev/null +++ b/drivers/gpu/host1x/cdma.h @@ -0,0 +1,94 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tegra host1x Command DMA + * + * Copyright (c) 2010-2013, NVIDIA Corporation. + */ + +#ifndef __HOST1X_CDMA_H +#define __HOST1X_CDMA_H + +#include <linux/sched.h> +#include <linux/completion.h> +#include <linux/list.h> +#include <linux/workqueue.h> + +struct host1x_syncpt; +struct host1x_userctx_timeout; +struct host1x_job; + +/* + * cdma + * + * This is in charge of a host command DMA channel. + * Sends ops to a push buffer, and takes responsibility for unpinning + * (& possibly freeing) of memory after those ops have completed. + * Producer: + * begin + * push - send ops to the push buffer + * end - start command DMA and enqueue handles to be unpinned + * Consumer: + * update - call to update sync queue and push buffer, unpin memory + */ + +struct push_buffer { + void *mapped; /* mapped pushbuffer memory */ + dma_addr_t dma; /* device address of pushbuffer */ + dma_addr_t phys; /* physical address of pushbuffer */ + u32 fence; /* index we've written */ + u32 pos; /* index to write to */ + u32 size; + u32 alloc_size; +}; + +struct buffer_timeout { + struct delayed_work wq; /* work queue */ + bool initialized; /* timer one-time setup flag */ + struct host1x_syncpt *syncpt; /* buffer completion syncpt */ + u32 syncpt_val; /* syncpt value when completed */ + ktime_t start_ktime; /* starting time */ + /* context timeout information */ + struct host1x_client *client; +}; + +enum cdma_event { + CDMA_EVENT_NONE, /* not waiting for any event */ + CDMA_EVENT_SYNC_QUEUE_EMPTY, /* wait for empty sync queue */ + CDMA_EVENT_PUSH_BUFFER_SPACE /* wait for space in push buffer */ +}; + +struct host1x_cdma { + struct mutex lock; /* controls access to shared state */ + struct completion complete; /* signalled when event occurs */ + enum cdma_event event; /* event that complete is waiting for */ + unsigned int slots_used; /* pb slots used in current submit */ + unsigned int slots_free; /* pb slots free in current submit */ + unsigned int first_get; /* DMAGET value, where submit begins */ + unsigned int last_pos; /* last value written to DMAPUT */ + struct push_buffer push_buffer; /* channel's push buffer */ + struct list_head sync_queue; /* job queue */ + struct buffer_timeout timeout; /* channel's timeout state/wq */ + bool running; + bool torndown; + struct work_struct update_work; +}; + +#define cdma_to_channel(cdma) container_of(cdma, struct host1x_channel, cdma) +#define cdma_to_host1x(cdma) dev_get_drvdata(cdma_to_channel(cdma)->dev->parent) +#define pb_to_cdma(pb) container_of(pb, struct host1x_cdma, push_buffer) + +int host1x_cdma_init(struct host1x_cdma *cdma); +int host1x_cdma_deinit(struct host1x_cdma *cdma); +int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job); +void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2); +void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2, + u32 op3, u32 op4); +void host1x_cdma_end(struct host1x_cdma *cdma, struct host1x_job *job); +void host1x_cdma_update(struct host1x_cdma *cdma); +void host1x_cdma_peek(struct host1x_cdma *cdma, u32 dmaget, int slot, + u32 *out); +unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma, + enum cdma_event event); +void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, + struct device *dev); +#endif diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c new file mode 100644 index 0000000000..2d0051d631 --- /dev/null +++ b/drivers/gpu/host1x/channel.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Tegra host1x Channel + * + * Copyright (c) 2010-2013, NVIDIA Corporation. + */ + +#include <linux/slab.h> +#include <linux/module.h> + +#include "channel.h" +#include "dev.h" +#include "job.h" + +/* Constructor for the host1x device list */ +int host1x_channel_list_init(struct host1x_channel_list *chlist, + unsigned int num_channels) +{ + chlist->channels = kcalloc(num_channels, sizeof(struct host1x_channel), + GFP_KERNEL); + if (!chlist->channels) + return -ENOMEM; + + chlist->allocated_channels = bitmap_zalloc(num_channels, GFP_KERNEL); + if (!chlist->allocated_channels) { + kfree(chlist->channels); + return -ENOMEM; + } + + return 0; +} + +void host1x_channel_list_free(struct host1x_channel_list *chlist) +{ + bitmap_free(chlist->allocated_channels); + kfree(chlist->channels); +} + +int host1x_job_submit(struct host1x_job *job) +{ + struct host1x *host = dev_get_drvdata(job->channel->dev->parent); + + return host1x_hw_channel_submit(host, job); +} +EXPORT_SYMBOL(host1x_job_submit); + +struct host1x_channel *host1x_channel_get(struct host1x_channel *channel) +{ + kref_get(&channel->refcount); + + return channel; +} +EXPORT_SYMBOL(host1x_channel_get); + +/** + * host1x_channel_get_index() - Attempt to get channel reference by index + * @host: Host1x device object + * @index: Index of channel + * + * If channel number @index is currently allocated, increase its refcount + * and return a pointer to it. Otherwise, return NULL. + */ +struct host1x_channel *host1x_channel_get_index(struct host1x *host, + unsigned int index) +{ + struct host1x_channel *ch = &host->channel_list.channels[index]; + + if (!kref_get_unless_zero(&ch->refcount)) + return NULL; + + return ch; +} + +void host1x_channel_stop(struct host1x_channel *channel) +{ + struct host1x *host = dev_get_drvdata(channel->dev->parent); + + host1x_hw_cdma_stop(host, &channel->cdma); +} +EXPORT_SYMBOL(host1x_channel_stop); + +static void release_channel(struct kref *kref) +{ + struct host1x_channel *channel = + container_of(kref, struct host1x_channel, refcount); + struct host1x *host = dev_get_drvdata(channel->dev->parent); + struct host1x_channel_list *chlist = &host->channel_list; + + host1x_hw_cdma_stop(host, &channel->cdma); + host1x_cdma_deinit(&channel->cdma); + + clear_bit(channel->id, chlist->allocated_channels); +} + +void host1x_channel_put(struct host1x_channel *channel) +{ + kref_put(&channel->refcount, release_channel); +} +EXPORT_SYMBOL(host1x_channel_put); + +static struct host1x_channel *acquire_unused_channel(struct host1x *host) +{ + struct host1x_channel_list *chlist = &host->channel_list; + unsigned int max_channels = host->info->nb_channels; + unsigned int index; + + index = find_first_zero_bit(chlist->allocated_channels, max_channels); + if (index >= max_channels) { + dev_err(host->dev, "failed to find free channel\n"); + return NULL; + } + + chlist->channels[index].id = index; + + set_bit(index, chlist->allocated_channels); + + return &chlist->channels[index]; +} + +/** + * host1x_channel_request() - Allocate a channel + * @client: Host1x client this channel will be used to send commands to + * + * Allocates a new host1x channel for @client. May return NULL if CDMA + * initialization fails. + */ +struct host1x_channel *host1x_channel_request(struct host1x_client *client) +{ + struct host1x *host = dev_get_drvdata(client->dev->parent); + struct host1x_channel_list *chlist = &host->channel_list; + struct host1x_channel *channel; + int err; + + channel = acquire_unused_channel(host); + if (!channel) + return NULL; + + kref_init(&channel->refcount); + mutex_init(&channel->submitlock); + channel->client = client; + channel->dev = client->dev; + + err = host1x_hw_channel_init(host, channel, channel->id); + if (err < 0) + goto fail; + + err = host1x_cdma_init(&channel->cdma); + if (err < 0) + goto fail; + + return channel; + +fail: + clear_bit(channel->id, chlist->allocated_channels); + + dev_err(client->dev, "failed to initialize channel\n"); + + return NULL; +} +EXPORT_SYMBOL(host1x_channel_request); diff --git a/drivers/gpu/host1x/channel.h b/drivers/gpu/host1x/channel.h new file mode 100644 index 0000000000..39044ff6c3 --- /dev/null +++ b/drivers/gpu/host1x/channel.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tegra host1x Channel + * + * Copyright (c) 2010-2013, NVIDIA Corporation. + */ + +#ifndef __HOST1X_CHANNEL_H +#define __HOST1X_CHANNEL_H + +#include <linux/io.h> +#include <linux/kref.h> + +#include "cdma.h" + +struct host1x; +struct host1x_channel; + +struct host1x_channel_list { + struct host1x_channel *channels; + unsigned long *allocated_channels; +}; + +struct host1x_channel { + struct kref refcount; + unsigned int id; + struct mutex submitlock; + void __iomem *regs; + struct host1x_client *client; + struct device *dev; + struct host1x_cdma cdma; +}; + +/* channel list operations */ +int host1x_channel_list_init(struct host1x_channel_list *chlist, + unsigned int num_channels); +void host1x_channel_list_free(struct host1x_channel_list *chlist); +struct host1x_channel *host1x_channel_get_index(struct host1x *host, + unsigned int index); + +#endif diff --git a/drivers/gpu/host1x/context.c b/drivers/gpu/host1x/context.c new file mode 100644 index 0000000000..955c971c52 --- /dev/null +++ b/drivers/gpu/host1x/context.c @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, NVIDIA Corporation. + */ + +#include <linux/device.h> +#include <linux/kref.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/pid.h> +#include <linux/slab.h> + +#include "context.h" +#include "dev.h" + +static void host1x_memory_context_release(struct device *dev) +{ + /* context device is freed in host1x_memory_context_list_free() */ +} + +int host1x_memory_context_list_init(struct host1x *host1x) +{ + struct host1x_memory_context_list *cdl = &host1x->context_list; + struct device_node *node = host1x->dev->of_node; + struct host1x_memory_context *ctx; + unsigned int i; + int err; + + cdl->devs = NULL; + cdl->len = 0; + mutex_init(&cdl->lock); + + err = of_property_count_u32_elems(node, "iommu-map"); + if (err < 0) + return 0; + + cdl->len = err / 4; + cdl->devs = kcalloc(cdl->len, sizeof(*cdl->devs), GFP_KERNEL); + if (!cdl->devs) + return -ENOMEM; + + for (i = 0; i < cdl->len; i++) { + ctx = &cdl->devs[i]; + + ctx->host = host1x; + + device_initialize(&ctx->dev); + + /* + * Due to an issue with T194 NVENC, only 38 bits can be used. + * Anyway, 256GiB of IOVA ought to be enough for anyone. + */ + ctx->dma_mask = DMA_BIT_MASK(38); + ctx->dev.dma_mask = &ctx->dma_mask; + ctx->dev.coherent_dma_mask = ctx->dma_mask; + dev_set_name(&ctx->dev, "host1x-ctx.%d", i); + ctx->dev.bus = &host1x_context_device_bus_type; + ctx->dev.parent = host1x->dev; + ctx->dev.release = host1x_memory_context_release; + + dma_set_max_seg_size(&ctx->dev, UINT_MAX); + + err = device_add(&ctx->dev); + if (err) { + dev_err(host1x->dev, "could not add context device %d: %d\n", i, err); + put_device(&ctx->dev); + goto unreg_devices; + } + + err = of_dma_configure_id(&ctx->dev, node, true, &i); + if (err) { + dev_err(host1x->dev, "IOMMU configuration failed for context device %d: %d\n", + i, err); + device_unregister(&ctx->dev); + goto unreg_devices; + } + + if (!tegra_dev_iommu_get_stream_id(&ctx->dev, &ctx->stream_id) || + !device_iommu_mapped(&ctx->dev)) { + dev_err(host1x->dev, "Context device %d has no IOMMU!\n", i); + device_unregister(&ctx->dev); + + /* + * This means that if IOMMU is disabled but context devices + * are defined in the device tree, Host1x will fail to probe. + * That's probably OK in this time and age. + */ + err = -EINVAL; + + goto unreg_devices; + } + } + + return 0; + +unreg_devices: + while (i--) + device_unregister(&cdl->devs[i].dev); + + kfree(cdl->devs); + cdl->devs = NULL; + cdl->len = 0; + + return err; +} + +void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl) +{ + unsigned int i; + + for (i = 0; i < cdl->len; i++) + device_unregister(&cdl->devs[i].dev); + + kfree(cdl->devs); + cdl->len = 0; +} + +struct host1x_memory_context *host1x_memory_context_alloc(struct host1x *host1x, + struct device *dev, + struct pid *pid) +{ + struct host1x_memory_context_list *cdl = &host1x->context_list; + struct host1x_memory_context *free = NULL; + int i; + + if (!cdl->len) + return ERR_PTR(-EOPNOTSUPP); + + mutex_lock(&cdl->lock); + + for (i = 0; i < cdl->len; i++) { + struct host1x_memory_context *cd = &cdl->devs[i]; + + if (cd->dev.iommu->iommu_dev != dev->iommu->iommu_dev) + continue; + + if (cd->owner == pid) { + refcount_inc(&cd->ref); + mutex_unlock(&cdl->lock); + return cd; + } else if (!cd->owner && !free) { + free = cd; + } + } + + if (!free) { + mutex_unlock(&cdl->lock); + return ERR_PTR(-EBUSY); + } + + refcount_set(&free->ref, 1); + free->owner = get_pid(pid); + + mutex_unlock(&cdl->lock); + + return free; +} +EXPORT_SYMBOL_GPL(host1x_memory_context_alloc); + +void host1x_memory_context_get(struct host1x_memory_context *cd) +{ + refcount_inc(&cd->ref); +} +EXPORT_SYMBOL_GPL(host1x_memory_context_get); + +void host1x_memory_context_put(struct host1x_memory_context *cd) +{ + struct host1x_memory_context_list *cdl = &cd->host->context_list; + + if (refcount_dec_and_mutex_lock(&cd->ref, &cdl->lock)) { + put_pid(cd->owner); + cd->owner = NULL; + mutex_unlock(&cdl->lock); + } +} +EXPORT_SYMBOL_GPL(host1x_memory_context_put); diff --git a/drivers/gpu/host1x/context.h b/drivers/gpu/host1x/context.h new file mode 100644 index 0000000000..3e03bc1d3b --- /dev/null +++ b/drivers/gpu/host1x/context.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Host1x context devices + * + * Copyright (c) 2020, NVIDIA Corporation. + */ + +#ifndef __HOST1X_CONTEXT_H +#define __HOST1X_CONTEXT_H + +#include <linux/mutex.h> +#include <linux/refcount.h> + +struct host1x; + +extern struct bus_type host1x_context_device_bus_type; + +struct host1x_memory_context_list { + struct mutex lock; + struct host1x_memory_context *devs; + unsigned int len; +}; + +#ifdef CONFIG_IOMMU_API +int host1x_memory_context_list_init(struct host1x *host1x); +void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl); +#else +static inline int host1x_memory_context_list_init(struct host1x *host1x) +{ + return 0; +} + +static inline void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl) +{ +} +#endif + +#endif diff --git a/drivers/gpu/host1x/context_bus.c b/drivers/gpu/host1x/context_bus.c new file mode 100644 index 0000000000..d9421179d7 --- /dev/null +++ b/drivers/gpu/host1x/context_bus.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, NVIDIA Corporation. + */ + +#include <linux/device.h> +#include <linux/of.h> + +struct bus_type host1x_context_device_bus_type = { + .name = "host1x-context", +}; +EXPORT_SYMBOL_GPL(host1x_context_device_bus_type); + +static int __init host1x_context_device_bus_init(void) +{ + int err; + + err = bus_register(&host1x_context_device_bus_type); + if (err < 0) { + pr_err("bus type registration failed: %d\n", err); + return err; + } + + return 0; +} +postcore_initcall(host1x_context_device_bus_init); diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c new file mode 100644 index 0000000000..a18cc8d8ca --- /dev/null +++ b/drivers/gpu/host1x/debug.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2010 Google, Inc. + * Author: Erik Gilling <konkers@android.com> + * + * Copyright (C) 2011-2013 NVIDIA Corporation + */ + +#include <linux/debugfs.h> +#include <linux/pm_runtime.h> +#include <linux/seq_file.h> +#include <linux/uaccess.h> + +#include <linux/io.h> + +#include "dev.h" +#include "debug.h" +#include "channel.h" + +static DEFINE_MUTEX(debug_lock); + +unsigned int host1x_debug_trace_cmdbuf; + +static pid_t host1x_debug_force_timeout_pid; +static u32 host1x_debug_force_timeout_val; +static u32 host1x_debug_force_timeout_channel; + +void host1x_debug_output(struct output *o, const char *fmt, ...) +{ + va_list args; + int len; + + va_start(args, fmt); + len = vsnprintf(o->buf, sizeof(o->buf), fmt, args); + va_end(args); + + o->fn(o->ctx, o->buf, len, false); +} + +void host1x_debug_cont(struct output *o, const char *fmt, ...) +{ + va_list args; + int len; + + va_start(args, fmt); + len = vsnprintf(o->buf, sizeof(o->buf), fmt, args); + va_end(args); + + o->fn(o->ctx, o->buf, len, true); +} + +static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo) +{ + struct host1x *m = dev_get_drvdata(ch->dev->parent); + struct output *o = data; + int err; + + err = pm_runtime_resume_and_get(m->dev); + if (err < 0) + return err; + + mutex_lock(&ch->cdma.lock); + mutex_lock(&debug_lock); + + if (show_fifo) + host1x_hw_show_channel_fifo(m, ch, o); + + host1x_hw_show_channel_cdma(m, ch, o); + + mutex_unlock(&debug_lock); + mutex_unlock(&ch->cdma.lock); + + pm_runtime_put(m->dev); + + return 0; +} + +static void show_syncpts(struct host1x *m, struct output *o, bool show_all) +{ + unsigned long irqflags; + struct list_head *pos; + unsigned int i; + int err; + + host1x_debug_output(o, "---- syncpts ----\n"); + + err = pm_runtime_resume_and_get(m->dev); + if (err < 0) + return; + + for (i = 0; i < host1x_syncpt_nb_pts(m); i++) { + u32 max = host1x_syncpt_read_max(m->syncpt + i); + u32 min = host1x_syncpt_load(m->syncpt + i); + unsigned int waiters = 0; + + spin_lock_irqsave(&m->syncpt[i].fences.lock, irqflags); + list_for_each(pos, &m->syncpt[i].fences.list) + waiters++; + spin_unlock_irqrestore(&m->syncpt[i].fences.lock, irqflags); + + if (!kref_read(&m->syncpt[i].ref)) + continue; + + if (!show_all && !min && !max && !waiters) + continue; + + host1x_debug_output(o, + "id %u (%s) min %d max %d (%d waiters)\n", + i, m->syncpt[i].name, min, max, waiters); + } + + for (i = 0; i < host1x_syncpt_nb_bases(m); i++) { + u32 base_val; + + base_val = host1x_syncpt_load_wait_base(m->syncpt + i); + if (base_val) + host1x_debug_output(o, "waitbase id %u val %d\n", i, + base_val); + } + + pm_runtime_put(m->dev); + + host1x_debug_output(o, "\n"); +} + +static void show_all(struct host1x *m, struct output *o, bool show_fifo) +{ + unsigned int i; + + host1x_hw_show_mlocks(m, o); + show_syncpts(m, o, true); + host1x_debug_output(o, "---- channels ----\n"); + + for (i = 0; i < m->info->nb_channels; ++i) { + struct host1x_channel *ch = host1x_channel_get_index(m, i); + + if (ch) { + show_channel(ch, o, show_fifo); + host1x_channel_put(ch); + } + } +} + +static int host1x_debug_all_show(struct seq_file *s, void *unused) +{ + struct output o = { + .fn = write_to_seqfile, + .ctx = s + }; + + show_all(s->private, &o, true); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(host1x_debug_all); + +static int host1x_debug_show(struct seq_file *s, void *unused) +{ + struct output o = { + .fn = write_to_seqfile, + .ctx = s + }; + + show_all(s->private, &o, false); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(host1x_debug); + +static void host1x_debugfs_init(struct host1x *host1x) +{ + struct dentry *de = debugfs_create_dir("tegra-host1x", NULL); + + /* Store the created entry */ + host1x->debugfs = de; + + debugfs_create_file("status", S_IRUGO, de, host1x, &host1x_debug_fops); + debugfs_create_file("status_all", S_IRUGO, de, host1x, + &host1x_debug_all_fops); + + debugfs_create_u32("trace_cmdbuf", S_IRUGO|S_IWUSR, de, + &host1x_debug_trace_cmdbuf); + + host1x_hw_debug_init(host1x, de); + + debugfs_create_u32("force_timeout_pid", S_IRUGO|S_IWUSR, de, + &host1x_debug_force_timeout_pid); + debugfs_create_u32("force_timeout_val", S_IRUGO|S_IWUSR, de, + &host1x_debug_force_timeout_val); + debugfs_create_u32("force_timeout_channel", S_IRUGO|S_IWUSR, de, + &host1x_debug_force_timeout_channel); +} + +static void host1x_debugfs_exit(struct host1x *host1x) +{ + debugfs_remove_recursive(host1x->debugfs); +} + +void host1x_debug_init(struct host1x *host1x) +{ + if (IS_ENABLED(CONFIG_DEBUG_FS)) + host1x_debugfs_init(host1x); +} + +void host1x_debug_deinit(struct host1x *host1x) +{ + if (IS_ENABLED(CONFIG_DEBUG_FS)) + host1x_debugfs_exit(host1x); +} + +void host1x_debug_dump(struct host1x *host1x) +{ + struct output o = { + .fn = write_to_printk + }; + + show_all(host1x, &o, true); +} + +void host1x_debug_dump_syncpts(struct host1x *host1x) +{ + struct output o = { + .fn = write_to_printk + }; + + show_syncpts(host1x, &o, false); +} diff --git a/drivers/gpu/host1x/debug.h b/drivers/gpu/host1x/debug.h new file mode 100644 index 0000000000..62bd8a091f --- /dev/null +++ b/drivers/gpu/host1x/debug.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tegra host1x Debug + * + * Copyright (c) 2011-2013 NVIDIA Corporation. + */ +#ifndef __HOST1X_DEBUG_H +#define __HOST1X_DEBUG_H + +#include <linux/debugfs.h> +#include <linux/seq_file.h> + +struct host1x; + +struct output { + void (*fn)(void *ctx, const char *str, size_t len, bool cont); + void *ctx; + char buf[256]; +}; + +static inline void write_to_seqfile(void *ctx, const char *str, size_t len, + bool cont) +{ + seq_write((struct seq_file *)ctx, str, len); +} + +static inline void write_to_printk(void *ctx, const char *str, size_t len, + bool cont) +{ + if (cont) + pr_cont("%s", str); + else + pr_info("%s", str); +} + +void __printf(2, 3) host1x_debug_output(struct output *o, const char *fmt, ...); +void __printf(2, 3) host1x_debug_cont(struct output *o, const char *fmt, ...); + +extern unsigned int host1x_debug_trace_cmdbuf; + +void host1x_debug_init(struct host1x *host1x); +void host1x_debug_deinit(struct host1x *host1x); +void host1x_debug_dump(struct host1x *host1x); +void host1x_debug_dump_syncpts(struct host1x *host1x); + +#endif diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c new file mode 100644 index 0000000000..7c6699aed7 --- /dev/null +++ b/drivers/gpu/host1x/dev.c @@ -0,0 +1,779 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Tegra host1x driver + * + * Copyright (c) 2010-2013, NVIDIA Corporation. + */ + +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/io.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <linux/slab.h> + +#include <soc/tegra/common.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/host1x.h> +#undef CREATE_TRACE_POINTS + +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) +#include <asm/dma-iommu.h> +#endif + +#include "bus.h" +#include "channel.h" +#include "context.h" +#include "debug.h" +#include "dev.h" +#include "intr.h" + +#include "hw/host1x01.h" +#include "hw/host1x02.h" +#include "hw/host1x04.h" +#include "hw/host1x05.h" +#include "hw/host1x06.h" +#include "hw/host1x07.h" +#include "hw/host1x08.h" + +void host1x_common_writel(struct host1x *host1x, u32 v, u32 r) +{ + writel(v, host1x->common_regs + r); +} + +void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r) +{ + writel(v, host1x->hv_regs + r); +} + +u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r) +{ + return readl(host1x->hv_regs + r); +} + +void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r) +{ + void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset; + + writel(v, sync_regs + r); +} + +u32 host1x_sync_readl(struct host1x *host1x, u32 r) +{ + void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset; + + return readl(sync_regs + r); +} + +void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r) +{ + writel(v, ch->regs + r); +} + +u32 host1x_ch_readl(struct host1x_channel *ch, u32 r) +{ + return readl(ch->regs + r); +} + +static const struct host1x_info host1x01_info = { + .nb_channels = 8, + .nb_pts = 32, + .nb_mlocks = 16, + .nb_bases = 8, + .init = host1x01_init, + .sync_offset = 0x3000, + .dma_mask = DMA_BIT_MASK(32), + .has_wide_gather = false, + .has_hypervisor = false, + .num_sid_entries = 0, + .sid_table = NULL, + .reserve_vblank_syncpts = true, +}; + +static const struct host1x_info host1x02_info = { + .nb_channels = 9, + .nb_pts = 32, + .nb_mlocks = 16, + .nb_bases = 12, + .init = host1x02_init, + .sync_offset = 0x3000, + .dma_mask = DMA_BIT_MASK(32), + .has_wide_gather = false, + .has_hypervisor = false, + .num_sid_entries = 0, + .sid_table = NULL, + .reserve_vblank_syncpts = true, +}; + +static const struct host1x_info host1x04_info = { + .nb_channels = 12, + .nb_pts = 192, + .nb_mlocks = 16, + .nb_bases = 64, + .init = host1x04_init, + .sync_offset = 0x2100, + .dma_mask = DMA_BIT_MASK(34), + .has_wide_gather = false, + .has_hypervisor = false, + .num_sid_entries = 0, + .sid_table = NULL, + .reserve_vblank_syncpts = false, +}; + +static const struct host1x_info host1x05_info = { + .nb_channels = 14, + .nb_pts = 192, + .nb_mlocks = 16, + .nb_bases = 64, + .init = host1x05_init, + .sync_offset = 0x2100, + .dma_mask = DMA_BIT_MASK(34), + .has_wide_gather = false, + .has_hypervisor = false, + .num_sid_entries = 0, + .sid_table = NULL, + .reserve_vblank_syncpts = false, +}; + +static const struct host1x_sid_entry tegra186_sid_table[] = { + { + /* VIC */ + .base = 0x1af0, + .offset = 0x30, + .limit = 0x34 + }, + { + /* NVDEC */ + .base = 0x1b00, + .offset = 0x30, + .limit = 0x34 + }, +}; + +static const struct host1x_info host1x06_info = { + .nb_channels = 63, + .nb_pts = 576, + .nb_mlocks = 24, + .nb_bases = 16, + .init = host1x06_init, + .sync_offset = 0x0, + .dma_mask = DMA_BIT_MASK(40), + .has_wide_gather = true, + .has_hypervisor = true, + .num_sid_entries = ARRAY_SIZE(tegra186_sid_table), + .sid_table = tegra186_sid_table, + .reserve_vblank_syncpts = false, +}; + +static const struct host1x_sid_entry tegra194_sid_table[] = { + { + /* VIC */ + .base = 0x1af0, + .offset = 0x30, + .limit = 0x34 + }, + { + /* NVDEC */ + .base = 0x1b00, + .offset = 0x30, + .limit = 0x34 + }, + { + /* NVDEC1 */ + .base = 0x1bc0, + .offset = 0x30, + .limit = 0x34 + }, +}; + +static const struct host1x_info host1x07_info = { + .nb_channels = 63, + .nb_pts = 704, + .nb_mlocks = 32, + .nb_bases = 0, + .init = host1x07_init, + .sync_offset = 0x0, + .dma_mask = DMA_BIT_MASK(40), + .has_wide_gather = true, + .has_hypervisor = true, + .num_sid_entries = ARRAY_SIZE(tegra194_sid_table), + .sid_table = tegra194_sid_table, + .reserve_vblank_syncpts = false, +}; + +/* + * Tegra234 has two stream ID protection tables, one for setting stream IDs + * through the channel path via SETSTREAMID, and one for setting them via + * MMIO. We program each engine's data stream ID in the channel path table + * and firmware stream ID in the MMIO path table. + */ +static const struct host1x_sid_entry tegra234_sid_table[] = { + { + /* VIC channel */ + .base = 0x17b8, + .offset = 0x30, + .limit = 0x30 + }, + { + /* VIC MMIO */ + .base = 0x1688, + .offset = 0x34, + .limit = 0x34 + }, + { + /* NVDEC channel */ + .base = 0x17c8, + .offset = 0x30, + .limit = 0x30, + }, + { + /* NVDEC MMIO */ + .base = 0x1698, + .offset = 0x34, + .limit = 0x34, + }, +}; + +static const struct host1x_info host1x08_info = { + .nb_channels = 63, + .nb_pts = 1024, + .nb_mlocks = 24, + .nb_bases = 0, + .init = host1x08_init, + .sync_offset = 0x0, + .dma_mask = DMA_BIT_MASK(40), + .has_wide_gather = true, + .has_hypervisor = true, + .has_common = true, + .num_sid_entries = ARRAY_SIZE(tegra234_sid_table), + .sid_table = tegra234_sid_table, + .streamid_vm_table = { 0x1004, 128 }, + .classid_vm_table = { 0x1404, 25 }, + .mmio_vm_table = { 0x1504, 25 }, + .reserve_vblank_syncpts = false, +}; + +static const struct of_device_id host1x_of_match[] = { + { .compatible = "nvidia,tegra234-host1x", .data = &host1x08_info, }, + { .compatible = "nvidia,tegra194-host1x", .data = &host1x07_info, }, + { .compatible = "nvidia,tegra186-host1x", .data = &host1x06_info, }, + { .compatible = "nvidia,tegra210-host1x", .data = &host1x05_info, }, + { .compatible = "nvidia,tegra124-host1x", .data = &host1x04_info, }, + { .compatible = "nvidia,tegra114-host1x", .data = &host1x02_info, }, + { .compatible = "nvidia,tegra30-host1x", .data = &host1x01_info, }, + { .compatible = "nvidia,tegra20-host1x", .data = &host1x01_info, }, + { }, +}; +MODULE_DEVICE_TABLE(of, host1x_of_match); + +static void host1x_setup_virtualization_tables(struct host1x *host) +{ + const struct host1x_info *info = host->info; + unsigned int i; + + if (!info->has_hypervisor) + return; + + for (i = 0; i < info->num_sid_entries; i++) { + const struct host1x_sid_entry *entry = &info->sid_table[i]; + + host1x_hypervisor_writel(host, entry->offset, entry->base); + host1x_hypervisor_writel(host, entry->limit, entry->base + 4); + } + + for (i = 0; i < info->streamid_vm_table.count; i++) { + /* Allow access to all stream IDs to all VMs. */ + host1x_hypervisor_writel(host, 0xff, info->streamid_vm_table.base + 4 * i); + } + + for (i = 0; i < info->classid_vm_table.count; i++) { + /* Allow access to all classes to all VMs. */ + host1x_hypervisor_writel(host, 0xff, info->classid_vm_table.base + 4 * i); + } + + for (i = 0; i < info->mmio_vm_table.count; i++) { + /* Use VM1 (that's us) as originator VMID for engine MMIO accesses. */ + host1x_hypervisor_writel(host, 0x1, info->mmio_vm_table.base + 4 * i); + } +} + +static bool host1x_wants_iommu(struct host1x *host1x) +{ + /* Our IOMMU usage policy doesn't currently play well with GART */ + if (of_machine_is_compatible("nvidia,tegra20")) + return false; + + /* + * If we support addressing a maximum of 32 bits of physical memory + * and if the host1x firewall is enabled, there's no need to enable + * IOMMU support. This can happen for example on Tegra20, Tegra30 + * and Tegra114. + * + * Tegra124 and later can address up to 34 bits of physical memory and + * many platforms come equipped with more than 2 GiB of system memory, + * which requires crossing the 4 GiB boundary. But there's a catch: on + * SoCs before Tegra186 (i.e. Tegra124 and Tegra210), the host1x can + * only address up to 32 bits of memory in GATHER opcodes, which means + * that command buffers need to either be in the first 2 GiB of system + * memory (which could quickly lead to memory exhaustion), or command + * buffers need to be treated differently from other buffers (which is + * not possible with the current ABI). + * + * A third option is to use the IOMMU in these cases to make sure all + * buffers will be mapped into a 32-bit IOVA space that host1x can + * address. This allows all of the system memory to be used and works + * within the limitations of the host1x on these SoCs. + * + * In summary, default to enable IOMMU on Tegra124 and later. For any + * of the earlier SoCs, only use the IOMMU for additional safety when + * the host1x firewall is disabled. + */ + if (host1x->info->dma_mask <= DMA_BIT_MASK(32)) { + if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) + return false; + } + + return true; +} + +static struct iommu_domain *host1x_iommu_attach(struct host1x *host) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev); + int err; + +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) + if (host->dev->archdata.mapping) { + struct dma_iommu_mapping *mapping = + to_dma_iommu_mapping(host->dev); + arm_iommu_detach_device(host->dev); + arm_iommu_release_mapping(mapping); + + domain = iommu_get_domain_for_dev(host->dev); + } +#endif + + /* + * We may not always want to enable IOMMU support (for example if the + * host1x firewall is already enabled and we don't support addressing + * more than 32 bits of physical memory), so check for that first. + * + * Similarly, if host1x is already attached to an IOMMU (via the DMA + * API), don't try to attach again. + */ + if (!host1x_wants_iommu(host) || domain) + return domain; + + host->group = iommu_group_get(host->dev); + if (host->group) { + struct iommu_domain_geometry *geometry; + dma_addr_t start, end; + unsigned long order; + + err = iova_cache_get(); + if (err < 0) + goto put_group; + + host->domain = iommu_domain_alloc(&platform_bus_type); + if (!host->domain) { + err = -ENOMEM; + goto put_cache; + } + + err = iommu_attach_group(host->domain, host->group); + if (err) { + if (err == -ENODEV) + err = 0; + + goto free_domain; + } + + geometry = &host->domain->geometry; + start = geometry->aperture_start & host->info->dma_mask; + end = geometry->aperture_end & host->info->dma_mask; + + order = __ffs(host->domain->pgsize_bitmap); + init_iova_domain(&host->iova, 1UL << order, start >> order); + host->iova_end = end; + + domain = host->domain; + } + + return domain; + +free_domain: + iommu_domain_free(host->domain); + host->domain = NULL; +put_cache: + iova_cache_put(); +put_group: + iommu_group_put(host->group); + host->group = NULL; + + return ERR_PTR(err); +} + +static int host1x_iommu_init(struct host1x *host) +{ + u64 mask = host->info->dma_mask; + struct iommu_domain *domain; + int err; + + domain = host1x_iommu_attach(host); + if (IS_ERR(domain)) { + err = PTR_ERR(domain); + dev_err(host->dev, "failed to attach to IOMMU: %d\n", err); + return err; + } + + /* + * If we're not behind an IOMMU make sure we don't get push buffers + * that are allocated outside of the range addressable by the GATHER + * opcode. + * + * Newer generations of Tegra (Tegra186 and later) support a wide + * variant of the GATHER opcode that allows addressing more bits. + */ + if (!domain && !host->info->has_wide_gather) + mask = DMA_BIT_MASK(32); + + err = dma_coerce_mask_and_coherent(host->dev, mask); + if (err < 0) { + dev_err(host->dev, "failed to set DMA mask: %d\n", err); + return err; + } + + return 0; +} + +static void host1x_iommu_exit(struct host1x *host) +{ + if (host->domain) { + put_iova_domain(&host->iova); + iommu_detach_group(host->domain, host->group); + + iommu_domain_free(host->domain); + host->domain = NULL; + + iova_cache_put(); + + iommu_group_put(host->group); + host->group = NULL; + } +} + +static int host1x_get_resets(struct host1x *host) +{ + int err; + + host->resets[0].id = "mc"; + host->resets[1].id = "host1x"; + host->nresets = ARRAY_SIZE(host->resets); + + err = devm_reset_control_bulk_get_optional_exclusive_released( + host->dev, host->nresets, host->resets); + if (err) { + dev_err(host->dev, "failed to get reset: %d\n", err); + return err; + } + + return 0; +} + +static int host1x_probe(struct platform_device *pdev) +{ + struct host1x *host; + int err; + + host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL); + if (!host) + return -ENOMEM; + + host->info = of_device_get_match_data(&pdev->dev); + + if (host->info->has_hypervisor) { + host->regs = devm_platform_ioremap_resource_byname(pdev, "vm"); + if (IS_ERR(host->regs)) + return PTR_ERR(host->regs); + + host->hv_regs = devm_platform_ioremap_resource_byname(pdev, "hypervisor"); + if (IS_ERR(host->hv_regs)) + return PTR_ERR(host->hv_regs); + + if (host->info->has_common) { + host->common_regs = devm_platform_ioremap_resource_byname(pdev, "common"); + if (IS_ERR(host->common_regs)) + return PTR_ERR(host->common_regs); + } + } else { + host->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(host->regs)) + return PTR_ERR(host->regs); + } + + host->syncpt_irq = platform_get_irq(pdev, 0); + if (host->syncpt_irq < 0) + return host->syncpt_irq; + + mutex_init(&host->devices_lock); + INIT_LIST_HEAD(&host->devices); + INIT_LIST_HEAD(&host->list); + host->dev = &pdev->dev; + + /* set common host1x device data */ + platform_set_drvdata(pdev, host); + + host->dev->dma_parms = &host->dma_parms; + dma_set_max_seg_size(host->dev, UINT_MAX); + + if (host->info->init) { + err = host->info->init(host); + if (err) + return err; + } + + host->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(host->clk)) { + err = PTR_ERR(host->clk); + + if (err != -EPROBE_DEFER) + dev_err(&pdev->dev, "failed to get clock: %d\n", err); + + return err; + } + + err = host1x_get_resets(host); + if (err) + return err; + + host1x_bo_cache_init(&host->cache); + + err = host1x_iommu_init(host); + if (err < 0) { + dev_err(&pdev->dev, "failed to setup IOMMU: %d\n", err); + goto destroy_cache; + } + + err = host1x_channel_list_init(&host->channel_list, + host->info->nb_channels); + if (err) { + dev_err(&pdev->dev, "failed to initialize channel list\n"); + goto iommu_exit; + } + + err = host1x_memory_context_list_init(host); + if (err) { + dev_err(&pdev->dev, "failed to initialize context list\n"); + goto free_channels; + } + + err = host1x_syncpt_init(host); + if (err) { + dev_err(&pdev->dev, "failed to initialize syncpts\n"); + goto free_contexts; + } + + err = host1x_intr_init(host); + if (err) { + dev_err(&pdev->dev, "failed to initialize interrupts\n"); + goto deinit_syncpt; + } + + pm_runtime_enable(&pdev->dev); + + err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev); + if (err) + goto pm_disable; + + /* the driver's code isn't ready yet for the dynamic RPM */ + err = pm_runtime_resume_and_get(&pdev->dev); + if (err) + goto pm_disable; + + host1x_debug_init(host); + + err = host1x_register(host); + if (err < 0) + goto deinit_debugfs; + + err = devm_of_platform_populate(&pdev->dev); + if (err < 0) + goto unregister; + + return 0; + +unregister: + host1x_unregister(host); +deinit_debugfs: + host1x_debug_deinit(host); + + pm_runtime_put_sync_suspend(&pdev->dev); +pm_disable: + pm_runtime_disable(&pdev->dev); + + host1x_intr_deinit(host); +deinit_syncpt: + host1x_syncpt_deinit(host); +free_contexts: + host1x_memory_context_list_free(&host->context_list); +free_channels: + host1x_channel_list_free(&host->channel_list); +iommu_exit: + host1x_iommu_exit(host); +destroy_cache: + host1x_bo_cache_destroy(&host->cache); + + return err; +} + +static int host1x_remove(struct platform_device *pdev) +{ + struct host1x *host = platform_get_drvdata(pdev); + + host1x_unregister(host); + host1x_debug_deinit(host); + + pm_runtime_force_suspend(&pdev->dev); + + host1x_intr_deinit(host); + host1x_syncpt_deinit(host); + host1x_memory_context_list_free(&host->context_list); + host1x_channel_list_free(&host->channel_list); + host1x_iommu_exit(host); + host1x_bo_cache_destroy(&host->cache); + + return 0; +} + +static int __maybe_unused host1x_runtime_suspend(struct device *dev) +{ + struct host1x *host = dev_get_drvdata(dev); + int err; + + host1x_intr_stop(host); + host1x_syncpt_save(host); + + err = reset_control_bulk_assert(host->nresets, host->resets); + if (err) { + dev_err(dev, "failed to assert reset: %d\n", err); + goto resume_host1x; + } + + usleep_range(1000, 2000); + + clk_disable_unprepare(host->clk); + reset_control_bulk_release(host->nresets, host->resets); + + return 0; + +resume_host1x: + host1x_setup_virtualization_tables(host); + host1x_syncpt_restore(host); + host1x_intr_start(host); + + return err; +} + +static int __maybe_unused host1x_runtime_resume(struct device *dev) +{ + struct host1x *host = dev_get_drvdata(dev); + int err; + + err = reset_control_bulk_acquire(host->nresets, host->resets); + if (err) { + dev_err(dev, "failed to acquire reset: %d\n", err); + return err; + } + + err = clk_prepare_enable(host->clk); + if (err) { + dev_err(dev, "failed to enable clock: %d\n", err); + goto release_reset; + } + + err = reset_control_bulk_deassert(host->nresets, host->resets); + if (err < 0) { + dev_err(dev, "failed to deassert reset: %d\n", err); + goto disable_clk; + } + + host1x_setup_virtualization_tables(host); + host1x_syncpt_restore(host); + host1x_intr_start(host); + + return 0; + +disable_clk: + clk_disable_unprepare(host->clk); +release_reset: + reset_control_bulk_release(host->nresets, host->resets); + + return err; +} + +static const struct dev_pm_ops host1x_pm_ops = { + SET_RUNTIME_PM_OPS(host1x_runtime_suspend, host1x_runtime_resume, + NULL) + /* TODO: add system suspend-resume once driver will be ready for that */ +}; + +static struct platform_driver tegra_host1x_driver = { + .driver = { + .name = "tegra-host1x", + .of_match_table = host1x_of_match, + .pm = &host1x_pm_ops, + }, + .probe = host1x_probe, + .remove = host1x_remove, +}; + +static struct platform_driver * const drivers[] = { + &tegra_host1x_driver, + &tegra_mipi_driver, +}; + +static int __init tegra_host1x_init(void) +{ + int err; + + err = bus_register(&host1x_bus_type); + if (err < 0) + return err; + + err = platform_register_drivers(drivers, ARRAY_SIZE(drivers)); + if (err < 0) + bus_unregister(&host1x_bus_type); + + return err; +} +module_init(tegra_host1x_init); + +static void __exit tegra_host1x_exit(void) +{ + platform_unregister_drivers(drivers, ARRAY_SIZE(drivers)); + bus_unregister(&host1x_bus_type); +} +module_exit(tegra_host1x_exit); + +/** + * host1x_get_dma_mask() - query the supported DMA mask for host1x + * @host1x: host1x instance + * + * Note that this returns the supported DMA mask for host1x, which can be + * different from the applicable DMA mask under certain circumstances. + */ +u64 host1x_get_dma_mask(struct host1x *host1x) +{ + return host1x->info->dma_mask; +} +EXPORT_SYMBOL(host1x_get_dma_mask); + +MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de>"); +MODULE_AUTHOR("Terje Bergstrom <tbergstrom@nvidia.com>"); +MODULE_DESCRIPTION("Host1x driver for Tegra products"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h new file mode 100644 index 0000000000..75de50fe03 --- /dev/null +++ b/drivers/gpu/host1x/dev.h @@ -0,0 +1,350 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2012-2015, NVIDIA Corporation. + */ + +#ifndef HOST1X_DEV_H +#define HOST1X_DEV_H + +#include <linux/device.h> +#include <linux/iommu.h> +#include <linux/iova.h> +#include <linux/platform_device.h> +#include <linux/reset.h> + +#include "cdma.h" +#include "channel.h" +#include "context.h" +#include "intr.h" +#include "job.h" +#include "syncpt.h" + +struct host1x_syncpt; +struct host1x_syncpt_base; +struct host1x_channel; +struct host1x_cdma; +struct host1x_job; +struct push_buffer; +struct output; +struct dentry; + +struct host1x_channel_ops { + int (*init)(struct host1x_channel *channel, struct host1x *host, + unsigned int id); + int (*submit)(struct host1x_job *job); +}; + +struct host1x_cdma_ops { + void (*start)(struct host1x_cdma *cdma); + void (*stop)(struct host1x_cdma *cdma); + void (*flush)(struct host1x_cdma *cdma); + int (*timeout_init)(struct host1x_cdma *cdma); + void (*timeout_destroy)(struct host1x_cdma *cdma); + void (*freeze)(struct host1x_cdma *cdma); + void (*resume)(struct host1x_cdma *cdma, u32 getptr); + void (*timeout_cpu_incr)(struct host1x_cdma *cdma, u32 getptr, + u32 syncpt_incrs, u32 syncval, u32 nr_slots); +}; + +struct host1x_pushbuffer_ops { + void (*init)(struct push_buffer *pb); +}; + +struct host1x_debug_ops { + void (*debug_init)(struct dentry *de); + void (*show_channel_cdma)(struct host1x *host, + struct host1x_channel *ch, + struct output *o); + void (*show_channel_fifo)(struct host1x *host, + struct host1x_channel *ch, + struct output *o); + void (*show_mlocks)(struct host1x *host, struct output *output); + +}; + +struct host1x_syncpt_ops { + void (*restore)(struct host1x_syncpt *syncpt); + void (*restore_wait_base)(struct host1x_syncpt *syncpt); + void (*load_wait_base)(struct host1x_syncpt *syncpt); + u32 (*load)(struct host1x_syncpt *syncpt); + int (*cpu_incr)(struct host1x_syncpt *syncpt); + void (*assign_to_channel)(struct host1x_syncpt *syncpt, + struct host1x_channel *channel); + void (*enable_protection)(struct host1x *host); +}; + +struct host1x_intr_ops { + int (*init_host_sync)(struct host1x *host, u32 cpm); + void (*set_syncpt_threshold)( + struct host1x *host, unsigned int id, u32 thresh); + void (*enable_syncpt_intr)(struct host1x *host, unsigned int id); + void (*disable_syncpt_intr)(struct host1x *host, unsigned int id); + void (*disable_all_syncpt_intrs)(struct host1x *host); + int (*free_syncpt_irq)(struct host1x *host); +}; + +struct host1x_sid_entry { + unsigned int base; + unsigned int offset; + unsigned int limit; +}; + +struct host1x_table_desc { + unsigned int base; + unsigned int count; +}; + +struct host1x_info { + unsigned int nb_channels; /* host1x: number of channels supported */ + unsigned int nb_pts; /* host1x: number of syncpoints supported */ + unsigned int nb_bases; /* host1x: number of syncpoint bases supported */ + unsigned int nb_mlocks; /* host1x: number of mlocks supported */ + int (*init)(struct host1x *host1x); /* initialize per SoC ops */ + unsigned int sync_offset; /* offset of syncpoint registers */ + u64 dma_mask; /* mask of addressable memory */ + bool has_wide_gather; /* supports GATHER_W opcode */ + bool has_hypervisor; /* has hypervisor registers */ + bool has_common; /* has common registers separate from hypervisor */ + unsigned int num_sid_entries; + const struct host1x_sid_entry *sid_table; + struct host1x_table_desc streamid_vm_table; + struct host1x_table_desc classid_vm_table; + struct host1x_table_desc mmio_vm_table; + /* + * On T20-T148, the boot chain may setup DC to increment syncpoints + * 26/27 on VBLANK. As such we cannot use these syncpoints until + * the display driver disables VBLANK increments. + */ + bool reserve_vblank_syncpts; +}; + +struct host1x { + const struct host1x_info *info; + + void __iomem *regs; + void __iomem *hv_regs; /* hypervisor region */ + void __iomem *common_regs; + int syncpt_irq; + struct host1x_syncpt *syncpt; + struct host1x_syncpt_base *bases; + struct device *dev; + struct clk *clk; + struct reset_control_bulk_data resets[2]; + unsigned int nresets; + + struct iommu_group *group; + struct iommu_domain *domain; + struct iova_domain iova; + dma_addr_t iova_end; + + struct mutex intr_mutex; + + const struct host1x_syncpt_ops *syncpt_op; + const struct host1x_intr_ops *intr_op; + const struct host1x_channel_ops *channel_op; + const struct host1x_cdma_ops *cdma_op; + const struct host1x_pushbuffer_ops *cdma_pb_op; + const struct host1x_debug_ops *debug_op; + + struct host1x_syncpt *nop_sp; + + struct mutex syncpt_mutex; + + struct host1x_channel_list channel_list; + struct host1x_memory_context_list context_list; + + struct dentry *debugfs; + + struct mutex devices_lock; + struct list_head devices; + + struct list_head list; + + struct device_dma_parameters dma_parms; + + struct host1x_bo_cache cache; +}; + +void host1x_common_writel(struct host1x *host1x, u32 v, u32 r); +void host1x_hypervisor_writel(struct host1x *host1x, u32 r, u32 v); +u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r); +void host1x_sync_writel(struct host1x *host1x, u32 r, u32 v); +u32 host1x_sync_readl(struct host1x *host1x, u32 r); +void host1x_ch_writel(struct host1x_channel *ch, u32 r, u32 v); +u32 host1x_ch_readl(struct host1x_channel *ch, u32 r); + +static inline void host1x_hw_syncpt_restore(struct host1x *host, + struct host1x_syncpt *sp) +{ + host->syncpt_op->restore(sp); +} + +static inline void host1x_hw_syncpt_restore_wait_base(struct host1x *host, + struct host1x_syncpt *sp) +{ + host->syncpt_op->restore_wait_base(sp); +} + +static inline void host1x_hw_syncpt_load_wait_base(struct host1x *host, + struct host1x_syncpt *sp) +{ + host->syncpt_op->load_wait_base(sp); +} + +static inline u32 host1x_hw_syncpt_load(struct host1x *host, + struct host1x_syncpt *sp) +{ + return host->syncpt_op->load(sp); +} + +static inline int host1x_hw_syncpt_cpu_incr(struct host1x *host, + struct host1x_syncpt *sp) +{ + return host->syncpt_op->cpu_incr(sp); +} + +static inline void host1x_hw_syncpt_assign_to_channel( + struct host1x *host, struct host1x_syncpt *sp, + struct host1x_channel *ch) +{ + return host->syncpt_op->assign_to_channel(sp, ch); +} + +static inline void host1x_hw_syncpt_enable_protection(struct host1x *host) +{ + return host->syncpt_op->enable_protection(host); +} + +static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm) +{ + return host->intr_op->init_host_sync(host, cpm); +} + +static inline void host1x_hw_intr_set_syncpt_threshold(struct host1x *host, + unsigned int id, + u32 thresh) +{ + host->intr_op->set_syncpt_threshold(host, id, thresh); +} + +static inline void host1x_hw_intr_enable_syncpt_intr(struct host1x *host, + unsigned int id) +{ + host->intr_op->enable_syncpt_intr(host, id); +} + +static inline void host1x_hw_intr_disable_syncpt_intr(struct host1x *host, + unsigned int id) +{ + host->intr_op->disable_syncpt_intr(host, id); +} + +static inline void host1x_hw_intr_disable_all_syncpt_intrs(struct host1x *host) +{ + host->intr_op->disable_all_syncpt_intrs(host); +} + +static inline int host1x_hw_intr_free_syncpt_irq(struct host1x *host) +{ + return host->intr_op->free_syncpt_irq(host); +} + +static inline int host1x_hw_channel_init(struct host1x *host, + struct host1x_channel *channel, + unsigned int id) +{ + return host->channel_op->init(channel, host, id); +} + +static inline int host1x_hw_channel_submit(struct host1x *host, + struct host1x_job *job) +{ + return host->channel_op->submit(job); +} + +static inline void host1x_hw_cdma_start(struct host1x *host, + struct host1x_cdma *cdma) +{ + host->cdma_op->start(cdma); +} + +static inline void host1x_hw_cdma_stop(struct host1x *host, + struct host1x_cdma *cdma) +{ + host->cdma_op->stop(cdma); +} + +static inline void host1x_hw_cdma_flush(struct host1x *host, + struct host1x_cdma *cdma) +{ + host->cdma_op->flush(cdma); +} + +static inline int host1x_hw_cdma_timeout_init(struct host1x *host, + struct host1x_cdma *cdma) +{ + return host->cdma_op->timeout_init(cdma); +} + +static inline void host1x_hw_cdma_timeout_destroy(struct host1x *host, + struct host1x_cdma *cdma) +{ + host->cdma_op->timeout_destroy(cdma); +} + +static inline void host1x_hw_cdma_freeze(struct host1x *host, + struct host1x_cdma *cdma) +{ + host->cdma_op->freeze(cdma); +} + +static inline void host1x_hw_cdma_resume(struct host1x *host, + struct host1x_cdma *cdma, u32 getptr) +{ + host->cdma_op->resume(cdma, getptr); +} + +static inline void host1x_hw_cdma_timeout_cpu_incr(struct host1x *host, + struct host1x_cdma *cdma, + u32 getptr, + u32 syncpt_incrs, + u32 syncval, u32 nr_slots) +{ + host->cdma_op->timeout_cpu_incr(cdma, getptr, syncpt_incrs, syncval, + nr_slots); +} + +static inline void host1x_hw_pushbuffer_init(struct host1x *host, + struct push_buffer *pb) +{ + host->cdma_pb_op->init(pb); +} + +static inline void host1x_hw_debug_init(struct host1x *host, struct dentry *de) +{ + if (host->debug_op && host->debug_op->debug_init) + host->debug_op->debug_init(de); +} + +static inline void host1x_hw_show_channel_cdma(struct host1x *host, + struct host1x_channel *channel, + struct output *o) +{ + host->debug_op->show_channel_cdma(host, channel, o); +} + +static inline void host1x_hw_show_channel_fifo(struct host1x *host, + struct host1x_channel *channel, + struct output *o) +{ + host->debug_op->show_channel_fifo(host, channel, o); +} + +static inline void host1x_hw_show_mlocks(struct host1x *host, struct output *o) +{ + host->debug_op->show_mlocks(host, o); +} + +extern struct platform_driver tegra_mipi_driver; + +#endif diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c new file mode 100644 index 0000000000..139ad1afd9 --- /dev/null +++ b/drivers/gpu/host1x/fence.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Syncpoint dma_fence implementation + * + * Copyright (c) 2020, NVIDIA Corporation. + */ + +#include <linux/dma-fence.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/sync_file.h> + +#include "fence.h" +#include "intr.h" +#include "syncpt.h" + +static const char *host1x_syncpt_fence_get_driver_name(struct dma_fence *f) +{ + return "host1x"; +} + +static const char *host1x_syncpt_fence_get_timeline_name(struct dma_fence *f) +{ + return "syncpoint"; +} + +static struct host1x_syncpt_fence *to_host1x_fence(struct dma_fence *f) +{ + return container_of(f, struct host1x_syncpt_fence, base); +} + +static bool host1x_syncpt_fence_enable_signaling(struct dma_fence *f) +{ + struct host1x_syncpt_fence *sf = to_host1x_fence(f); + + if (host1x_syncpt_is_expired(sf->sp, sf->threshold)) + return false; + + /* Reference for interrupt path. */ + dma_fence_get(f); + + /* + * The dma_fence framework requires the fence driver to keep a + * reference to any fences for which 'enable_signaling' has been + * called (and that have not been signalled). + * + * We cannot currently always guarantee that all fences get signalled + * or cancelled. As such, for such situations, set up a timeout, so + * that long-lasting fences will get reaped eventually. + */ + if (sf->timeout) { + /* Reference for timeout path. */ + dma_fence_get(f); + schedule_delayed_work(&sf->timeout_work, msecs_to_jiffies(30000)); + } + + host1x_intr_add_fence_locked(sf->sp->host, sf); + + /* + * The fence may get signalled at any time after the above call, + * so we need to initialize all state used by signalling + * before it. + */ + + return true; +} + +static const struct dma_fence_ops host1x_syncpt_fence_ops = { + .get_driver_name = host1x_syncpt_fence_get_driver_name, + .get_timeline_name = host1x_syncpt_fence_get_timeline_name, + .enable_signaling = host1x_syncpt_fence_enable_signaling, +}; + +void host1x_fence_signal(struct host1x_syncpt_fence *f) +{ + if (atomic_xchg(&f->signaling, 1)) { + /* + * Already on timeout path, but we removed the fence before + * timeout path could, so drop interrupt path reference. + */ + dma_fence_put(&f->base); + return; + } + + if (f->timeout && cancel_delayed_work(&f->timeout_work)) { + /* + * We know that the timeout path will not be entered. + * Safe to drop the timeout path's reference now. + */ + dma_fence_put(&f->base); + } + + dma_fence_signal_locked(&f->base); + dma_fence_put(&f->base); +} + +static void do_fence_timeout(struct work_struct *work) +{ + struct delayed_work *dwork = (struct delayed_work *)work; + struct host1x_syncpt_fence *f = + container_of(dwork, struct host1x_syncpt_fence, timeout_work); + + if (atomic_xchg(&f->signaling, 1)) { + /* Already on interrupt path, drop timeout path reference if any. */ + if (f->timeout) + dma_fence_put(&f->base); + return; + } + + if (host1x_intr_remove_fence(f->sp->host, f)) { + /* + * Managed to remove fence from queue, so it's safe to drop + * the interrupt path's reference. + */ + dma_fence_put(&f->base); + } + + dma_fence_set_error(&f->base, -ETIMEDOUT); + dma_fence_signal(&f->base); + if (f->timeout) + dma_fence_put(&f->base); +} + +struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold, + bool timeout) +{ + struct host1x_syncpt_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return ERR_PTR(-ENOMEM); + + fence->sp = sp; + fence->threshold = threshold; + fence->timeout = timeout; + + dma_fence_init(&fence->base, &host1x_syncpt_fence_ops, &sp->fences.lock, + dma_fence_context_alloc(1), 0); + + INIT_DELAYED_WORK(&fence->timeout_work, do_fence_timeout); + + return &fence->base; +} +EXPORT_SYMBOL(host1x_fence_create); + +void host1x_fence_cancel(struct dma_fence *f) +{ + struct host1x_syncpt_fence *sf = to_host1x_fence(f); + + schedule_delayed_work(&sf->timeout_work, 0); + flush_delayed_work(&sf->timeout_work); +} +EXPORT_SYMBOL(host1x_fence_cancel); diff --git a/drivers/gpu/host1x/fence.h b/drivers/gpu/host1x/fence.h new file mode 100644 index 0000000000..f3c644c73c --- /dev/null +++ b/drivers/gpu/host1x/fence.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020, NVIDIA Corporation. + */ + +#ifndef HOST1X_FENCE_H +#define HOST1X_FENCE_H + +struct host1x_syncpt_fence { + struct dma_fence base; + + atomic_t signaling; + + struct host1x_syncpt *sp; + u32 threshold; + bool timeout; + + struct delayed_work timeout_work; + + struct list_head list; +}; + +struct host1x_fence_list { + spinlock_t lock; + struct list_head list; +}; + +void host1x_fence_signal(struct host1x_syncpt_fence *fence); + +#endif diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c new file mode 100644 index 0000000000..1b65a10b9d --- /dev/null +++ b/drivers/gpu/host1x/hw/cdma_hw.c @@ -0,0 +1,365 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Tegra host1x Command DMA + * + * Copyright (c) 2010-2013, NVIDIA Corporation. + */ + +#include <linux/slab.h> +#include <linux/scatterlist.h> +#include <linux/dma-mapping.h> + +#include "../cdma.h" +#include "../channel.h" +#include "../dev.h" +#include "../debug.h" + +/* + * Put the restart at the end of pushbuffer memory + */ +static void push_buffer_init(struct push_buffer *pb) +{ + *(u32 *)(pb->mapped + pb->size) = host1x_opcode_restart(0); +} + +/* + * Increment timedout buffer's syncpt via CPU. + */ +static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr, + u32 syncpt_incrs, u32 syncval, u32 nr_slots) +{ + unsigned int i; + + for (i = 0; i < syncpt_incrs; i++) + host1x_syncpt_incr(cdma->timeout.syncpt); + + /* after CPU incr, ensure shadow is up to date */ + host1x_syncpt_load(cdma->timeout.syncpt); +} + +/* + * Start channel DMA + */ +static void cdma_start(struct host1x_cdma *cdma) +{ + struct host1x_channel *ch = cdma_to_channel(cdma); + u64 start, end; + + if (cdma->running) + return; + + cdma->last_pos = cdma->push_buffer.pos; + start = cdma->push_buffer.dma; + end = cdma->push_buffer.size + 4; + + host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP, + HOST1X_CHANNEL_DMACTRL); + + /* set base, put and end pointer */ + host1x_ch_writel(ch, lower_32_bits(start), HOST1X_CHANNEL_DMASTART); +#if HOST1X_HW >= 6 + host1x_ch_writel(ch, upper_32_bits(start), HOST1X_CHANNEL_DMASTART_HI); +#endif + host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT); +#if HOST1X_HW >= 6 + host1x_ch_writel(ch, 0, HOST1X_CHANNEL_DMAPUT_HI); +#endif + host1x_ch_writel(ch, lower_32_bits(end), HOST1X_CHANNEL_DMAEND); +#if HOST1X_HW >= 6 + host1x_ch_writel(ch, upper_32_bits(end), HOST1X_CHANNEL_DMAEND_HI); +#endif + + /* reset GET */ + host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP | + HOST1X_CHANNEL_DMACTRL_DMAGETRST | + HOST1X_CHANNEL_DMACTRL_DMAINITGET, + HOST1X_CHANNEL_DMACTRL); + + /* start the command DMA */ + host1x_ch_writel(ch, 0, HOST1X_CHANNEL_DMACTRL); + + cdma->running = true; +} + +/* + * Similar to cdma_start(), but rather than starting from an idle + * state (where DMA GET is set to DMA PUT), on a timeout we restore + * DMA GET from an explicit value (so DMA may again be pending). + */ +static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr) +{ + struct host1x *host1x = cdma_to_host1x(cdma); + struct host1x_channel *ch = cdma_to_channel(cdma); + u64 start, end; + + if (cdma->running) + return; + + cdma->last_pos = cdma->push_buffer.pos; + + host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP, + HOST1X_CHANNEL_DMACTRL); + + start = cdma->push_buffer.dma; + end = cdma->push_buffer.size + 4; + + /* set base, end pointer (all of memory) */ + host1x_ch_writel(ch, lower_32_bits(start), HOST1X_CHANNEL_DMASTART); +#if HOST1X_HW >= 6 + host1x_ch_writel(ch, upper_32_bits(start), HOST1X_CHANNEL_DMASTART_HI); +#endif + host1x_ch_writel(ch, lower_32_bits(end), HOST1X_CHANNEL_DMAEND); +#if HOST1X_HW >= 6 + host1x_ch_writel(ch, upper_32_bits(end), HOST1X_CHANNEL_DMAEND_HI); +#endif + + /* set GET, by loading the value in PUT (then reset GET) */ + host1x_ch_writel(ch, getptr, HOST1X_CHANNEL_DMAPUT); + host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP | + HOST1X_CHANNEL_DMACTRL_DMAGETRST | + HOST1X_CHANNEL_DMACTRL_DMAINITGET, + HOST1X_CHANNEL_DMACTRL); + + dev_dbg(host1x->dev, + "%s: DMA GET 0x%x, PUT HW 0x%x / shadow 0x%x\n", __func__, + host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET), + host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT), + cdma->last_pos); + + /* deassert GET reset and set PUT */ + host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP, + HOST1X_CHANNEL_DMACTRL); + host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT); + + /* start the command DMA */ + host1x_ch_writel(ch, 0, HOST1X_CHANNEL_DMACTRL); + + cdma->running = true; +} + +/* + * Kick channel DMA into action by writing its PUT offset (if it has changed) + */ +static void cdma_flush(struct host1x_cdma *cdma) +{ + struct host1x_channel *ch = cdma_to_channel(cdma); + + if (cdma->push_buffer.pos != cdma->last_pos) { + host1x_ch_writel(ch, cdma->push_buffer.pos, + HOST1X_CHANNEL_DMAPUT); + cdma->last_pos = cdma->push_buffer.pos; + } +} + +static void cdma_stop(struct host1x_cdma *cdma) +{ + struct host1x_channel *ch = cdma_to_channel(cdma); + + mutex_lock(&cdma->lock); + + if (cdma->running) { + host1x_cdma_wait_locked(cdma, CDMA_EVENT_SYNC_QUEUE_EMPTY); + host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP, + HOST1X_CHANNEL_DMACTRL); + cdma->running = false; + } + + mutex_unlock(&cdma->lock); +} + +static void cdma_hw_cmdproc_stop(struct host1x *host, struct host1x_channel *ch, + bool stop) +{ +#if HOST1X_HW >= 6 + host1x_ch_writel(ch, stop ? 0x1 : 0x0, HOST1X_CHANNEL_CMDPROC_STOP); +#else + u32 cmdproc_stop = host1x_sync_readl(host, HOST1X_SYNC_CMDPROC_STOP); + if (stop) + cmdproc_stop |= BIT(ch->id); + else + cmdproc_stop &= ~BIT(ch->id); + host1x_sync_writel(host, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP); +#endif +} + +static void cdma_hw_teardown(struct host1x *host, struct host1x_channel *ch) +{ +#if HOST1X_HW >= 6 + host1x_ch_writel(ch, 0x1, HOST1X_CHANNEL_TEARDOWN); +#else + host1x_sync_writel(host, BIT(ch->id), HOST1X_SYNC_CH_TEARDOWN); +#endif +} + +/* + * Stops both channel's command processor and CDMA immediately. + * Also, tears down the channel and resets corresponding module. + */ +static void cdma_freeze(struct host1x_cdma *cdma) +{ + struct host1x *host = cdma_to_host1x(cdma); + struct host1x_channel *ch = cdma_to_channel(cdma); + + if (cdma->torndown && !cdma->running) { + dev_warn(host->dev, "Already torn down\n"); + return; + } + + dev_dbg(host->dev, "freezing channel (id %d)\n", ch->id); + + cdma_hw_cmdproc_stop(host, ch, true); + + dev_dbg(host->dev, "%s: DMA GET 0x%x, PUT HW 0x%x / shadow 0x%x\n", + __func__, host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET), + host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT), + cdma->last_pos); + + host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP, + HOST1X_CHANNEL_DMACTRL); + + cdma_hw_teardown(host, ch); + + cdma->running = false; + cdma->torndown = true; +} + +static void cdma_resume(struct host1x_cdma *cdma, u32 getptr) +{ + struct host1x *host1x = cdma_to_host1x(cdma); + struct host1x_channel *ch = cdma_to_channel(cdma); + + dev_dbg(host1x->dev, + "resuming channel (id %u, DMAGET restart = 0x%x)\n", + ch->id, getptr); + + cdma_hw_cmdproc_stop(host1x, ch, false); + + cdma->torndown = false; + cdma_timeout_restart(cdma, getptr); +} + +static void timeout_release_mlock(struct host1x_cdma *cdma) +{ +#if HOST1X_HW >= 8 + /* Tegra186 and Tegra194 require a more complicated MLOCK release + * sequence. Furthermore, those chips by default don't enforce MLOCKs, + * so it turns out that if we don't /actually/ need MLOCKs, we can just + * ignore them. + * + * As such, for now just implement this on Tegra234 where things are + * stricter but also easy to implement. + */ + struct host1x_channel *ch = cdma_to_channel(cdma); + struct host1x *host1x = cdma_to_host1x(cdma); + u32 offset; + + switch (ch->client->class) { + case HOST1X_CLASS_VIC: + offset = HOST1X_COMMON_VIC_MLOCK; + break; + case HOST1X_CLASS_NVDEC: + offset = HOST1X_COMMON_NVDEC_MLOCK; + break; + default: + WARN(1, "%s was not updated for class %u", __func__, ch->client->class); + return; + } + + host1x_common_writel(host1x, 0x0, offset); +#endif +} + +/* + * If this timeout fires, it indicates the current sync_queue entry has + * exceeded its TTL and the userctx should be timed out and remaining + * submits already issued cleaned up (future submits return an error). + */ +static void cdma_timeout_handler(struct work_struct *work) +{ + u32 syncpt_val; + struct host1x_cdma *cdma; + struct host1x *host1x; + struct host1x_channel *ch; + + cdma = container_of(to_delayed_work(work), struct host1x_cdma, + timeout.wq); + host1x = cdma_to_host1x(cdma); + ch = cdma_to_channel(cdma); + + host1x_debug_dump(cdma_to_host1x(cdma)); + + mutex_lock(&cdma->lock); + + if (!cdma->timeout.client) { + dev_dbg(host1x->dev, + "cdma_timeout: expired, but has no clientid\n"); + mutex_unlock(&cdma->lock); + return; + } + + /* stop processing to get a clean snapshot */ + cdma_hw_cmdproc_stop(host1x, ch, true); + + syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt); + + /* has buffer actually completed? */ + if ((s32)(syncpt_val - cdma->timeout.syncpt_val) >= 0) { + dev_dbg(host1x->dev, + "cdma_timeout: expired, but buffer had completed\n"); + /* restore */ + cdma_hw_cmdproc_stop(host1x, ch, false); + mutex_unlock(&cdma->lock); + return; + } + + dev_warn(host1x->dev, "%s: timeout: %u (%s), HW thresh %d, done %d\n", + __func__, cdma->timeout.syncpt->id, cdma->timeout.syncpt->name, + syncpt_val, cdma->timeout.syncpt_val); + + /* stop HW, resetting channel/module */ + host1x_hw_cdma_freeze(host1x, cdma); + + /* release any held MLOCK */ + timeout_release_mlock(cdma); + + host1x_cdma_update_sync_queue(cdma, ch->dev); + mutex_unlock(&cdma->lock); +} + +/* + * Init timeout resources + */ +static int cdma_timeout_init(struct host1x_cdma *cdma) +{ + INIT_DELAYED_WORK(&cdma->timeout.wq, cdma_timeout_handler); + cdma->timeout.initialized = true; + + return 0; +} + +/* + * Clean up timeout resources + */ +static void cdma_timeout_destroy(struct host1x_cdma *cdma) +{ + if (cdma->timeout.initialized) + cancel_delayed_work(&cdma->timeout.wq); + + cdma->timeout.initialized = false; +} + +static const struct host1x_cdma_ops host1x_cdma_ops = { + .start = cdma_start, + .stop = cdma_stop, + .flush = cdma_flush, + + .timeout_init = cdma_timeout_init, + .timeout_destroy = cdma_timeout_destroy, + .freeze = cdma_freeze, + .resume = cdma_resume, + .timeout_cpu_incr = cdma_timeout_cpu_incr, +}; + +static const struct host1x_pushbuffer_ops host1x_pushbuffer_ops = { + .init = push_buffer_init, +}; diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c new file mode 100644 index 0000000000..d44b8de890 --- /dev/null +++ b/drivers/gpu/host1x/hw/channel_hw.c @@ -0,0 +1,363 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Tegra host1x Channel + * + * Copyright (c) 2010-2013, NVIDIA Corporation. + */ + +#include <linux/host1x.h> +#include <linux/iommu.h> +#include <linux/slab.h> + +#include <trace/events/host1x.h> + +#include "../channel.h" +#include "../dev.h" +#include "../intr.h" +#include "../job.h" + +#define TRACE_MAX_LENGTH 128U + +static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo, + u32 offset, u32 words) +{ + struct device *dev = cdma_to_channel(cdma)->dev; + void *mem = NULL; + + if (host1x_debug_trace_cmdbuf) + mem = host1x_bo_mmap(bo); + + if (mem) { + u32 i; + /* + * Write in batches of 128 as there seems to be a limit + * of how much you can output to ftrace at once. + */ + for (i = 0; i < words; i += TRACE_MAX_LENGTH) { + u32 num_words = min(words - i, TRACE_MAX_LENGTH); + + offset += i * sizeof(u32); + + trace_host1x_cdma_push_gather(dev_name(dev), bo, + num_words, offset, + mem); + } + + host1x_bo_munmap(bo, mem); + } +} + +static void submit_wait(struct host1x_job *job, u32 id, u32 threshold, + u32 next_class) +{ + struct host1x_cdma *cdma = &job->channel->cdma; + +#if HOST1X_HW >= 6 + u32 stream_id; + + /* + * If a memory context has been set, use it. Otherwise + * (if context isolation is disabled) use the engine's + * firmware stream ID. + */ + if (job->memory_context) + stream_id = job->memory_context->stream_id; + else + stream_id = job->engine_fallback_streamid; + + host1x_cdma_push_wide(cdma, + host1x_opcode_setclass( + HOST1X_CLASS_HOST1X, + HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32, + /* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */ + BIT(0) | BIT(2) + ), + threshold, + id, + HOST1X_OPCODE_NOP + ); + host1x_cdma_push_wide(&job->channel->cdma, + host1x_opcode_setclass(job->class, 0, 0), + host1x_opcode_setpayload(stream_id), + host1x_opcode_setstreamid(job->engine_streamid_offset / 4), + HOST1X_OPCODE_NOP); +#elif HOST1X_HW >= 2 + host1x_cdma_push_wide(cdma, + host1x_opcode_setclass( + HOST1X_CLASS_HOST1X, + HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32, + /* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */ + BIT(0) | BIT(2) + ), + threshold, + id, + host1x_opcode_setclass(next_class, 0, 0) + ); +#else + /* TODO add waitchk or use waitbases or other mitigation */ + host1x_cdma_push(cdma, + host1x_opcode_setclass( + HOST1X_CLASS_HOST1X, + host1x_uclass_wait_syncpt_r(), + BIT(0) + ), + host1x_class_host_wait_syncpt(id, threshold) + ); + host1x_cdma_push(cdma, + host1x_opcode_setclass(next_class, 0, 0), + HOST1X_OPCODE_NOP + ); +#endif +} + +static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base) +{ + struct host1x_cdma *cdma = &job->channel->cdma; +#if HOST1X_HW < 6 + struct device *dev = job->channel->dev; +#endif + unsigned int i; + u32 threshold; + + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_cmd *cmd = &job->cmds[i]; + + if (cmd->is_wait) { + if (cmd->wait.relative) + threshold = job_syncpt_base + cmd->wait.threshold; + else + threshold = cmd->wait.threshold; + + submit_wait(job, cmd->wait.id, threshold, cmd->wait.next_class); + } else { + struct host1x_job_gather *g = &cmd->gather; + + dma_addr_t addr = g->base + g->offset; + u32 op2, op3; + + op2 = lower_32_bits(addr); + op3 = upper_32_bits(addr); + + trace_write_gather(cdma, g->bo, g->offset, g->words); + + if (op3 != 0) { +#if HOST1X_HW >= 6 + u32 op1 = host1x_opcode_gather_wide(g->words); + u32 op4 = HOST1X_OPCODE_NOP; + + host1x_cdma_push_wide(cdma, op1, op2, op3, op4); +#else + dev_err(dev, "invalid gather for push buffer %pad\n", + &addr); + continue; +#endif + } else { + u32 op1 = host1x_opcode_gather(g->words); + + host1x_cdma_push(cdma, op1, op2); + } + } + } +} + +static inline void synchronize_syncpt_base(struct host1x_job *job) +{ + struct host1x_syncpt *sp = job->syncpt; + unsigned int id; + u32 value; + + value = host1x_syncpt_read_max(sp); + id = sp->base->id; + + host1x_cdma_push(&job->channel->cdma, + host1x_opcode_setclass(HOST1X_CLASS_HOST1X, + HOST1X_UCLASS_LOAD_SYNCPT_BASE, 1), + HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(id) | + HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(value)); +} + +static void host1x_channel_set_streamid(struct host1x_channel *channel) +{ +#if HOST1X_HW >= 6 + u32 stream_id; + + if (!tegra_dev_iommu_get_stream_id(channel->dev->parent, &stream_id)) + stream_id = TEGRA_STREAM_ID_BYPASS; + + host1x_ch_writel(channel, stream_id, HOST1X_CHANNEL_SMMU_STREAMID); +#endif +} + +static void host1x_enable_gather_filter(struct host1x_channel *ch) +{ +#if HOST1X_HW >= 6 + struct host1x *host = dev_get_drvdata(ch->dev->parent); + u32 val; + + if (!host->hv_regs) + return; + + val = host1x_hypervisor_readl( + host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32)); + val |= BIT(ch->id % 32); + host1x_hypervisor_writel( + host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32)); +#elif HOST1X_HW >= 4 + host1x_ch_writel(ch, + HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1), + HOST1X_CHANNEL_CHANNELCTRL); +#endif +} + +static void channel_program_cdma(struct host1x_job *job) +{ + struct host1x_cdma *cdma = &job->channel->cdma; + struct host1x_syncpt *sp = job->syncpt; + +#if HOST1X_HW >= 6 + u32 fence; + + /* Enter engine class with invalid stream ID. */ + host1x_cdma_push_wide(cdma, + host1x_opcode_acquire_mlock(job->class), + host1x_opcode_setclass(job->class, 0, 0), + host1x_opcode_setpayload(0), + host1x_opcode_setstreamid(job->engine_streamid_offset / 4)); + + /* Before switching stream ID to real stream ID, ensure engine is idle. */ + fence = host1x_syncpt_incr_max(sp, 1); + host1x_cdma_push(&job->channel->cdma, + host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1), + HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) | + HOST1X_UCLASS_INCR_SYNCPT_COND_F(4)); + submit_wait(job, job->syncpt->id, fence, job->class); + + /* Submit work. */ + job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs); + submit_gathers(job, job->syncpt_end - job->syncpt_incrs); + + /* Before releasing MLOCK, ensure engine is idle again. */ + fence = host1x_syncpt_incr_max(sp, 1); + host1x_cdma_push(&job->channel->cdma, + host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1), + HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) | + HOST1X_UCLASS_INCR_SYNCPT_COND_F(4)); + submit_wait(job, job->syncpt->id, fence, job->class); + + /* Release MLOCK. */ + host1x_cdma_push(cdma, + HOST1X_OPCODE_NOP, host1x_opcode_release_mlock(job->class)); +#else + if (job->serialize) { + /* + * Force serialization by inserting a host wait for the + * previous job to finish before this one can commence. + */ + host1x_cdma_push(cdma, + host1x_opcode_setclass(HOST1X_CLASS_HOST1X, + host1x_uclass_wait_syncpt_r(), 1), + host1x_class_host_wait_syncpt(job->syncpt->id, + host1x_syncpt_read_max(sp))); + } + + /* Synchronize base register to allow using it for relative waiting */ + if (sp->base) + synchronize_syncpt_base(job); + + /* add a setclass for modules that require it */ + if (job->class) + host1x_cdma_push(cdma, + host1x_opcode_setclass(job->class, 0, 0), + HOST1X_OPCODE_NOP); + + job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs); + + submit_gathers(job, job->syncpt_end - job->syncpt_incrs); +#endif +} + +static void job_complete_callback(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct host1x_job *job = container_of(cb, struct host1x_job, fence_cb); + + /* Schedules CDMA update. */ + host1x_cdma_update(&job->channel->cdma); +} + +static int channel_submit(struct host1x_job *job) +{ + struct host1x_channel *ch = job->channel; + struct host1x_syncpt *sp = job->syncpt; + u32 prev_max = 0; + u32 syncval; + int err; + struct host1x *host = dev_get_drvdata(ch->dev->parent); + + trace_host1x_channel_submit(dev_name(ch->dev), + job->num_cmds, job->num_relocs, + job->syncpt->id, job->syncpt_incrs); + + /* before error checks, return current max */ + prev_max = job->syncpt_end = host1x_syncpt_read_max(sp); + + /* get submit lock */ + err = mutex_lock_interruptible(&ch->submitlock); + if (err) + return err; + + host1x_channel_set_streamid(ch); + host1x_enable_gather_filter(ch); + host1x_hw_syncpt_assign_to_channel(host, sp, ch); + + /* begin a CDMA submit */ + err = host1x_cdma_begin(&ch->cdma, job); + if (err) { + mutex_unlock(&ch->submitlock); + return err; + } + + channel_program_cdma(job); + syncval = host1x_syncpt_read_max(sp); + + /* + * Create fence before submitting job to HW to avoid job completing + * before the fence is set up. + */ + job->fence = host1x_fence_create(sp, syncval, true); + if (WARN(IS_ERR(job->fence), "Failed to create submit complete fence")) { + job->fence = NULL; + } else { + err = dma_fence_add_callback(job->fence, &job->fence_cb, + job_complete_callback); + } + + /* end CDMA submit & stash pinned hMems into sync queue */ + host1x_cdma_end(&ch->cdma, job); + + trace_host1x_channel_submitted(dev_name(ch->dev), prev_max, syncval); + + mutex_unlock(&ch->submitlock); + + if (err == -ENOENT) + host1x_cdma_update(&ch->cdma); + else + WARN(err, "Failed to set submit complete interrupt"); + + return 0; +} + +static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev, + unsigned int index) +{ +#if HOST1X_HW < 6 + ch->regs = dev->regs + index * 0x4000; +#else + ch->regs = dev->regs + index * 0x100; +#endif + return 0; +} + +static const struct host1x_channel_ops host1x_channel_ops = { + .init = host1x_channel_init, + .submit = channel_submit, +}; diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c new file mode 100644 index 0000000000..54e31d8151 --- /dev/null +++ b/drivers/gpu/host1x/hw/debug_hw.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2010 Google, Inc. + * Author: Erik Gilling <konkers@android.com> + * + * Copyright (C) 2011-2013 NVIDIA Corporation + */ + +#include "../dev.h" +#include "../debug.h" +#include "../cdma.h" +#include "../channel.h" + +#define HOST1X_DEBUG_MAX_PAGE_OFFSET 102400 + +enum { + HOST1X_OPCODE_SETCLASS = 0x00, + HOST1X_OPCODE_INCR = 0x01, + HOST1X_OPCODE_NONINCR = 0x02, + HOST1X_OPCODE_MASK = 0x03, + HOST1X_OPCODE_IMM = 0x04, + HOST1X_OPCODE_RESTART = 0x05, + HOST1X_OPCODE_GATHER = 0x06, + HOST1X_OPCODE_SETSTRMID = 0x07, + HOST1X_OPCODE_SETAPPID = 0x08, + HOST1X_OPCODE_SETPYLD = 0x09, + HOST1X_OPCODE_INCR_W = 0x0a, + HOST1X_OPCODE_NONINCR_W = 0x0b, + HOST1X_OPCODE_GATHER_W = 0x0c, + HOST1X_OPCODE_RESTART_W = 0x0d, + HOST1X_OPCODE_EXTEND = 0x0e, +}; + +enum { + HOST1X_OPCODE_EXTEND_ACQUIRE_MLOCK = 0x00, + HOST1X_OPCODE_EXTEND_RELEASE_MLOCK = 0x01, +}; + +#define INVALID_PAYLOAD 0xffffffff + +static unsigned int show_channel_command(struct output *o, u32 val, + u32 *payload) +{ + unsigned int mask, subop, num, opcode; + + opcode = val >> 28; + + switch (opcode) { + case HOST1X_OPCODE_SETCLASS: + mask = val & 0x3f; + if (mask) { + host1x_debug_cont(o, "SETCL(class=%03x, offset=%03x, mask=%02x, [", + val >> 6 & 0x3ff, + val >> 16 & 0xfff, mask); + return hweight8(mask); + } + + host1x_debug_cont(o, "SETCL(class=%03x)\n", val >> 6 & 0x3ff); + return 0; + + case HOST1X_OPCODE_INCR: + num = val & 0xffff; + host1x_debug_cont(o, "INCR(offset=%03x, [", + val >> 16 & 0xfff); + if (!num) + host1x_debug_cont(o, "])\n"); + + return num; + + case HOST1X_OPCODE_NONINCR: + num = val & 0xffff; + host1x_debug_cont(o, "NONINCR(offset=%03x, [", + val >> 16 & 0xfff); + if (!num) + host1x_debug_cont(o, "])\n"); + + return num; + + case HOST1X_OPCODE_MASK: + mask = val & 0xffff; + host1x_debug_cont(o, "MASK(offset=%03x, mask=%03x, [", + val >> 16 & 0xfff, mask); + if (!mask) + host1x_debug_cont(o, "])\n"); + + return hweight16(mask); + + case HOST1X_OPCODE_IMM: + host1x_debug_cont(o, "IMM(offset=%03x, data=%03x)\n", + val >> 16 & 0xfff, val & 0xffff); + return 0; + + case HOST1X_OPCODE_RESTART: + host1x_debug_cont(o, "RESTART(offset=%08x)\n", val << 4); + return 0; + + case HOST1X_OPCODE_GATHER: + host1x_debug_cont(o, "GATHER(offset=%03x, insert=%d, type=%d, count=%04x, addr=[", + val >> 16 & 0xfff, val >> 15 & 0x1, + val >> 14 & 0x1, val & 0x3fff); + return 1; + +#if HOST1X_HW >= 6 + case HOST1X_OPCODE_SETSTRMID: + host1x_debug_cont(o, "SETSTRMID(offset=%06x)\n", + val & 0x3fffff); + return 0; + + case HOST1X_OPCODE_SETAPPID: + host1x_debug_cont(o, "SETAPPID(appid=%02x)\n", val & 0xff); + return 0; + + case HOST1X_OPCODE_SETPYLD: + *payload = val & 0xffff; + host1x_debug_cont(o, "SETPYLD(data=%04x)\n", *payload); + return 0; + + case HOST1X_OPCODE_INCR_W: + case HOST1X_OPCODE_NONINCR_W: + host1x_debug_cont(o, "%s(offset=%06x, ", + opcode == HOST1X_OPCODE_INCR_W ? + "INCR_W" : "NONINCR_W", + val & 0x3fffff); + if (*payload == 0) { + host1x_debug_cont(o, "[])\n"); + return 0; + } else if (*payload == INVALID_PAYLOAD) { + host1x_debug_cont(o, "unknown)\n"); + return 0; + } else { + host1x_debug_cont(o, "["); + return *payload; + } + + case HOST1X_OPCODE_GATHER_W: + host1x_debug_cont(o, "GATHER_W(count=%04x, addr=[", + val & 0x3fff); + return 2; +#endif + + case HOST1X_OPCODE_EXTEND: + subop = val >> 24 & 0xf; + if (subop == HOST1X_OPCODE_EXTEND_ACQUIRE_MLOCK) + host1x_debug_cont(o, "ACQUIRE_MLOCK(index=%d)\n", + val & 0xff); + else if (subop == HOST1X_OPCODE_EXTEND_RELEASE_MLOCK) + host1x_debug_cont(o, "RELEASE_MLOCK(index=%d)\n", + val & 0xff); + else + host1x_debug_cont(o, "EXTEND_UNKNOWN(%08x)\n", val); + return 0; + + default: + host1x_debug_cont(o, "UNKNOWN\n"); + return 0; + } +} + +static void show_gather(struct output *o, dma_addr_t phys_addr, + unsigned int words, struct host1x_cdma *cdma, + dma_addr_t pin_addr, u32 *map_addr) +{ + /* Map dmaget cursor to corresponding mem handle */ + u32 offset = phys_addr - pin_addr; + unsigned int data_count = 0, i; + u32 payload = INVALID_PAYLOAD; + + /* + * Sometimes we're given different hardware address to the same + * page - in these cases the offset will get an invalid number and + * we just have to bail out. + */ + if (offset > HOST1X_DEBUG_MAX_PAGE_OFFSET) { + host1x_debug_output(o, "[address mismatch]\n"); + return; + } + + for (i = 0; i < words; i++) { + dma_addr_t addr = phys_addr + i * 4; + u32 val = *(map_addr + offset / 4 + i); + + if (!data_count) { + host1x_debug_output(o, " %pad: %08x: ", &addr, val); + data_count = show_channel_command(o, val, &payload); + } else { + host1x_debug_cont(o, "%08x%s", val, + data_count > 1 ? ", " : "])\n"); + data_count--; + } + } +} + +static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma) +{ + struct push_buffer *pb = &cdma->push_buffer; + struct host1x_job *job; + + list_for_each_entry(job, &cdma->sync_queue, list) { + unsigned int i; + + host1x_debug_output(o, "JOB, syncpt %u: %u timeout: %u num_slots: %u num_handles: %u\n", + job->syncpt->id, job->syncpt_end, job->timeout, + job->num_slots, job->num_unpins); + + show_gather(o, pb->dma + job->first_get, job->num_slots * 2, cdma, + pb->dma + job->first_get, pb->mapped + job->first_get); + + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_gather *g; + u32 *mapped; + + if (job->cmds[i].is_wait) + continue; + + g = &job->cmds[i].gather; + + if (job->gather_copy_mapped) + mapped = (u32 *)job->gather_copy_mapped; + else + mapped = host1x_bo_mmap(g->bo); + + if (!mapped) { + host1x_debug_output(o, "[could not mmap]\n"); + continue; + } + + host1x_debug_output(o, " GATHER at %pad+%#x, %d words\n", + &g->base, g->offset, g->words); + + show_gather(o, g->base + g->offset, g->words, cdma, + g->base, mapped); + + if (!job->gather_copy_mapped) + host1x_bo_munmap(g->bo, mapped); + } + } +} + +#if HOST1X_HW >= 6 +#include "debug_hw_1x06.c" +#else +#include "debug_hw_1x01.c" +#endif + +static const struct host1x_debug_ops host1x_debug_ops = { + .show_channel_cdma = host1x_debug_show_channel_cdma, + .show_channel_fifo = host1x_debug_show_channel_fifo, + .show_mlocks = host1x_debug_show_mlocks, +}; diff --git a/drivers/gpu/host1x/hw/debug_hw_1x01.c b/drivers/gpu/host1x/hw/debug_hw_1x01.c new file mode 100644 index 0000000000..85242a59fa --- /dev/null +++ b/drivers/gpu/host1x/hw/debug_hw_1x01.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2010 Google, Inc. + * Author: Erik Gilling <konkers@android.com> + * + * Copyright (C) 2011-2013 NVIDIA Corporation + */ + +#include "../dev.h" +#include "../debug.h" +#include "../cdma.h" +#include "../channel.h" + +static void host1x_debug_show_channel_cdma(struct host1x *host, + struct host1x_channel *ch, + struct output *o) +{ + struct host1x_cdma *cdma = &ch->cdma; + dma_addr_t dmastart, dmaend; + u32 dmaput, dmaget, dmactrl; + u32 cbstat, cbread; + u32 val, base, baseval; + + dmastart = host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART); + dmaend = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND); + dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT); + dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET); + dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL); + cbread = host1x_sync_readl(host, HOST1X_SYNC_CBREAD(ch->id)); + cbstat = host1x_sync_readl(host, HOST1X_SYNC_CBSTAT(ch->id)); + + host1x_debug_output(o, "%u-%s: ", ch->id, dev_name(ch->dev)); + + if (HOST1X_CHANNEL_DMACTRL_DMASTOP_V(dmactrl) || + !ch->cdma.push_buffer.mapped) { + host1x_debug_output(o, "inactive\n\n"); + return; + } + + if (HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat) == HOST1X_CLASS_HOST1X && + HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat) == + HOST1X_UCLASS_WAIT_SYNCPT) + host1x_debug_output(o, "waiting on syncpt %d val %d\n", + cbread >> 24, cbread & 0xffffff); + else if (HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat) == + HOST1X_CLASS_HOST1X && + HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat) == + HOST1X_UCLASS_WAIT_SYNCPT_BASE) { + base = (cbread >> 16) & 0xff; + baseval = + host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_BASE(base)); + val = cbread & 0xffff; + host1x_debug_output(o, "waiting on syncpt %d val %d (base %d = %d; offset = %d)\n", + cbread >> 24, baseval + val, base, + baseval, val); + } else + host1x_debug_output(o, "active class %02x, offset %04x, val %08x\n", + HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat), + HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat), + cbread); + + host1x_debug_output(o, "DMASTART %pad, DMAEND %pad\n", &dmastart, &dmaend); + host1x_debug_output(o, "DMAPUT %08x DMAGET %08x DMACTL %08x\n", + dmaput, dmaget, dmactrl); + host1x_debug_output(o, "CBREAD %08x CBSTAT %08x\n", cbread, cbstat); + + show_channel_gathers(o, cdma); + host1x_debug_output(o, "\n"); +} + +static void host1x_debug_show_channel_fifo(struct host1x *host, + struct host1x_channel *ch, + struct output *o) +{ + u32 val, rd_ptr, wr_ptr, start, end; + unsigned int data_count = 0; + + host1x_debug_output(o, "%u: fifo:\n", ch->id); + + val = host1x_ch_readl(ch, HOST1X_CHANNEL_FIFOSTAT); + host1x_debug_output(o, "FIFOSTAT %08x\n", val); + if (HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(val)) { + host1x_debug_output(o, "[empty]\n"); + return; + } + + host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL); + host1x_sync_writel(host, HOST1X_SYNC_CFPEEK_CTRL_ENA_F(1) | + HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(ch->id), + HOST1X_SYNC_CFPEEK_CTRL); + + val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_PTRS); + rd_ptr = HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(val); + wr_ptr = HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(val); + + val = host1x_sync_readl(host, HOST1X_SYNC_CF_SETUP(ch->id)); + start = HOST1X_SYNC_CF_SETUP_BASE_V(val); + end = HOST1X_SYNC_CF_SETUP_LIMIT_V(val); + + do { + host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL); + host1x_sync_writel(host, HOST1X_SYNC_CFPEEK_CTRL_ENA_F(1) | + HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(ch->id) | + HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(rd_ptr), + HOST1X_SYNC_CFPEEK_CTRL); + val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_READ); + + if (!data_count) { + host1x_debug_output(o, "%08x: ", val); + data_count = show_channel_command(o, val, NULL); + } else { + host1x_debug_cont(o, "%08x%s", val, + data_count > 1 ? ", " : "])\n"); + data_count--; + } + + if (rd_ptr == end) + rd_ptr = start; + else + rd_ptr++; + } while (rd_ptr != wr_ptr); + + if (data_count) + host1x_debug_cont(o, ", ...])\n"); + host1x_debug_output(o, "\n"); + + host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL); +} + +static void host1x_debug_show_mlocks(struct host1x *host, struct output *o) +{ + unsigned int i; + + host1x_debug_output(o, "---- mlocks ----\n"); + + for (i = 0; i < host1x_syncpt_nb_mlocks(host); i++) { + u32 owner = + host1x_sync_readl(host, HOST1X_SYNC_MLOCK_OWNER(i)); + if (HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(owner)) + host1x_debug_output(o, "%u: locked by channel %u\n", + i, HOST1X_SYNC_MLOCK_OWNER_CHID_V(owner)); + else if (HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(owner)) + host1x_debug_output(o, "%u: locked by cpu\n", i); + else + host1x_debug_output(o, "%u: unlocked\n", i); + } + + host1x_debug_output(o, "\n"); +} diff --git a/drivers/gpu/host1x/hw/debug_hw_1x06.c b/drivers/gpu/host1x/hw/debug_hw_1x06.c new file mode 100644 index 0000000000..9d0667879a --- /dev/null +++ b/drivers/gpu/host1x/hw/debug_hw_1x06.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2010 Google, Inc. + * Author: Erik Gilling <konkers@android.com> + * + * Copyright (C) 2011-2017 NVIDIA Corporation + */ + +#include "../dev.h" +#include "../debug.h" +#include "../cdma.h" +#include "../channel.h" + +static void host1x_debug_show_channel_cdma(struct host1x *host, + struct host1x_channel *ch, + struct output *o) +{ + struct host1x_cdma *cdma = &ch->cdma; + dma_addr_t dmastart = 0, dmaend = 0; + u32 dmaput, dmaget, dmactrl; + u32 offset, class; + u32 ch_stat; + +#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && HOST1X_HW >= 6 + dmastart = host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART_HI); + dmastart <<= 32; +#endif + dmastart |= host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART); + +#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && HOST1X_HW >= 6 + dmaend = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND_HI); + dmaend <<= 32; +#endif + dmaend |= host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND); + + dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT); + dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET); + dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL); + offset = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDP_OFFSET); + class = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDP_CLASS); + ch_stat = host1x_ch_readl(ch, HOST1X_CHANNEL_CHANNELSTAT); + + host1x_debug_output(o, "%u-%s: ", ch->id, dev_name(ch->dev)); + + if (dmactrl & HOST1X_CHANNEL_DMACTRL_DMASTOP || + !ch->cdma.push_buffer.mapped) { + host1x_debug_output(o, "inactive\n\n"); + return; + } + + if (class == HOST1X_CLASS_HOST1X && offset == HOST1X_UCLASS_WAIT_SYNCPT) + host1x_debug_output(o, "waiting on syncpt\n"); + else + host1x_debug_output(o, "active class %02x, offset %04x\n", + class, offset); + + host1x_debug_output(o, "DMASTART %pad, DMAEND %pad\n", &dmastart, &dmaend); + host1x_debug_output(o, "DMAPUT %08x DMAGET %08x DMACTL %08x\n", + dmaput, dmaget, dmactrl); + host1x_debug_output(o, "CHANNELSTAT %02x\n", ch_stat); + + show_channel_gathers(o, cdma); + host1x_debug_output(o, "\n"); +} + +static void host1x_debug_show_channel_fifo(struct host1x *host, + struct host1x_channel *ch, + struct output *o) +{ +#if HOST1X_HW <= 6 + u32 rd_ptr, wr_ptr, start, end; + u32 payload = INVALID_PAYLOAD; + unsigned int data_count = 0; +#endif + u32 val; + + host1x_debug_output(o, "%u: fifo:\n", ch->id); + + val = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDFIFO_STAT); + host1x_debug_output(o, "CMDFIFO_STAT %08x\n", val); + if (val & HOST1X_CHANNEL_CMDFIFO_STAT_EMPTY) { + host1x_debug_output(o, "[empty]\n"); + return; + } + + val = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDFIFO_RDATA); + host1x_debug_output(o, "CMDFIFO_RDATA %08x\n", val); + +#if HOST1X_HW <= 6 + /* Peek pointer values are invalid during SLCG, so disable it */ + host1x_hypervisor_writel(host, 0x1, HOST1X_HV_ICG_EN_OVERRIDE); + + val = 0; + val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_ENABLE; + val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_CHANNEL(ch->id); + host1x_hypervisor_writel(host, val, HOST1X_HV_CMDFIFO_PEEK_CTRL); + + val = host1x_hypervisor_readl(host, HOST1X_HV_CMDFIFO_PEEK_PTRS); + rd_ptr = HOST1X_HV_CMDFIFO_PEEK_PTRS_RD_PTR_V(val); + wr_ptr = HOST1X_HV_CMDFIFO_PEEK_PTRS_WR_PTR_V(val); + + val = host1x_hypervisor_readl(host, HOST1X_HV_CMDFIFO_SETUP(ch->id)); + start = HOST1X_HV_CMDFIFO_SETUP_BASE_V(val); + end = HOST1X_HV_CMDFIFO_SETUP_LIMIT_V(val); + + do { + val = 0; + val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_ENABLE; + val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_CHANNEL(ch->id); + val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_ADDR(rd_ptr); + host1x_hypervisor_writel(host, val, + HOST1X_HV_CMDFIFO_PEEK_CTRL); + + val = host1x_hypervisor_readl(host, + HOST1X_HV_CMDFIFO_PEEK_READ); + + if (!data_count) { + host1x_debug_output(o, "%03x 0x%08x: ", + rd_ptr - start, val); + data_count = show_channel_command(o, val, &payload); + } else { + host1x_debug_cont(o, "%08x%s", val, + data_count > 1 ? ", " : "])\n"); + data_count--; + } + + if (rd_ptr == end) + rd_ptr = start; + else + rd_ptr++; + } while (rd_ptr != wr_ptr); + + if (data_count) + host1x_debug_cont(o, ", ...])\n"); + host1x_debug_output(o, "\n"); + + host1x_hypervisor_writel(host, 0x0, HOST1X_HV_CMDFIFO_PEEK_CTRL); + host1x_hypervisor_writel(host, 0x0, HOST1X_HV_ICG_EN_OVERRIDE); +#endif +} + +static void host1x_debug_show_mlocks(struct host1x *host, struct output *o) +{ + /* TODO */ +} diff --git a/drivers/gpu/host1x/hw/host1x01.c b/drivers/gpu/host1x/hw/host1x01.c new file mode 100644 index 0000000000..8d8a117a51 --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x01.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Host1x init for T20 and T30 Architecture Chips + * + * Copyright (c) 2011-2013, NVIDIA Corporation. + */ + +/* include hw specification */ +#include "host1x01.h" +#include "host1x01_hardware.h" + +/* include code */ +#define HOST1X_HW 1 + +#include "cdma_hw.c" +#include "channel_hw.c" +#include "debug_hw.c" +#include "intr_hw.c" +#include "syncpt_hw.c" + +#include "../dev.h" + +int host1x01_init(struct host1x *host) +{ + host->channel_op = &host1x_channel_ops; + host->cdma_op = &host1x_cdma_ops; + host->cdma_pb_op = &host1x_pushbuffer_ops; + host->syncpt_op = &host1x_syncpt_ops; + host->intr_op = &host1x_intr_ops; + host->debug_op = &host1x_debug_ops; + + return 0; +} diff --git a/drivers/gpu/host1x/hw/host1x01.h b/drivers/gpu/host1x/hw/host1x01.h new file mode 100644 index 0000000000..6516c3f1ed --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x01.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Host1x init for T20 and T30 Architecture Chips + * + * Copyright (c) 2011-2013, NVIDIA Corporation. + */ +#ifndef HOST1X_HOST1X01_H +#define HOST1X_HOST1X01_H + +struct host1x; + +int host1x01_init(struct host1x *host); + +#endif /* HOST1X_HOST1X01_H_ */ diff --git a/drivers/gpu/host1x/hw/host1x01_hardware.h b/drivers/gpu/host1x/hw/host1x01_hardware.h new file mode 100644 index 0000000000..cb93d7c180 --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x01_hardware.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tegra host1x Register Offsets for Tegra20 and Tegra30 + * + * Copyright (c) 2010-2013 NVIDIA Corporation. + */ + +#ifndef __HOST1X_HOST1X01_HARDWARE_H +#define __HOST1X_HOST1X01_HARDWARE_H + +#include <linux/types.h> +#include <linux/bitops.h> + +#include "hw_host1x01_channel.h" +#include "hw_host1x01_sync.h" +#include "hw_host1x01_uclass.h" + +#include "opcodes.h" + +#endif diff --git a/drivers/gpu/host1x/hw/host1x02.c b/drivers/gpu/host1x/hw/host1x02.c new file mode 100644 index 0000000000..583b33c048 --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x02.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Host1x init for Tegra114 SoCs + * + * Copyright (c) 2013 NVIDIA Corporation. + */ + +/* include hw specification */ +#include "host1x02.h" +#include "host1x02_hardware.h" + +/* include code */ +#define HOST1X_HW 2 + +#include "cdma_hw.c" +#include "channel_hw.c" +#include "debug_hw.c" +#include "intr_hw.c" +#include "syncpt_hw.c" + +#include "../dev.h" + +int host1x02_init(struct host1x *host) +{ + host->channel_op = &host1x_channel_ops; + host->cdma_op = &host1x_cdma_ops; + host->cdma_pb_op = &host1x_pushbuffer_ops; + host->syncpt_op = &host1x_syncpt_ops; + host->intr_op = &host1x_intr_ops; + host->debug_op = &host1x_debug_ops; + + return 0; +} diff --git a/drivers/gpu/host1x/hw/host1x02.h b/drivers/gpu/host1x/hw/host1x02.h new file mode 100644 index 0000000000..7e5c3e4700 --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x02.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Host1x init for Tegra114 SoCs + * + * Copyright (c) 2013 NVIDIA Corporation. + */ + +#ifndef HOST1X_HOST1X02_H +#define HOST1X_HOST1X02_H + +struct host1x; + +int host1x02_init(struct host1x *host); + +#endif diff --git a/drivers/gpu/host1x/hw/host1x02_hardware.h b/drivers/gpu/host1x/hw/host1x02_hardware.h new file mode 100644 index 0000000000..2d1282b9bc --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x02_hardware.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tegra host1x Register Offsets for Tegra114 + * + * Copyright (c) 2010-2013 NVIDIA Corporation. + */ + +#ifndef __HOST1X_HOST1X02_HARDWARE_H +#define __HOST1X_HOST1X02_HARDWARE_H + +#include <linux/types.h> +#include <linux/bitops.h> + +#include "hw_host1x02_channel.h" +#include "hw_host1x02_sync.h" +#include "hw_host1x02_uclass.h" + +#include "opcodes.h" + +#endif diff --git a/drivers/gpu/host1x/hw/host1x04.c b/drivers/gpu/host1x/hw/host1x04.c new file mode 100644 index 0000000000..26b459eb2d --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x04.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Host1x init for Tegra124 SoCs + * + * Copyright (c) 2013 NVIDIA Corporation. + */ + +/* include hw specification */ +#include "host1x04.h" +#include "host1x04_hardware.h" + +/* include code */ +#define HOST1X_HW 4 + +#include "cdma_hw.c" +#include "channel_hw.c" +#include "debug_hw.c" +#include "intr_hw.c" +#include "syncpt_hw.c" + +#include "../dev.h" + +int host1x04_init(struct host1x *host) +{ + host->channel_op = &host1x_channel_ops; + host->cdma_op = &host1x_cdma_ops; + host->cdma_pb_op = &host1x_pushbuffer_ops; + host->syncpt_op = &host1x_syncpt_ops; + host->intr_op = &host1x_intr_ops; + host->debug_op = &host1x_debug_ops; + + return 0; +} diff --git a/drivers/gpu/host1x/hw/host1x04.h b/drivers/gpu/host1x/hw/host1x04.h new file mode 100644 index 0000000000..2a1e8153c5 --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x04.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Host1x init for Tegra124 SoCs + * + * Copyright (c) 2013 NVIDIA Corporation. + */ + +#ifndef HOST1X_HOST1X04_H +#define HOST1X_HOST1X04_H + +struct host1x; + +int host1x04_init(struct host1x *host); + +#endif diff --git a/drivers/gpu/host1x/hw/host1x04_hardware.h b/drivers/gpu/host1x/hw/host1x04_hardware.h new file mode 100644 index 0000000000..84d244e8af --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x04_hardware.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tegra host1x Register Offsets for Tegra124 + * + * Copyright (c) 2010-2013 NVIDIA Corporation. + */ + +#ifndef __HOST1X_HOST1X04_HARDWARE_H +#define __HOST1X_HOST1X04_HARDWARE_H + +#include <linux/types.h> +#include <linux/bitops.h> + +#include "hw_host1x04_channel.h" +#include "hw_host1x04_sync.h" +#include "hw_host1x04_uclass.h" + +#include "opcodes.h" + +#endif diff --git a/drivers/gpu/host1x/hw/host1x05.c b/drivers/gpu/host1x/hw/host1x05.c new file mode 100644 index 0000000000..6d9803343a --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x05.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Host1x init for Tegra210 SoCs + * + * Copyright (c) 2015 NVIDIA Corporation. + */ + +/* include hw specification */ +#include "host1x05.h" +#include "host1x05_hardware.h" + +/* include code */ +#define HOST1X_HW 5 + +#include "cdma_hw.c" +#include "channel_hw.c" +#include "debug_hw.c" +#include "intr_hw.c" +#include "syncpt_hw.c" + +#include "../dev.h" + +int host1x05_init(struct host1x *host) +{ + host->channel_op = &host1x_channel_ops; + host->cdma_op = &host1x_cdma_ops; + host->cdma_pb_op = &host1x_pushbuffer_ops; + host->syncpt_op = &host1x_syncpt_ops; + host->intr_op = &host1x_intr_ops; + host->debug_op = &host1x_debug_ops; + + return 0; +} diff --git a/drivers/gpu/host1x/hw/host1x05.h b/drivers/gpu/host1x/hw/host1x05.h new file mode 100644 index 0000000000..addfd41e7e --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x05.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Host1x init for Tegra210 SoCs + * + * Copyright (c) 2015 NVIDIA Corporation. + */ + +#ifndef HOST1X_HOST1X05_H +#define HOST1X_HOST1X05_H + +struct host1x; + +int host1x05_init(struct host1x *host); + +#endif diff --git a/drivers/gpu/host1x/hw/host1x05_hardware.h b/drivers/gpu/host1x/hw/host1x05_hardware.h new file mode 100644 index 0000000000..1dcde6ec79 --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x05_hardware.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tegra host1x Register Offsets for Tegra210 + * + * Copyright (c) 2015 NVIDIA Corporation. + */ + +#ifndef __HOST1X_HOST1X05_HARDWARE_H +#define __HOST1X_HOST1X05_HARDWARE_H + +#include <linux/types.h> +#include <linux/bitops.h> + +#include "hw_host1x05_channel.h" +#include "hw_host1x05_sync.h" +#include "hw_host1x05_uclass.h" + +#include "opcodes.h" + +#endif diff --git a/drivers/gpu/host1x/hw/host1x06.c b/drivers/gpu/host1x/hw/host1x06.c new file mode 100644 index 0000000000..844f81ae2d --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x06.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Host1x init for Tegra186 SoCs + * + * Copyright (c) 2017 NVIDIA Corporation. + */ + +/* include hw specification */ +#include "host1x06.h" +#include "host1x06_hardware.h" + +/* include code */ +#define HOST1X_HW 6 + +#include "cdma_hw.c" +#include "channel_hw.c" +#include "debug_hw.c" +#include "intr_hw.c" +#include "syncpt_hw.c" + +#include "../dev.h" + +int host1x06_init(struct host1x *host) +{ + host->channel_op = &host1x_channel_ops; + host->cdma_op = &host1x_cdma_ops; + host->cdma_pb_op = &host1x_pushbuffer_ops; + host->syncpt_op = &host1x_syncpt_ops; + host->intr_op = &host1x_intr_ops; + host->debug_op = &host1x_debug_ops; + + return 0; +} diff --git a/drivers/gpu/host1x/hw/host1x06.h b/drivers/gpu/host1x/hw/host1x06.h new file mode 100644 index 0000000000..4ea756895c --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x06.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Host1x init for Tegra186 SoCs + * + * Copyright (c) 2017 NVIDIA Corporation. + */ + +#ifndef HOST1X_HOST1X06_H +#define HOST1X_HOST1X06_H + +struct host1x; + +int host1x06_init(struct host1x *host); + +#endif diff --git a/drivers/gpu/host1x/hw/host1x06_hardware.h b/drivers/gpu/host1x/hw/host1x06_hardware.h new file mode 100644 index 0000000000..c05cfa7e30 --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x06_hardware.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tegra host1x Register Offsets for Tegra186 + * + * Copyright (c) 2017 NVIDIA Corporation. + */ + +#ifndef __HOST1X_HOST1X06_HARDWARE_H +#define __HOST1X_HOST1X06_HARDWARE_H + +#include <linux/types.h> +#include <linux/bitops.h> + +#include "hw_host1x06_channel.h" +#include "hw_host1x06_uclass.h" +#include "hw_host1x06_vm.h" +#include "hw_host1x06_hypervisor.h" + +#include "opcodes.h" + +#endif diff --git a/drivers/gpu/host1x/hw/host1x07.c b/drivers/gpu/host1x/hw/host1x07.c new file mode 100644 index 0000000000..0c6f14f7ec --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x07.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Host1x init for Tegra194 SoCs + * + * Copyright (c) 2018 NVIDIA Corporation. + */ + +/* include hw specification */ +#include "host1x07.h" +#include "host1x07_hardware.h" + +/* include code */ +#define HOST1X_HW 7 + +#include "cdma_hw.c" +#include "channel_hw.c" +#include "debug_hw.c" +#include "intr_hw.c" +#include "syncpt_hw.c" + +#include "../dev.h" + +int host1x07_init(struct host1x *host) +{ + host->channel_op = &host1x_channel_ops; + host->cdma_op = &host1x_cdma_ops; + host->cdma_pb_op = &host1x_pushbuffer_ops; + host->syncpt_op = &host1x_syncpt_ops; + host->intr_op = &host1x_intr_ops; + host->debug_op = &host1x_debug_ops; + + return 0; +} diff --git a/drivers/gpu/host1x/hw/host1x07.h b/drivers/gpu/host1x/hw/host1x07.h new file mode 100644 index 0000000000..419b6eaad3 --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x07.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Host1x init for Tegra194 SoCs + * + * Copyright (c) 2018 NVIDIA Corporation. + */ + +#ifndef HOST1X_HOST1X07_H +#define HOST1X_HOST1X07_H + +struct host1x; + +int host1x07_init(struct host1x *host); + +#endif diff --git a/drivers/gpu/host1x/hw/host1x07_hardware.h b/drivers/gpu/host1x/hw/host1x07_hardware.h new file mode 100644 index 0000000000..d67364e039 --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x07_hardware.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tegra host1x Register Offsets for Tegra194 + * + * Copyright (c) 2018 NVIDIA Corporation. + */ + +#ifndef __HOST1X_HOST1X07_HARDWARE_H +#define __HOST1X_HOST1X07_HARDWARE_H + +#include <linux/types.h> +#include <linux/bitops.h> + +#include "hw_host1x07_channel.h" +#include "hw_host1x07_uclass.h" +#include "hw_host1x07_vm.h" +#include "hw_host1x07_hypervisor.h" + +#include "opcodes.h" + +#endif diff --git a/drivers/gpu/host1x/hw/host1x08.c b/drivers/gpu/host1x/hw/host1x08.c new file mode 100644 index 0000000000..754890c34c --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x08.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Host1x init for Tegra234 SoCs + * + * Copyright (c) 2022 NVIDIA Corporation. + */ + +/* include hw specification */ +#include "host1x08.h" +#include "host1x08_hardware.h" + +/* include code */ +#define HOST1X_HW 8 + +#include "cdma_hw.c" +#include "channel_hw.c" +#include "debug_hw.c" +#include "intr_hw.c" +#include "syncpt_hw.c" + +#include "../dev.h" + +int host1x08_init(struct host1x *host) +{ + host->channel_op = &host1x_channel_ops; + host->cdma_op = &host1x_cdma_ops; + host->cdma_pb_op = &host1x_pushbuffer_ops; + host->syncpt_op = &host1x_syncpt_ops; + host->intr_op = &host1x_intr_ops; + host->debug_op = &host1x_debug_ops; + + return 0; +} diff --git a/drivers/gpu/host1x/hw/host1x08.h b/drivers/gpu/host1x/hw/host1x08.h new file mode 100644 index 0000000000..a6bad56e44 --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x08.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Host1x init for Tegra234 SoCs + * + * Copyright (c) 2018 NVIDIA Corporation. + */ + +#ifndef HOST1X_HOST1X08_H +#define HOST1X_HOST1X08_H + +struct host1x; + +int host1x08_init(struct host1x *host); + +#endif diff --git a/drivers/gpu/host1x/hw/host1x08_hardware.h b/drivers/gpu/host1x/hw/host1x08_hardware.h new file mode 100644 index 0000000000..936243060b --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x08_hardware.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tegra host1x Register Offsets for Tegra234 + * + * Copyright (c) 2022 NVIDIA Corporation. + */ + +#ifndef __HOST1X_HOST1X08_HARDWARE_H +#define __HOST1X_HOST1X08_HARDWARE_H + +#include <linux/types.h> +#include <linux/bitops.h> + +#include "hw_host1x08_uclass.h" +#include "hw_host1x08_vm.h" +#include "hw_host1x08_hypervisor.h" +#include "hw_host1x08_common.h" + +#include "opcodes.h" + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x01_channel.h b/drivers/gpu/host1x/hw/hw_host1x01_channel.h new file mode 100644 index 0000000000..8da43eaba2 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x01_channel.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2012-2013, NVIDIA Corporation. + */ + + /* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. + * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. + * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>. + * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. This value can be |'d + * with others to produce a full register value for <x>. + * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. + */ + +#ifndef __hw_host1x_channel_host1x_h__ +#define __hw_host1x_channel_host1x_h__ + +static inline u32 host1x_channel_fifostat_r(void) +{ + return 0x0; +} +#define HOST1X_CHANNEL_FIFOSTAT \ + host1x_channel_fifostat_r() +static inline u32 host1x_channel_fifostat_cfempty_v(u32 r) +{ + return (r >> 10) & 0x1; +} +#define HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(r) \ + host1x_channel_fifostat_cfempty_v(r) +static inline u32 host1x_channel_dmastart_r(void) +{ + return 0x14; +} +#define HOST1X_CHANNEL_DMASTART \ + host1x_channel_dmastart_r() +static inline u32 host1x_channel_dmaput_r(void) +{ + return 0x18; +} +#define HOST1X_CHANNEL_DMAPUT \ + host1x_channel_dmaput_r() +static inline u32 host1x_channel_dmaget_r(void) +{ + return 0x1c; +} +#define HOST1X_CHANNEL_DMAGET \ + host1x_channel_dmaget_r() +static inline u32 host1x_channel_dmaend_r(void) +{ + return 0x20; +} +#define HOST1X_CHANNEL_DMAEND \ + host1x_channel_dmaend_r() +static inline u32 host1x_channel_dmactrl_r(void) +{ + return 0x24; +} +#define HOST1X_CHANNEL_DMACTRL \ + host1x_channel_dmactrl_r() +static inline u32 host1x_channel_dmactrl_dmastop(void) +{ + return 1 << 0; +} +#define HOST1X_CHANNEL_DMACTRL_DMASTOP \ + host1x_channel_dmactrl_dmastop() +static inline u32 host1x_channel_dmactrl_dmastop_v(u32 r) +{ + return (r >> 0) & 0x1; +} +#define HOST1X_CHANNEL_DMACTRL_DMASTOP_V(r) \ + host1x_channel_dmactrl_dmastop_v(r) +static inline u32 host1x_channel_dmactrl_dmagetrst(void) +{ + return 1 << 1; +} +#define HOST1X_CHANNEL_DMACTRL_DMAGETRST \ + host1x_channel_dmactrl_dmagetrst() +static inline u32 host1x_channel_dmactrl_dmainitget(void) +{ + return 1 << 2; +} +#define HOST1X_CHANNEL_DMACTRL_DMAINITGET \ + host1x_channel_dmactrl_dmainitget() +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x01_sync.h b/drivers/gpu/host1x/hw/hw_host1x01_sync.h new file mode 100644 index 0000000000..ec95e7ae7c --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x01_sync.h @@ -0,0 +1,231 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2012-2013, NVIDIA Corporation. + */ + + /* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. + * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. + * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>. + * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. This value can be |'d + * with others to produce a full register value for <x>. + * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. + */ + +#ifndef __hw_host1x01_sync_h__ +#define __hw_host1x01_sync_h__ + +#define REGISTER_STRIDE 4 + +static inline u32 host1x_sync_syncpt_r(unsigned int id) +{ + return 0x400 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT(id) \ + host1x_sync_syncpt_r(id) +static inline u32 host1x_sync_syncpt_thresh_cpu0_int_status_r(unsigned int id) +{ + return 0x40 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id) \ + host1x_sync_syncpt_thresh_cpu0_int_status_r(id) +static inline u32 host1x_sync_syncpt_thresh_int_disable_r(unsigned int id) +{ + return 0x60 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id) \ + host1x_sync_syncpt_thresh_int_disable_r(id) +static inline u32 host1x_sync_syncpt_thresh_int_enable_cpu0_r(unsigned int id) +{ + return 0x68 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id) \ + host1x_sync_syncpt_thresh_int_enable_cpu0_r(id) +static inline u32 host1x_sync_cf_setup_r(unsigned int channel) +{ + return 0x80 + channel * REGISTER_STRIDE; +} +#define HOST1X_SYNC_CF_SETUP(channel) \ + host1x_sync_cf_setup_r(channel) +static inline u32 host1x_sync_cf_setup_base_v(u32 r) +{ + return (r >> 0) & 0x1ff; +} +#define HOST1X_SYNC_CF_SETUP_BASE_V(r) \ + host1x_sync_cf_setup_base_v(r) +static inline u32 host1x_sync_cf_setup_limit_v(u32 r) +{ + return (r >> 16) & 0x1ff; +} +#define HOST1X_SYNC_CF_SETUP_LIMIT_V(r) \ + host1x_sync_cf_setup_limit_v(r) +static inline u32 host1x_sync_cmdproc_stop_r(void) +{ + return 0xac; +} +#define HOST1X_SYNC_CMDPROC_STOP \ + host1x_sync_cmdproc_stop_r() +static inline u32 host1x_sync_ch_teardown_r(void) +{ + return 0xb0; +} +#define HOST1X_SYNC_CH_TEARDOWN \ + host1x_sync_ch_teardown_r() +static inline u32 host1x_sync_usec_clk_r(void) +{ + return 0x1a4; +} +#define HOST1X_SYNC_USEC_CLK \ + host1x_sync_usec_clk_r() +static inline u32 host1x_sync_ctxsw_timeout_cfg_r(void) +{ + return 0x1a8; +} +#define HOST1X_SYNC_CTXSW_TIMEOUT_CFG \ + host1x_sync_ctxsw_timeout_cfg_r() +static inline u32 host1x_sync_ip_busy_timeout_r(void) +{ + return 0x1bc; +} +#define HOST1X_SYNC_IP_BUSY_TIMEOUT \ + host1x_sync_ip_busy_timeout_r() +static inline u32 host1x_sync_mlock_owner_r(unsigned int id) +{ + return 0x340 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_MLOCK_OWNER(id) \ + host1x_sync_mlock_owner_r(id) +static inline u32 host1x_sync_mlock_owner_chid_v(u32 v) +{ + return (v >> 8) & 0xf; +} +#define HOST1X_SYNC_MLOCK_OWNER_CHID_V(v) \ + host1x_sync_mlock_owner_chid_v(v) +static inline u32 host1x_sync_mlock_owner_cpu_owns_v(u32 r) +{ + return (r >> 1) & 0x1; +} +#define HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(r) \ + host1x_sync_mlock_owner_cpu_owns_v(r) +static inline u32 host1x_sync_mlock_owner_ch_owns_v(u32 r) +{ + return (r >> 0) & 0x1; +} +#define HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(r) \ + host1x_sync_mlock_owner_ch_owns_v(r) +static inline u32 host1x_sync_syncpt_int_thresh_r(unsigned int id) +{ + return 0x500 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_INT_THRESH(id) \ + host1x_sync_syncpt_int_thresh_r(id) +static inline u32 host1x_sync_syncpt_base_r(unsigned int id) +{ + return 0x600 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_BASE(id) \ + host1x_sync_syncpt_base_r(id) +static inline u32 host1x_sync_syncpt_cpu_incr_r(unsigned int id) +{ + return 0x700 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_CPU_INCR(id) \ + host1x_sync_syncpt_cpu_incr_r(id) +static inline u32 host1x_sync_cbread_r(unsigned int channel) +{ + return 0x720 + channel * REGISTER_STRIDE; +} +#define HOST1X_SYNC_CBREAD(channel) \ + host1x_sync_cbread_r(channel) +static inline u32 host1x_sync_cfpeek_ctrl_r(void) +{ + return 0x74c; +} +#define HOST1X_SYNC_CFPEEK_CTRL \ + host1x_sync_cfpeek_ctrl_r() +static inline u32 host1x_sync_cfpeek_ctrl_addr_f(u32 v) +{ + return (v & 0x1ff) << 0; +} +#define HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(v) \ + host1x_sync_cfpeek_ctrl_addr_f(v) +static inline u32 host1x_sync_cfpeek_ctrl_channr_f(u32 v) +{ + return (v & 0x7) << 16; +} +#define HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(v) \ + host1x_sync_cfpeek_ctrl_channr_f(v) +static inline u32 host1x_sync_cfpeek_ctrl_ena_f(u32 v) +{ + return (v & 0x1) << 31; +} +#define HOST1X_SYNC_CFPEEK_CTRL_ENA_F(v) \ + host1x_sync_cfpeek_ctrl_ena_f(v) +static inline u32 host1x_sync_cfpeek_read_r(void) +{ + return 0x750; +} +#define HOST1X_SYNC_CFPEEK_READ \ + host1x_sync_cfpeek_read_r() +static inline u32 host1x_sync_cfpeek_ptrs_r(void) +{ + return 0x754; +} +#define HOST1X_SYNC_CFPEEK_PTRS \ + host1x_sync_cfpeek_ptrs_r() +static inline u32 host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(u32 r) +{ + return (r >> 0) & 0x1ff; +} +#define HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(r) \ + host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(r) +static inline u32 host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(u32 r) +{ + return (r >> 16) & 0x1ff; +} +#define HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(r) \ + host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(r) +static inline u32 host1x_sync_cbstat_r(unsigned int channel) +{ + return 0x758 + channel * REGISTER_STRIDE; +} +#define HOST1X_SYNC_CBSTAT(channel) \ + host1x_sync_cbstat_r(channel) +static inline u32 host1x_sync_cbstat_cboffset_v(u32 r) +{ + return (r >> 0) & 0xffff; +} +#define HOST1X_SYNC_CBSTAT_CBOFFSET_V(r) \ + host1x_sync_cbstat_cboffset_v(r) +static inline u32 host1x_sync_cbstat_cbclass_v(u32 r) +{ + return (r >> 16) & 0x3ff; +} +#define HOST1X_SYNC_CBSTAT_CBCLASS_V(r) \ + host1x_sync_cbstat_cbclass_v(r) + +#endif /* __hw_host1x01_sync_h__ */ diff --git a/drivers/gpu/host1x/hw/hw_host1x01_uclass.h b/drivers/gpu/host1x/hw/hw_host1x01_uclass.h new file mode 100644 index 0000000000..1239bfd46a --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x01_uclass.h @@ -0,0 +1,168 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2012-2013, NVIDIA Corporation. + */ + + /* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. + * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. + * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>. + * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. This value can be |'d + * with others to produce a full register value for <x>. + * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. + */ + +#ifndef __hw_host1x_uclass_host1x_h__ +#define __hw_host1x_uclass_host1x_h__ + +static inline u32 host1x_uclass_incr_syncpt_r(void) +{ + return 0x0; +} +#define HOST1X_UCLASS_INCR_SYNCPT \ + host1x_uclass_incr_syncpt_r() +static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v) +{ + return (v & 0xff) << 8; +} +#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \ + host1x_uclass_incr_syncpt_cond_f(v) +static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v) +{ + return (v & 0xff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \ + host1x_uclass_incr_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_r(void) +{ + return 0x8; +} +#define HOST1X_UCLASS_WAIT_SYNCPT \ + host1x_uclass_wait_syncpt_r() +static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \ + host1x_uclass_wait_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \ + host1x_uclass_wait_syncpt_thresh_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_r(void) +{ + return 0x9; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \ + host1x_uclass_wait_syncpt_base_r() +static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 16; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_wait_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_load_syncpt_base_r(void) +{ + return 0xb; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \ + host1x_uclass_load_syncpt_base_r() +static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_load_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \ + host1x_uclass_load_syncpt_base_value_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_incr_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_incr_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_indoff_r(void) +{ + return 0x2d; +} +#define HOST1X_UCLASS_INDOFF \ + host1x_uclass_indoff_r() +static inline u32 host1x_uclass_indoff_indbe_f(u32 v) +{ + return (v & 0xf) << 28; +} +#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \ + host1x_uclass_indoff_indbe_f(v) +static inline u32 host1x_uclass_indoff_autoinc_f(u32 v) +{ + return (v & 0x1) << 27; +} +#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \ + host1x_uclass_indoff_autoinc_f(v) +static inline u32 host1x_uclass_indoff_indmodid_f(u32 v) +{ + return (v & 0xff) << 18; +} +#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \ + host1x_uclass_indoff_indmodid_f(v) +static inline u32 host1x_uclass_indoff_indroffset_f(u32 v) +{ + return (v & 0xffff) << 2; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_indoff_rwn_read_v(void) +{ + return 1; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x02_channel.h b/drivers/gpu/host1x/hw/hw_host1x02_channel.h new file mode 100644 index 0000000000..210d317ad2 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x02_channel.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2013 NVIDIA Corporation. + */ + + /* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. + * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. + * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>. + * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. This value can be |'d + * with others to produce a full register value for <x>. + * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. + */ + +#ifndef HOST1X_HW_HOST1X02_CHANNEL_H +#define HOST1X_HW_HOST1X02_CHANNEL_H + +static inline u32 host1x_channel_fifostat_r(void) +{ + return 0x0; +} +#define HOST1X_CHANNEL_FIFOSTAT \ + host1x_channel_fifostat_r() +static inline u32 host1x_channel_fifostat_cfempty_v(u32 r) +{ + return (r >> 11) & 0x1; +} +#define HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(r) \ + host1x_channel_fifostat_cfempty_v(r) +static inline u32 host1x_channel_dmastart_r(void) +{ + return 0x14; +} +#define HOST1X_CHANNEL_DMASTART \ + host1x_channel_dmastart_r() +static inline u32 host1x_channel_dmaput_r(void) +{ + return 0x18; +} +#define HOST1X_CHANNEL_DMAPUT \ + host1x_channel_dmaput_r() +static inline u32 host1x_channel_dmaget_r(void) +{ + return 0x1c; +} +#define HOST1X_CHANNEL_DMAGET \ + host1x_channel_dmaget_r() +static inline u32 host1x_channel_dmaend_r(void) +{ + return 0x20; +} +#define HOST1X_CHANNEL_DMAEND \ + host1x_channel_dmaend_r() +static inline u32 host1x_channel_dmactrl_r(void) +{ + return 0x24; +} +#define HOST1X_CHANNEL_DMACTRL \ + host1x_channel_dmactrl_r() +static inline u32 host1x_channel_dmactrl_dmastop(void) +{ + return 1 << 0; +} +#define HOST1X_CHANNEL_DMACTRL_DMASTOP \ + host1x_channel_dmactrl_dmastop() +static inline u32 host1x_channel_dmactrl_dmastop_v(u32 r) +{ + return (r >> 0) & 0x1; +} +#define HOST1X_CHANNEL_DMACTRL_DMASTOP_V(r) \ + host1x_channel_dmactrl_dmastop_v(r) +static inline u32 host1x_channel_dmactrl_dmagetrst(void) +{ + return 1 << 1; +} +#define HOST1X_CHANNEL_DMACTRL_DMAGETRST \ + host1x_channel_dmactrl_dmagetrst() +static inline u32 host1x_channel_dmactrl_dmainitget(void) +{ + return 1 << 2; +} +#define HOST1X_CHANNEL_DMACTRL_DMAINITGET \ + host1x_channel_dmactrl_dmainitget() + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x02_sync.h b/drivers/gpu/host1x/hw/hw_host1x02_sync.h new file mode 100644 index 0000000000..44b4f83797 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x02_sync.h @@ -0,0 +1,231 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2013 NVIDIA Corporation. + */ + + /* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. + * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. + * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>. + * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. This value can be |'d + * with others to produce a full register value for <x>. + * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. + */ + +#ifndef HOST1X_HW_HOST1X02_SYNC_H +#define HOST1X_HW_HOST1X02_SYNC_H + +#define REGISTER_STRIDE 4 + +static inline u32 host1x_sync_syncpt_r(unsigned int id) +{ + return 0x400 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT(id) \ + host1x_sync_syncpt_r(id) +static inline u32 host1x_sync_syncpt_thresh_cpu0_int_status_r(unsigned int id) +{ + return 0x40 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id) \ + host1x_sync_syncpt_thresh_cpu0_int_status_r(id) +static inline u32 host1x_sync_syncpt_thresh_int_disable_r(unsigned int id) +{ + return 0x60 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id) \ + host1x_sync_syncpt_thresh_int_disable_r(id) +static inline u32 host1x_sync_syncpt_thresh_int_enable_cpu0_r(unsigned int id) +{ + return 0x68 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id) \ + host1x_sync_syncpt_thresh_int_enable_cpu0_r(id) +static inline u32 host1x_sync_cf_setup_r(unsigned int channel) +{ + return 0x80 + channel * REGISTER_STRIDE; +} +#define HOST1X_SYNC_CF_SETUP(channel) \ + host1x_sync_cf_setup_r(channel) +static inline u32 host1x_sync_cf_setup_base_v(u32 r) +{ + return (r >> 0) & 0x3ff; +} +#define HOST1X_SYNC_CF_SETUP_BASE_V(r) \ + host1x_sync_cf_setup_base_v(r) +static inline u32 host1x_sync_cf_setup_limit_v(u32 r) +{ + return (r >> 16) & 0x3ff; +} +#define HOST1X_SYNC_CF_SETUP_LIMIT_V(r) \ + host1x_sync_cf_setup_limit_v(r) +static inline u32 host1x_sync_cmdproc_stop_r(void) +{ + return 0xac; +} +#define HOST1X_SYNC_CMDPROC_STOP \ + host1x_sync_cmdproc_stop_r() +static inline u32 host1x_sync_ch_teardown_r(void) +{ + return 0xb0; +} +#define HOST1X_SYNC_CH_TEARDOWN \ + host1x_sync_ch_teardown_r() +static inline u32 host1x_sync_usec_clk_r(void) +{ + return 0x1a4; +} +#define HOST1X_SYNC_USEC_CLK \ + host1x_sync_usec_clk_r() +static inline u32 host1x_sync_ctxsw_timeout_cfg_r(void) +{ + return 0x1a8; +} +#define HOST1X_SYNC_CTXSW_TIMEOUT_CFG \ + host1x_sync_ctxsw_timeout_cfg_r() +static inline u32 host1x_sync_ip_busy_timeout_r(void) +{ + return 0x1bc; +} +#define HOST1X_SYNC_IP_BUSY_TIMEOUT \ + host1x_sync_ip_busy_timeout_r() +static inline u32 host1x_sync_mlock_owner_r(unsigned int id) +{ + return 0x340 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_MLOCK_OWNER(id) \ + host1x_sync_mlock_owner_r(id) +static inline u32 host1x_sync_mlock_owner_chid_v(u32 v) +{ + return (v >> 8) & 0xf; +} +#define HOST1X_SYNC_MLOCK_OWNER_CHID_V(v) \ + host1x_sync_mlock_owner_chid_v(v) +static inline u32 host1x_sync_mlock_owner_cpu_owns_v(u32 r) +{ + return (r >> 1) & 0x1; +} +#define HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(r) \ + host1x_sync_mlock_owner_cpu_owns_v(r) +static inline u32 host1x_sync_mlock_owner_ch_owns_v(u32 r) +{ + return (r >> 0) & 0x1; +} +#define HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(r) \ + host1x_sync_mlock_owner_ch_owns_v(r) +static inline u32 host1x_sync_syncpt_int_thresh_r(unsigned int id) +{ + return 0x500 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_INT_THRESH(id) \ + host1x_sync_syncpt_int_thresh_r(id) +static inline u32 host1x_sync_syncpt_base_r(unsigned int id) +{ + return 0x600 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_BASE(id) \ + host1x_sync_syncpt_base_r(id) +static inline u32 host1x_sync_syncpt_cpu_incr_r(unsigned int id) +{ + return 0x700 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_CPU_INCR(id) \ + host1x_sync_syncpt_cpu_incr_r(id) +static inline u32 host1x_sync_cbread_r(unsigned int channel) +{ + return 0x720 + channel * REGISTER_STRIDE; +} +#define HOST1X_SYNC_CBREAD(channel) \ + host1x_sync_cbread_r(channel) +static inline u32 host1x_sync_cfpeek_ctrl_r(void) +{ + return 0x74c; +} +#define HOST1X_SYNC_CFPEEK_CTRL \ + host1x_sync_cfpeek_ctrl_r() +static inline u32 host1x_sync_cfpeek_ctrl_addr_f(u32 v) +{ + return (v & 0x3ff) << 0; +} +#define HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(v) \ + host1x_sync_cfpeek_ctrl_addr_f(v) +static inline u32 host1x_sync_cfpeek_ctrl_channr_f(u32 v) +{ + return (v & 0xf) << 16; +} +#define HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(v) \ + host1x_sync_cfpeek_ctrl_channr_f(v) +static inline u32 host1x_sync_cfpeek_ctrl_ena_f(u32 v) +{ + return (v & 0x1) << 31; +} +#define HOST1X_SYNC_CFPEEK_CTRL_ENA_F(v) \ + host1x_sync_cfpeek_ctrl_ena_f(v) +static inline u32 host1x_sync_cfpeek_read_r(void) +{ + return 0x750; +} +#define HOST1X_SYNC_CFPEEK_READ \ + host1x_sync_cfpeek_read_r() +static inline u32 host1x_sync_cfpeek_ptrs_r(void) +{ + return 0x754; +} +#define HOST1X_SYNC_CFPEEK_PTRS \ + host1x_sync_cfpeek_ptrs_r() +static inline u32 host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(u32 r) +{ + return (r >> 0) & 0x3ff; +} +#define HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(r) \ + host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(r) +static inline u32 host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(u32 r) +{ + return (r >> 16) & 0x3ff; +} +#define HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(r) \ + host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(r) +static inline u32 host1x_sync_cbstat_r(unsigned int channel) +{ + return 0x758 + channel * REGISTER_STRIDE; +} +#define HOST1X_SYNC_CBSTAT(channel) \ + host1x_sync_cbstat_r(channel) +static inline u32 host1x_sync_cbstat_cboffset_v(u32 r) +{ + return (r >> 0) & 0xffff; +} +#define HOST1X_SYNC_CBSTAT_CBOFFSET_V(r) \ + host1x_sync_cbstat_cboffset_v(r) +static inline u32 host1x_sync_cbstat_cbclass_v(u32 r) +{ + return (r >> 16) & 0x3ff; +} +#define HOST1X_SYNC_CBSTAT_CBCLASS_V(r) \ + host1x_sync_cbstat_cbclass_v(r) + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x02_uclass.h b/drivers/gpu/host1x/hw/hw_host1x02_uclass.h new file mode 100644 index 0000000000..0a2ab8f1da --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x02_uclass.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2013 NVIDIA Corporation. + */ + + /* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. + * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. + * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>. + * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. This value can be |'d + * with others to produce a full register value for <x>. + * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. + */ + +#ifndef HOST1X_HW_HOST1X02_UCLASS_H +#define HOST1X_HW_HOST1X02_UCLASS_H + +static inline u32 host1x_uclass_incr_syncpt_r(void) +{ + return 0x0; +} +#define HOST1X_UCLASS_INCR_SYNCPT \ + host1x_uclass_incr_syncpt_r() +static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v) +{ + return (v & 0xff) << 8; +} +#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \ + host1x_uclass_incr_syncpt_cond_f(v) +static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v) +{ + return (v & 0xff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \ + host1x_uclass_incr_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_r(void) +{ + return 0x8; +} +#define HOST1X_UCLASS_WAIT_SYNCPT \ + host1x_uclass_wait_syncpt_r() +static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \ + host1x_uclass_wait_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \ + host1x_uclass_wait_syncpt_thresh_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_r(void) +{ + return 0x9; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \ + host1x_uclass_wait_syncpt_base_r() +static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 16; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_wait_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_load_syncpt_base_r(void) +{ + return 0xb; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \ + host1x_uclass_load_syncpt_base_r() +static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_load_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \ + host1x_uclass_load_syncpt_base_value_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_incr_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_incr_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_indoff_r(void) +{ + return 0x2d; +} +#define HOST1X_UCLASS_INDOFF \ + host1x_uclass_indoff_r() +static inline u32 host1x_uclass_indoff_indbe_f(u32 v) +{ + return (v & 0xf) << 28; +} +#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \ + host1x_uclass_indoff_indbe_f(v) +static inline u32 host1x_uclass_indoff_autoinc_f(u32 v) +{ + return (v & 0x1) << 27; +} +#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \ + host1x_uclass_indoff_autoinc_f(v) +static inline u32 host1x_uclass_indoff_indmodid_f(u32 v) +{ + return (v & 0xff) << 18; +} +#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \ + host1x_uclass_indoff_indmodid_f(v) +static inline u32 host1x_uclass_indoff_indroffset_f(u32 v) +{ + return (v & 0xffff) << 2; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_indoff_rwn_read_v(void) +{ + return 1; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x04_channel.h b/drivers/gpu/host1x/hw/hw_host1x04_channel.h new file mode 100644 index 0000000000..38d110645e --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x04_channel.h @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2013 NVIDIA Corporation. + */ + + /* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. + * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. + * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>. + * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. This value can be |'d + * with others to produce a full register value for <x>. + * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. + */ + +#ifndef HOST1X_HW_HOST1X04_CHANNEL_H +#define HOST1X_HW_HOST1X04_CHANNEL_H + +static inline u32 host1x_channel_fifostat_r(void) +{ + return 0x0; +} +#define HOST1X_CHANNEL_FIFOSTAT \ + host1x_channel_fifostat_r() +static inline u32 host1x_channel_fifostat_cfempty_v(u32 r) +{ + return (r >> 11) & 0x1; +} +#define HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(r) \ + host1x_channel_fifostat_cfempty_v(r) +static inline u32 host1x_channel_dmastart_r(void) +{ + return 0x14; +} +#define HOST1X_CHANNEL_DMASTART \ + host1x_channel_dmastart_r() +static inline u32 host1x_channel_dmaput_r(void) +{ + return 0x18; +} +#define HOST1X_CHANNEL_DMAPUT \ + host1x_channel_dmaput_r() +static inline u32 host1x_channel_dmaget_r(void) +{ + return 0x1c; +} +#define HOST1X_CHANNEL_DMAGET \ + host1x_channel_dmaget_r() +static inline u32 host1x_channel_dmaend_r(void) +{ + return 0x20; +} +#define HOST1X_CHANNEL_DMAEND \ + host1x_channel_dmaend_r() +static inline u32 host1x_channel_dmactrl_r(void) +{ + return 0x24; +} +#define HOST1X_CHANNEL_DMACTRL \ + host1x_channel_dmactrl_r() +static inline u32 host1x_channel_dmactrl_dmastop(void) +{ + return 1 << 0; +} +#define HOST1X_CHANNEL_DMACTRL_DMASTOP \ + host1x_channel_dmactrl_dmastop() +static inline u32 host1x_channel_dmactrl_dmastop_v(u32 r) +{ + return (r >> 0) & 0x1; +} +#define HOST1X_CHANNEL_DMACTRL_DMASTOP_V(r) \ + host1x_channel_dmactrl_dmastop_v(r) +static inline u32 host1x_channel_dmactrl_dmagetrst(void) +{ + return 1 << 1; +} +#define HOST1X_CHANNEL_DMACTRL_DMAGETRST \ + host1x_channel_dmactrl_dmagetrst() +static inline u32 host1x_channel_dmactrl_dmainitget(void) +{ + return 1 << 2; +} +#define HOST1X_CHANNEL_DMACTRL_DMAINITGET \ + host1x_channel_dmactrl_dmainitget() +static inline u32 host1x_channel_channelctrl_r(void) +{ + return 0x98; +} +#define HOST1X_CHANNEL_CHANNELCTRL \ + host1x_channel_channelctrl_r() +static inline u32 host1x_channel_channelctrl_kernel_filter_gbuffer_f(u32 v) +{ + return (v & 0x1) << 2; +} +#define HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(v) \ + host1x_channel_channelctrl_kernel_filter_gbuffer_f(v) + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x04_sync.h b/drivers/gpu/host1x/hw/hw_host1x04_sync.h new file mode 100644 index 0000000000..0be98562c2 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x04_sync.h @@ -0,0 +1,231 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2013 NVIDIA Corporation. + */ + + /* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. + * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. + * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>. + * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. This value can be |'d + * with others to produce a full register value for <x>. + * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. + */ + +#ifndef HOST1X_HW_HOST1X04_SYNC_H +#define HOST1X_HW_HOST1X04_SYNC_H + +#define REGISTER_STRIDE 4 + +static inline u32 host1x_sync_syncpt_r(unsigned int id) +{ + return 0xf80 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT(id) \ + host1x_sync_syncpt_r(id) +static inline u32 host1x_sync_syncpt_thresh_cpu0_int_status_r(unsigned int id) +{ + return 0xe80 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id) \ + host1x_sync_syncpt_thresh_cpu0_int_status_r(id) +static inline u32 host1x_sync_syncpt_thresh_int_disable_r(unsigned int id) +{ + return 0xf00 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id) \ + host1x_sync_syncpt_thresh_int_disable_r(id) +static inline u32 host1x_sync_syncpt_thresh_int_enable_cpu0_r(unsigned int id) +{ + return 0xf20 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id) \ + host1x_sync_syncpt_thresh_int_enable_cpu0_r(id) +static inline u32 host1x_sync_cf_setup_r(unsigned int channel) +{ + return 0xc00 + channel * REGISTER_STRIDE; +} +#define HOST1X_SYNC_CF_SETUP(channel) \ + host1x_sync_cf_setup_r(channel) +static inline u32 host1x_sync_cf_setup_base_v(u32 r) +{ + return (r >> 0) & 0x3ff; +} +#define HOST1X_SYNC_CF_SETUP_BASE_V(r) \ + host1x_sync_cf_setup_base_v(r) +static inline u32 host1x_sync_cf_setup_limit_v(u32 r) +{ + return (r >> 16) & 0x3ff; +} +#define HOST1X_SYNC_CF_SETUP_LIMIT_V(r) \ + host1x_sync_cf_setup_limit_v(r) +static inline u32 host1x_sync_cmdproc_stop_r(void) +{ + return 0xac; +} +#define HOST1X_SYNC_CMDPROC_STOP \ + host1x_sync_cmdproc_stop_r() +static inline u32 host1x_sync_ch_teardown_r(void) +{ + return 0xb0; +} +#define HOST1X_SYNC_CH_TEARDOWN \ + host1x_sync_ch_teardown_r() +static inline u32 host1x_sync_usec_clk_r(void) +{ + return 0x1a4; +} +#define HOST1X_SYNC_USEC_CLK \ + host1x_sync_usec_clk_r() +static inline u32 host1x_sync_ctxsw_timeout_cfg_r(void) +{ + return 0x1a8; +} +#define HOST1X_SYNC_CTXSW_TIMEOUT_CFG \ + host1x_sync_ctxsw_timeout_cfg_r() +static inline u32 host1x_sync_ip_busy_timeout_r(void) +{ + return 0x1bc; +} +#define HOST1X_SYNC_IP_BUSY_TIMEOUT \ + host1x_sync_ip_busy_timeout_r() +static inline u32 host1x_sync_mlock_owner_r(unsigned int id) +{ + return 0x340 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_MLOCK_OWNER(id) \ + host1x_sync_mlock_owner_r(id) +static inline u32 host1x_sync_mlock_owner_chid_v(u32 v) +{ + return (v >> 8) & 0xf; +} +#define HOST1X_SYNC_MLOCK_OWNER_CHID_V(v) \ + host1x_sync_mlock_owner_chid_v(v) +static inline u32 host1x_sync_mlock_owner_cpu_owns_v(u32 r) +{ + return (r >> 1) & 0x1; +} +#define HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(r) \ + host1x_sync_mlock_owner_cpu_owns_v(r) +static inline u32 host1x_sync_mlock_owner_ch_owns_v(u32 r) +{ + return (r >> 0) & 0x1; +} +#define HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(r) \ + host1x_sync_mlock_owner_ch_owns_v(r) +static inline u32 host1x_sync_syncpt_int_thresh_r(unsigned int id) +{ + return 0x1380 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_INT_THRESH(id) \ + host1x_sync_syncpt_int_thresh_r(id) +static inline u32 host1x_sync_syncpt_base_r(unsigned int id) +{ + return 0x600 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_BASE(id) \ + host1x_sync_syncpt_base_r(id) +static inline u32 host1x_sync_syncpt_cpu_incr_r(unsigned int id) +{ + return 0xf60 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_CPU_INCR(id) \ + host1x_sync_syncpt_cpu_incr_r(id) +static inline u32 host1x_sync_cbread_r(unsigned int channel) +{ + return 0xc80 + channel * REGISTER_STRIDE; +} +#define HOST1X_SYNC_CBREAD(channel) \ + host1x_sync_cbread_r(channel) +static inline u32 host1x_sync_cfpeek_ctrl_r(void) +{ + return 0x74c; +} +#define HOST1X_SYNC_CFPEEK_CTRL \ + host1x_sync_cfpeek_ctrl_r() +static inline u32 host1x_sync_cfpeek_ctrl_addr_f(u32 v) +{ + return (v & 0x3ff) << 0; +} +#define HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(v) \ + host1x_sync_cfpeek_ctrl_addr_f(v) +static inline u32 host1x_sync_cfpeek_ctrl_channr_f(u32 v) +{ + return (v & 0xf) << 16; +} +#define HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(v) \ + host1x_sync_cfpeek_ctrl_channr_f(v) +static inline u32 host1x_sync_cfpeek_ctrl_ena_f(u32 v) +{ + return (v & 0x1) << 31; +} +#define HOST1X_SYNC_CFPEEK_CTRL_ENA_F(v) \ + host1x_sync_cfpeek_ctrl_ena_f(v) +static inline u32 host1x_sync_cfpeek_read_r(void) +{ + return 0x750; +} +#define HOST1X_SYNC_CFPEEK_READ \ + host1x_sync_cfpeek_read_r() +static inline u32 host1x_sync_cfpeek_ptrs_r(void) +{ + return 0x754; +} +#define HOST1X_SYNC_CFPEEK_PTRS \ + host1x_sync_cfpeek_ptrs_r() +static inline u32 host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(u32 r) +{ + return (r >> 0) & 0x3ff; +} +#define HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(r) \ + host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(r) +static inline u32 host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(u32 r) +{ + return (r >> 16) & 0x3ff; +} +#define HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(r) \ + host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(r) +static inline u32 host1x_sync_cbstat_r(unsigned int channel) +{ + return 0xcc0 + channel * REGISTER_STRIDE; +} +#define HOST1X_SYNC_CBSTAT(channel) \ + host1x_sync_cbstat_r(channel) +static inline u32 host1x_sync_cbstat_cboffset_v(u32 r) +{ + return (r >> 0) & 0xffff; +} +#define HOST1X_SYNC_CBSTAT_CBOFFSET_V(r) \ + host1x_sync_cbstat_cboffset_v(r) +static inline u32 host1x_sync_cbstat_cbclass_v(u32 r) +{ + return (r >> 16) & 0x3ff; +} +#define HOST1X_SYNC_CBSTAT_CBCLASS_V(r) \ + host1x_sync_cbstat_cbclass_v(r) + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x04_uclass.h b/drivers/gpu/host1x/hw/hw_host1x04_uclass.h new file mode 100644 index 0000000000..60c692b929 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x04_uclass.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2013 NVIDIA Corporation. + */ + + /* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. + * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. + * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>. + * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. This value can be |'d + * with others to produce a full register value for <x>. + * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. + */ + +#ifndef HOST1X_HW_HOST1X04_UCLASS_H +#define HOST1X_HW_HOST1X04_UCLASS_H + +static inline u32 host1x_uclass_incr_syncpt_r(void) +{ + return 0x0; +} +#define HOST1X_UCLASS_INCR_SYNCPT \ + host1x_uclass_incr_syncpt_r() +static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v) +{ + return (v & 0xff) << 8; +} +#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \ + host1x_uclass_incr_syncpt_cond_f(v) +static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v) +{ + return (v & 0xff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \ + host1x_uclass_incr_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_r(void) +{ + return 0x8; +} +#define HOST1X_UCLASS_WAIT_SYNCPT \ + host1x_uclass_wait_syncpt_r() +static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \ + host1x_uclass_wait_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \ + host1x_uclass_wait_syncpt_thresh_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_r(void) +{ + return 0x9; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \ + host1x_uclass_wait_syncpt_base_r() +static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 16; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_wait_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_load_syncpt_base_r(void) +{ + return 0xb; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \ + host1x_uclass_load_syncpt_base_r() +static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_load_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \ + host1x_uclass_load_syncpt_base_value_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_incr_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_incr_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_indoff_r(void) +{ + return 0x2d; +} +#define HOST1X_UCLASS_INDOFF \ + host1x_uclass_indoff_r() +static inline u32 host1x_uclass_indoff_indbe_f(u32 v) +{ + return (v & 0xf) << 28; +} +#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \ + host1x_uclass_indoff_indbe_f(v) +static inline u32 host1x_uclass_indoff_autoinc_f(u32 v) +{ + return (v & 0x1) << 27; +} +#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \ + host1x_uclass_indoff_autoinc_f(v) +static inline u32 host1x_uclass_indoff_indmodid_f(u32 v) +{ + return (v & 0xff) << 18; +} +#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \ + host1x_uclass_indoff_indmodid_f(v) +static inline u32 host1x_uclass_indoff_indroffset_f(u32 v) +{ + return (v & 0xffff) << 2; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_indoff_rwn_read_v(void) +{ + return 1; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x05_channel.h b/drivers/gpu/host1x/hw/hw_host1x05_channel.h new file mode 100644 index 0000000000..7e628ef58c --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x05_channel.h @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2015 NVIDIA Corporation. + */ + + /* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. + * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. + * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>. + * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. This value can be |'d + * with others to produce a full register value for <x>. + * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. + */ + +#ifndef HOST1X_HW_HOST1X05_CHANNEL_H +#define HOST1X_HW_HOST1X05_CHANNEL_H + +static inline u32 host1x_channel_fifostat_r(void) +{ + return 0x0; +} +#define HOST1X_CHANNEL_FIFOSTAT \ + host1x_channel_fifostat_r() +static inline u32 host1x_channel_fifostat_cfempty_v(u32 r) +{ + return (r >> 11) & 0x1; +} +#define HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(r) \ + host1x_channel_fifostat_cfempty_v(r) +static inline u32 host1x_channel_dmastart_r(void) +{ + return 0x14; +} +#define HOST1X_CHANNEL_DMASTART \ + host1x_channel_dmastart_r() +static inline u32 host1x_channel_dmaput_r(void) +{ + return 0x18; +} +#define HOST1X_CHANNEL_DMAPUT \ + host1x_channel_dmaput_r() +static inline u32 host1x_channel_dmaget_r(void) +{ + return 0x1c; +} +#define HOST1X_CHANNEL_DMAGET \ + host1x_channel_dmaget_r() +static inline u32 host1x_channel_dmaend_r(void) +{ + return 0x20; +} +#define HOST1X_CHANNEL_DMAEND \ + host1x_channel_dmaend_r() +static inline u32 host1x_channel_dmactrl_r(void) +{ + return 0x24; +} +#define HOST1X_CHANNEL_DMACTRL \ + host1x_channel_dmactrl_r() +static inline u32 host1x_channel_dmactrl_dmastop(void) +{ + return 1 << 0; +} +#define HOST1X_CHANNEL_DMACTRL_DMASTOP \ + host1x_channel_dmactrl_dmastop() +static inline u32 host1x_channel_dmactrl_dmastop_v(u32 r) +{ + return (r >> 0) & 0x1; +} +#define HOST1X_CHANNEL_DMACTRL_DMASTOP_V(r) \ + host1x_channel_dmactrl_dmastop_v(r) +static inline u32 host1x_channel_dmactrl_dmagetrst(void) +{ + return 1 << 1; +} +#define HOST1X_CHANNEL_DMACTRL_DMAGETRST \ + host1x_channel_dmactrl_dmagetrst() +static inline u32 host1x_channel_dmactrl_dmainitget(void) +{ + return 1 << 2; +} +#define HOST1X_CHANNEL_DMACTRL_DMAINITGET \ + host1x_channel_dmactrl_dmainitget() +static inline u32 host1x_channel_channelctrl_r(void) +{ + return 0x98; +} +#define HOST1X_CHANNEL_CHANNELCTRL \ + host1x_channel_channelctrl_r() +static inline u32 host1x_channel_channelctrl_kernel_filter_gbuffer_f(u32 v) +{ + return (v & 0x1) << 2; +} +#define HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(v) \ + host1x_channel_channelctrl_kernel_filter_gbuffer_f(v) + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x05_sync.h b/drivers/gpu/host1x/hw/hw_host1x05_sync.h new file mode 100644 index 0000000000..1a85c793bd --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x05_sync.h @@ -0,0 +1,231 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2015 NVIDIA Corporation. + */ + + /* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. + * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. + * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>. + * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. This value can be |'d + * with others to produce a full register value for <x>. + * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. + */ + +#ifndef HOST1X_HW_HOST1X05_SYNC_H +#define HOST1X_HW_HOST1X05_SYNC_H + +#define REGISTER_STRIDE 4 + +static inline u32 host1x_sync_syncpt_r(unsigned int id) +{ + return 0xf80 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT(id) \ + host1x_sync_syncpt_r(id) +static inline u32 host1x_sync_syncpt_thresh_cpu0_int_status_r(unsigned int id) +{ + return 0xe80 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id) \ + host1x_sync_syncpt_thresh_cpu0_int_status_r(id) +static inline u32 host1x_sync_syncpt_thresh_int_disable_r(unsigned int id) +{ + return 0xf00 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id) \ + host1x_sync_syncpt_thresh_int_disable_r(id) +static inline u32 host1x_sync_syncpt_thresh_int_enable_cpu0_r(unsigned int id) +{ + return 0xf20 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id) \ + host1x_sync_syncpt_thresh_int_enable_cpu0_r(id) +static inline u32 host1x_sync_cf_setup_r(unsigned int channel) +{ + return 0xc00 + channel * REGISTER_STRIDE; +} +#define HOST1X_SYNC_CF_SETUP(channel) \ + host1x_sync_cf_setup_r(channel) +static inline u32 host1x_sync_cf_setup_base_v(u32 r) +{ + return (r >> 0) & 0x3ff; +} +#define HOST1X_SYNC_CF_SETUP_BASE_V(r) \ + host1x_sync_cf_setup_base_v(r) +static inline u32 host1x_sync_cf_setup_limit_v(u32 r) +{ + return (r >> 16) & 0x3ff; +} +#define HOST1X_SYNC_CF_SETUP_LIMIT_V(r) \ + host1x_sync_cf_setup_limit_v(r) +static inline u32 host1x_sync_cmdproc_stop_r(void) +{ + return 0xac; +} +#define HOST1X_SYNC_CMDPROC_STOP \ + host1x_sync_cmdproc_stop_r() +static inline u32 host1x_sync_ch_teardown_r(void) +{ + return 0xb0; +} +#define HOST1X_SYNC_CH_TEARDOWN \ + host1x_sync_ch_teardown_r() +static inline u32 host1x_sync_usec_clk_r(void) +{ + return 0x1a4; +} +#define HOST1X_SYNC_USEC_CLK \ + host1x_sync_usec_clk_r() +static inline u32 host1x_sync_ctxsw_timeout_cfg_r(void) +{ + return 0x1a8; +} +#define HOST1X_SYNC_CTXSW_TIMEOUT_CFG \ + host1x_sync_ctxsw_timeout_cfg_r() +static inline u32 host1x_sync_ip_busy_timeout_r(void) +{ + return 0x1bc; +} +#define HOST1X_SYNC_IP_BUSY_TIMEOUT \ + host1x_sync_ip_busy_timeout_r() +static inline u32 host1x_sync_mlock_owner_r(unsigned int id) +{ + return 0x340 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_MLOCK_OWNER(id) \ + host1x_sync_mlock_owner_r(id) +static inline u32 host1x_sync_mlock_owner_chid_v(u32 r) +{ + return (r >> 8) & 0xf; +} +#define HOST1X_SYNC_MLOCK_OWNER_CHID_V(v) \ + host1x_sync_mlock_owner_chid_v(v) +static inline u32 host1x_sync_mlock_owner_cpu_owns_v(u32 r) +{ + return (r >> 1) & 0x1; +} +#define HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(r) \ + host1x_sync_mlock_owner_cpu_owns_v(r) +static inline u32 host1x_sync_mlock_owner_ch_owns_v(u32 r) +{ + return (r >> 0) & 0x1; +} +#define HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(r) \ + host1x_sync_mlock_owner_ch_owns_v(r) +static inline u32 host1x_sync_syncpt_int_thresh_r(unsigned int id) +{ + return 0x1380 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_INT_THRESH(id) \ + host1x_sync_syncpt_int_thresh_r(id) +static inline u32 host1x_sync_syncpt_base_r(unsigned int id) +{ + return 0x600 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_BASE(id) \ + host1x_sync_syncpt_base_r(id) +static inline u32 host1x_sync_syncpt_cpu_incr_r(unsigned int id) +{ + return 0xf60 + id * REGISTER_STRIDE; +} +#define HOST1X_SYNC_SYNCPT_CPU_INCR(id) \ + host1x_sync_syncpt_cpu_incr_r(id) +static inline u32 host1x_sync_cbread_r(unsigned int channel) +{ + return 0xc80 + channel * REGISTER_STRIDE; +} +#define HOST1X_SYNC_CBREAD(channel) \ + host1x_sync_cbread_r(channel) +static inline u32 host1x_sync_cfpeek_ctrl_r(void) +{ + return 0x74c; +} +#define HOST1X_SYNC_CFPEEK_CTRL \ + host1x_sync_cfpeek_ctrl_r() +static inline u32 host1x_sync_cfpeek_ctrl_addr_f(u32 v) +{ + return (v & 0x3ff) << 0; +} +#define HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(v) \ + host1x_sync_cfpeek_ctrl_addr_f(v) +static inline u32 host1x_sync_cfpeek_ctrl_channr_f(u32 v) +{ + return (v & 0xf) << 16; +} +#define HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(v) \ + host1x_sync_cfpeek_ctrl_channr_f(v) +static inline u32 host1x_sync_cfpeek_ctrl_ena_f(u32 v) +{ + return (v & 0x1) << 31; +} +#define HOST1X_SYNC_CFPEEK_CTRL_ENA_F(v) \ + host1x_sync_cfpeek_ctrl_ena_f(v) +static inline u32 host1x_sync_cfpeek_read_r(void) +{ + return 0x750; +} +#define HOST1X_SYNC_CFPEEK_READ \ + host1x_sync_cfpeek_read_r() +static inline u32 host1x_sync_cfpeek_ptrs_r(void) +{ + return 0x754; +} +#define HOST1X_SYNC_CFPEEK_PTRS \ + host1x_sync_cfpeek_ptrs_r() +static inline u32 host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(u32 r) +{ + return (r >> 0) & 0x3ff; +} +#define HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(r) \ + host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(r) +static inline u32 host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(u32 r) +{ + return (r >> 16) & 0x3ff; +} +#define HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(r) \ + host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(r) +static inline u32 host1x_sync_cbstat_r(unsigned int channel) +{ + return 0xcc0 + channel * REGISTER_STRIDE; +} +#define HOST1X_SYNC_CBSTAT(channel) \ + host1x_sync_cbstat_r(channel) +static inline u32 host1x_sync_cbstat_cboffset_v(u32 r) +{ + return (r >> 0) & 0xffff; +} +#define HOST1X_SYNC_CBSTAT_CBOFFSET_V(r) \ + host1x_sync_cbstat_cboffset_v(r) +static inline u32 host1x_sync_cbstat_cbclass_v(u32 r) +{ + return (r >> 16) & 0x3ff; +} +#define HOST1X_SYNC_CBSTAT_CBCLASS_V(r) \ + host1x_sync_cbstat_cbclass_v(r) + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x05_uclass.h b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h new file mode 100644 index 0000000000..2fcc9a2ad3 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2015 NVIDIA Corporation. + */ + + /* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. + * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. + * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>. + * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. This value can be |'d + * with others to produce a full register value for <x>. + * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. + */ + +#ifndef HOST1X_HW_HOST1X05_UCLASS_H +#define HOST1X_HW_HOST1X05_UCLASS_H + +static inline u32 host1x_uclass_incr_syncpt_r(void) +{ + return 0x0; +} +#define HOST1X_UCLASS_INCR_SYNCPT \ + host1x_uclass_incr_syncpt_r() +static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v) +{ + return (v & 0xff) << 8; +} +#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \ + host1x_uclass_incr_syncpt_cond_f(v) +static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v) +{ + return (v & 0xff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \ + host1x_uclass_incr_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_r(void) +{ + return 0x8; +} +#define HOST1X_UCLASS_WAIT_SYNCPT \ + host1x_uclass_wait_syncpt_r() +static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \ + host1x_uclass_wait_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \ + host1x_uclass_wait_syncpt_thresh_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_r(void) +{ + return 0x9; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \ + host1x_uclass_wait_syncpt_base_r() +static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 16; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_wait_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_load_syncpt_base_r(void) +{ + return 0xb; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \ + host1x_uclass_load_syncpt_base_r() +static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_load_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \ + host1x_uclass_load_syncpt_base_value_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_incr_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_incr_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_indoff_r(void) +{ + return 0x2d; +} +#define HOST1X_UCLASS_INDOFF \ + host1x_uclass_indoff_r() +static inline u32 host1x_uclass_indoff_indbe_f(u32 v) +{ + return (v & 0xf) << 28; +} +#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \ + host1x_uclass_indoff_indbe_f(v) +static inline u32 host1x_uclass_indoff_autoinc_f(u32 v) +{ + return (v & 0x1) << 27; +} +#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \ + host1x_uclass_indoff_autoinc_f(v) +static inline u32 host1x_uclass_indoff_indmodid_f(u32 v) +{ + return (v & 0xff) << 18; +} +#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \ + host1x_uclass_indoff_indmodid_f(v) +static inline u32 host1x_uclass_indoff_indroffset_f(u32 v) +{ + return (v & 0xffff) << 2; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_indoff_rwn_read_v(void) +{ + return 1; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x06_channel.h b/drivers/gpu/host1x/hw/hw_host1x06_channel.h new file mode 100644 index 0000000000..18ae1c57bb --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x06_channel.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 NVIDIA Corporation. + */ + +#ifndef HOST1X_HW_HOST1X06_CHANNEL_H +#define HOST1X_HW_HOST1X06_CHANNEL_H + +#define HOST1X_CHANNEL_SMMU_STREAMID 0x084 + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h b/drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h new file mode 100644 index 0000000000..a7fc9ec4bc --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2017 NVIDIA Corporation. + */ + +#define HOST1X_HV_SYNCPT_PROT_EN 0x1ac4 +#define HOST1X_HV_SYNCPT_PROT_EN_CH_EN BIT(1) +#define HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(x) (0x2020 + (x * 4)) +#define HOST1X_HV_CMDFIFO_PEEK_CTRL 0x233c +#define HOST1X_HV_CMDFIFO_PEEK_CTRL_ADDR(x) (x) +#define HOST1X_HV_CMDFIFO_PEEK_CTRL_CHANNEL(x) ((x) << 16) +#define HOST1X_HV_CMDFIFO_PEEK_CTRL_ENABLE BIT(31) +#define HOST1X_HV_CMDFIFO_PEEK_READ 0x2340 +#define HOST1X_HV_CMDFIFO_PEEK_PTRS 0x2344 +#define HOST1X_HV_CMDFIFO_PEEK_PTRS_WR_PTR_V(x) (((x) >> 16) & 0xfff) +#define HOST1X_HV_CMDFIFO_PEEK_PTRS_RD_PTR_V(x) ((x) & 0xfff) +#define HOST1X_HV_CMDFIFO_SETUP(x) (0x2588 + (x * 4)) +#define HOST1X_HV_CMDFIFO_SETUP_LIMIT_V(x) (((x) >> 16) & 0xfff) +#define HOST1X_HV_CMDFIFO_SETUP_BASE_V(x) ((x) & 0xfff) +#define HOST1X_HV_ICG_EN_OVERRIDE 0x2aa8 diff --git a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h new file mode 100644 index 0000000000..50c32de452 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2017 NVIDIA Corporation. + */ + + /* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. + * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. + * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>. + * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. This value can be |'d + * with others to produce a full register value for <x>. + * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. + */ + +#ifndef HOST1X_HW_HOST1X06_UCLASS_H +#define HOST1X_HW_HOST1X06_UCLASS_H + +static inline u32 host1x_uclass_incr_syncpt_r(void) +{ + return 0x0; +} +#define HOST1X_UCLASS_INCR_SYNCPT \ + host1x_uclass_incr_syncpt_r() +static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v) +{ + return (v & 0xff) << 10; +} +#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \ + host1x_uclass_incr_syncpt_cond_f(v) +static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v) +{ + return (v & 0x3ff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \ + host1x_uclass_incr_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_r(void) +{ + return 0x8; +} +#define HOST1X_UCLASS_WAIT_SYNCPT \ + host1x_uclass_wait_syncpt_r() +static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \ + host1x_uclass_wait_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \ + host1x_uclass_wait_syncpt_thresh_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_r(void) +{ + return 0x9; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \ + host1x_uclass_wait_syncpt_base_r() +static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 16; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_wait_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_load_syncpt_base_r(void) +{ + return 0xb; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \ + host1x_uclass_load_syncpt_base_r() +static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_load_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \ + host1x_uclass_load_syncpt_base_value_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_incr_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_incr_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_indoff_r(void) +{ + return 0x2d; +} +#define HOST1X_UCLASS_INDOFF \ + host1x_uclass_indoff_r() +static inline u32 host1x_uclass_indoff_indbe_f(u32 v) +{ + return (v & 0xf) << 28; +} +#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \ + host1x_uclass_indoff_indbe_f(v) +static inline u32 host1x_uclass_indoff_autoinc_f(u32 v) +{ + return (v & 0x1) << 27; +} +#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \ + host1x_uclass_indoff_autoinc_f(v) +static inline u32 host1x_uclass_indoff_indmodid_f(u32 v) +{ + return (v & 0xff) << 18; +} +#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \ + host1x_uclass_indoff_indmodid_f(v) +static inline u32 host1x_uclass_indoff_indroffset_f(u32 v) +{ + return (v & 0xffff) << 2; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_indoff_rwn_read_v(void) +{ + return 1; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x06_vm.h b/drivers/gpu/host1x/hw/hw_host1x06_vm.h new file mode 100644 index 0000000000..818564a76b --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x06_vm.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2017 NVIDIA Corporation. + */ + +#define HOST1X_CHANNEL_DMASTART 0x0000 +#define HOST1X_CHANNEL_DMASTART_HI 0x0004 +#define HOST1X_CHANNEL_DMAPUT 0x0008 +#define HOST1X_CHANNEL_DMAPUT_HI 0x000c +#define HOST1X_CHANNEL_DMAGET 0x0010 +#define HOST1X_CHANNEL_DMAGET_HI 0x0014 +#define HOST1X_CHANNEL_DMAEND 0x0018 +#define HOST1X_CHANNEL_DMAEND_HI 0x001c +#define HOST1X_CHANNEL_DMACTRL 0x0020 +#define HOST1X_CHANNEL_DMACTRL_DMASTOP BIT(0) +#define HOST1X_CHANNEL_DMACTRL_DMAGETRST BIT(1) +#define HOST1X_CHANNEL_DMACTRL_DMAINITGET BIT(2) +#define HOST1X_CHANNEL_CMDFIFO_STAT 0x0024 +#define HOST1X_CHANNEL_CMDFIFO_STAT_EMPTY BIT(13) +#define HOST1X_CHANNEL_CMDFIFO_RDATA 0x0028 +#define HOST1X_CHANNEL_CMDP_OFFSET 0x0030 +#define HOST1X_CHANNEL_CMDP_CLASS 0x0034 +#define HOST1X_CHANNEL_CHANNELSTAT 0x0038 +#define HOST1X_CHANNEL_CMDPROC_STOP 0x0048 +#define HOST1X_CHANNEL_TEARDOWN 0x004c + +#define HOST1X_SYNC_SYNCPT_CPU_INCR(x) (0x6400 + 4*(x)) +#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(x) (0x6464 + 4*(x)) +#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(x) (0x652c + 4*(x)) +#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(x) (0x6590 + 4*(x)) +#define HOST1X_SYNC_SYNCPT_BASE(x) (0x8000 + 4*(x)) +#define HOST1X_SYNC_SYNCPT(x) (0x8080 + 4*(x)) +#define HOST1X_SYNC_SYNCPT_INT_THRESH(x) (0x8a00 + 4*(x)) +#define HOST1X_SYNC_SYNCPT_CH_APP(x) (0x9384 + 4*(x)) +#define HOST1X_SYNC_SYNCPT_CH_APP_CH(v) (((v) & 0x3f) << 8) diff --git a/drivers/gpu/host1x/hw/hw_host1x07_channel.h b/drivers/gpu/host1x/hw/hw_host1x07_channel.h new file mode 100644 index 0000000000..96fa72bbd7 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x07_channel.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 NVIDIA Corporation. + */ + +#ifndef HOST1X_HW_HOST1X07_CHANNEL_H +#define HOST1X_HW_HOST1X07_CHANNEL_H + +#define HOST1X_CHANNEL_SMMU_STREAMID 0x084 + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x07_hypervisor.h b/drivers/gpu/host1x/hw/hw_host1x07_hypervisor.h new file mode 100644 index 0000000000..52141d5395 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x07_hypervisor.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2018 NVIDIA Corporation. + */ + +#define HOST1X_HV_SYNCPT_PROT_EN 0x1ac4 +#define HOST1X_HV_SYNCPT_PROT_EN_CH_EN BIT(1) +#define HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(x) (0x2020 + (x * 4)) +#define HOST1X_HV_CMDFIFO_PEEK_CTRL 0x233c +#define HOST1X_HV_CMDFIFO_PEEK_CTRL_ADDR(x) (x) +#define HOST1X_HV_CMDFIFO_PEEK_CTRL_CHANNEL(x) ((x) << 16) +#define HOST1X_HV_CMDFIFO_PEEK_CTRL_ENABLE BIT(31) +#define HOST1X_HV_CMDFIFO_PEEK_READ 0x2340 +#define HOST1X_HV_CMDFIFO_PEEK_PTRS 0x2344 +#define HOST1X_HV_CMDFIFO_PEEK_PTRS_WR_PTR_V(x) (((x) >> 16) & 0xfff) +#define HOST1X_HV_CMDFIFO_PEEK_PTRS_RD_PTR_V(x) ((x) & 0xfff) +#define HOST1X_HV_CMDFIFO_SETUP(x) (0x2588 + (x * 4)) +#define HOST1X_HV_CMDFIFO_SETUP_LIMIT_V(x) (((x) >> 16) & 0xfff) +#define HOST1X_HV_CMDFIFO_SETUP_BASE_V(x) ((x) & 0xfff) +#define HOST1X_HV_ICG_EN_OVERRIDE 0x2aa8 diff --git a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h new file mode 100644 index 0000000000..887b878f92 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2018 NVIDIA Corporation. + */ + + /* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. + * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. + * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>. + * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. This value can be |'d + * with others to produce a full register value for <x>. + * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. + */ + +#ifndef HOST1X_HW_HOST1X07_UCLASS_H +#define HOST1X_HW_HOST1X07_UCLASS_H + +static inline u32 host1x_uclass_incr_syncpt_r(void) +{ + return 0x0; +} +#define HOST1X_UCLASS_INCR_SYNCPT \ + host1x_uclass_incr_syncpt_r() +static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v) +{ + return (v & 0xff) << 10; +} +#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \ + host1x_uclass_incr_syncpt_cond_f(v) +static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v) +{ + return (v & 0x3ff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \ + host1x_uclass_incr_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_r(void) +{ + return 0x8; +} +#define HOST1X_UCLASS_WAIT_SYNCPT \ + host1x_uclass_wait_syncpt_r() +static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \ + host1x_uclass_wait_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \ + host1x_uclass_wait_syncpt_thresh_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_r(void) +{ + return 0x9; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \ + host1x_uclass_wait_syncpt_base_r() +static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 16; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_wait_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_load_syncpt_base_r(void) +{ + return 0xb; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \ + host1x_uclass_load_syncpt_base_r() +static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_load_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \ + host1x_uclass_load_syncpt_base_value_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_incr_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_incr_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_indoff_r(void) +{ + return 0x2d; +} +#define HOST1X_UCLASS_INDOFF \ + host1x_uclass_indoff_r() +static inline u32 host1x_uclass_indoff_indbe_f(u32 v) +{ + return (v & 0xf) << 28; +} +#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \ + host1x_uclass_indoff_indbe_f(v) +static inline u32 host1x_uclass_indoff_autoinc_f(u32 v) +{ + return (v & 0x1) << 27; +} +#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \ + host1x_uclass_indoff_autoinc_f(v) +static inline u32 host1x_uclass_indoff_indmodid_f(u32 v) +{ + return (v & 0xff) << 18; +} +#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \ + host1x_uclass_indoff_indmodid_f(v) +static inline u32 host1x_uclass_indoff_indroffset_f(u32 v) +{ + return (v & 0xffff) << 2; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_indoff_rwn_read_v(void) +{ + return 1; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x07_vm.h b/drivers/gpu/host1x/hw/hw_host1x07_vm.h new file mode 100644 index 0000000000..b766851d5b --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x07_vm.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2018 NVIDIA Corporation. + */ + +#define HOST1X_CHANNEL_DMASTART 0x0000 +#define HOST1X_CHANNEL_DMASTART_HI 0x0004 +#define HOST1X_CHANNEL_DMAPUT 0x0008 +#define HOST1X_CHANNEL_DMAPUT_HI 0x000c +#define HOST1X_CHANNEL_DMAGET 0x0010 +#define HOST1X_CHANNEL_DMAGET_HI 0x0014 +#define HOST1X_CHANNEL_DMAEND 0x0018 +#define HOST1X_CHANNEL_DMAEND_HI 0x001c +#define HOST1X_CHANNEL_DMACTRL 0x0020 +#define HOST1X_CHANNEL_DMACTRL_DMASTOP BIT(0) +#define HOST1X_CHANNEL_DMACTRL_DMAGETRST BIT(1) +#define HOST1X_CHANNEL_DMACTRL_DMAINITGET BIT(2) +#define HOST1X_CHANNEL_CMDFIFO_STAT 0x0024 +#define HOST1X_CHANNEL_CMDFIFO_STAT_EMPTY BIT(13) +#define HOST1X_CHANNEL_CMDFIFO_RDATA 0x0028 +#define HOST1X_CHANNEL_CMDP_OFFSET 0x0030 +#define HOST1X_CHANNEL_CMDP_CLASS 0x0034 +#define HOST1X_CHANNEL_CHANNELSTAT 0x0038 +#define HOST1X_CHANNEL_CMDPROC_STOP 0x0048 +#define HOST1X_CHANNEL_TEARDOWN 0x004c + +#define HOST1X_SYNC_SYNCPT_CPU_INCR(x) (0x6400 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(x) (0x6464 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(x) (0x652c + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(x) (0x6590 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT(x) (0x8080 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_INT_THRESH(x) (0x9980 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_CH_APP(x) (0xa604 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_CH_APP_CH(v) (((v) & 0x3f) << 8) diff --git a/drivers/gpu/host1x/hw/hw_host1x08_channel.h b/drivers/gpu/host1x/hw/hw_host1x08_channel.h new file mode 100644 index 0000000000..c9272d2ab1 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x08_channel.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 NVIDIA Corporation. + */ + +#ifndef HOST1X_HW_HOST1X08_CHANNEL_H +#define HOST1X_HW_HOST1X08_CHANNEL_H + +#define HOST1X_CHANNEL_SMMU_STREAMID 0x084 + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x08_common.h b/drivers/gpu/host1x/hw/hw_host1x08_common.h new file mode 100644 index 0000000000..8e0c99150e --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x08_common.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 NVIDIA Corporation. + */ + +#define HOST1X_COMMON_OFA_MLOCK 0x4050 +#define HOST1X_COMMON_NVJPG1_MLOCK 0x4070 +#define HOST1X_COMMON_VIC_MLOCK 0x4078 +#define HOST1X_COMMON_NVENC_MLOCK 0x407c +#define HOST1X_COMMON_NVDEC_MLOCK 0x4080 +#define HOST1X_COMMON_NVJPG_MLOCK 0x4084 diff --git a/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h b/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h new file mode 100644 index 0000000000..22964324c9 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 NVIDIA Corporation. + */ + +#define HOST1X_HV_SYNCPT_PROT_EN 0x1724 +#define HOST1X_HV_SYNCPT_PROT_EN_CH_EN BIT(1) +#define HOST1X_HV_CH_MLOCK_EN(x) (0x1700 + (x * 4)) +#define HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(x) (0x1710 + (x * 4)) diff --git a/drivers/gpu/host1x/hw/hw_host1x08_uclass.h b/drivers/gpu/host1x/hw/hw_host1x08_uclass.h new file mode 100644 index 0000000000..4fb1d090ed --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x08_uclass.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2018 NVIDIA Corporation. + */ + + /* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. + * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. + * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>. + * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. This value can be |'d + * with others to produce a full register value for <x>. + * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. + */ + +#ifndef HOST1X_HW_HOST1X08_UCLASS_H +#define HOST1X_HW_HOST1X08_UCLASS_H + +static inline u32 host1x_uclass_incr_syncpt_r(void) +{ + return 0x0; +} +#define HOST1X_UCLASS_INCR_SYNCPT \ + host1x_uclass_incr_syncpt_r() +static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v) +{ + return (v & 0xff) << 10; +} +#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \ + host1x_uclass_incr_syncpt_cond_f(v) +static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v) +{ + return (v & 0x3ff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \ + host1x_uclass_incr_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_r(void) +{ + return 0x8; +} +#define HOST1X_UCLASS_WAIT_SYNCPT \ + host1x_uclass_wait_syncpt_r() +static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \ + host1x_uclass_wait_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \ + host1x_uclass_wait_syncpt_thresh_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_r(void) +{ + return 0x9; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \ + host1x_uclass_wait_syncpt_base_r() +static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 16; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_wait_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_load_syncpt_base_r(void) +{ + return 0xb; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \ + host1x_uclass_load_syncpt_base_r() +static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_load_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \ + host1x_uclass_load_syncpt_base_value_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_incr_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_incr_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_indoff_r(void) +{ + return 0x2d; +} +#define HOST1X_UCLASS_INDOFF \ + host1x_uclass_indoff_r() +static inline u32 host1x_uclass_indoff_indbe_f(u32 v) +{ + return (v & 0xf) << 28; +} +#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \ + host1x_uclass_indoff_indbe_f(v) +static inline u32 host1x_uclass_indoff_autoinc_f(u32 v) +{ + return (v & 0x1) << 27; +} +#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \ + host1x_uclass_indoff_autoinc_f(v) +static inline u32 host1x_uclass_indoff_indmodid_f(u32 v) +{ + return (v & 0xff) << 18; +} +#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \ + host1x_uclass_indoff_indmodid_f(v) +static inline u32 host1x_uclass_indoff_indroffset_f(u32 v) +{ + return (v & 0xffff) << 2; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_indoff_rwn_read_v(void) +{ + return 1; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x08_vm.h b/drivers/gpu/host1x/hw/hw_host1x08_vm.h new file mode 100644 index 0000000000..1455a4670b --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x08_vm.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 NVIDIA Corporation. + */ + +#define HOST1X_CHANNEL_DMASTART 0x0000 +#define HOST1X_CHANNEL_DMASTART_HI 0x0004 +#define HOST1X_CHANNEL_DMAPUT 0x0008 +#define HOST1X_CHANNEL_DMAPUT_HI 0x000c +#define HOST1X_CHANNEL_DMAGET 0x0010 +#define HOST1X_CHANNEL_DMAGET_HI 0x0014 +#define HOST1X_CHANNEL_DMAEND 0x0018 +#define HOST1X_CHANNEL_DMAEND_HI 0x001c +#define HOST1X_CHANNEL_DMACTRL 0x0020 +#define HOST1X_CHANNEL_DMACTRL_DMASTOP BIT(0) +#define HOST1X_CHANNEL_DMACTRL_DMAGETRST BIT(1) +#define HOST1X_CHANNEL_DMACTRL_DMAINITGET BIT(2) +#define HOST1X_CHANNEL_CMDFIFO_STAT 0x0024 +#define HOST1X_CHANNEL_CMDFIFO_STAT_EMPTY BIT(13) +#define HOST1X_CHANNEL_CMDFIFO_RDATA 0x0028 +#define HOST1X_CHANNEL_CMDP_OFFSET 0x0030 +#define HOST1X_CHANNEL_CMDP_CLASS 0x0034 +#define HOST1X_CHANNEL_CHANNELSTAT 0x0038 +#define HOST1X_CHANNEL_CMDPROC_STOP 0x0048 +#define HOST1X_CHANNEL_TEARDOWN 0x004c +#define HOST1X_CHANNEL_SMMU_STREAMID 0x0084 + +#define HOST1X_SYNC_SYNCPT_CPU_INCR(x) (0x6400 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(x) (0x6600 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_INTR_DEST(x) (0x6684 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(x) (0x770c + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(x) (0x7790 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT(x) (0x8080 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_INT_THRESH(x) (0xa088 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_CH_APP(x) (0xb090 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_CH_APP_CH(v) (((v) & 0x3f) << 8) diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c new file mode 100644 index 0000000000..b915ef7d03 --- /dev/null +++ b/drivers/gpu/host1x/hw/intr_hw.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Tegra host1x Interrupt Management + * + * Copyright (C) 2010 Google, Inc. + * Copyright (c) 2010-2013, NVIDIA Corporation. + */ + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/io.h> + +#include "../intr.h" +#include "../dev.h" + +static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id) +{ + struct host1x *host = dev_id; + unsigned long reg; + unsigned int i, id; + + for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); i++) { + reg = host1x_sync_readl(host, + HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i)); + + host1x_sync_writel(host, reg, + HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(i)); + host1x_sync_writel(host, reg, + HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i)); + + for_each_set_bit(id, ®, 32) + host1x_intr_handle_interrupt(host, i * 32 + id); + } + + return IRQ_HANDLED; +} + +static void host1x_intr_disable_all_syncpt_intrs(struct host1x *host) +{ + unsigned int i; + + for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); ++i) { + host1x_sync_writel(host, 0xffffffffu, + HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(i)); + host1x_sync_writel(host, 0xffffffffu, + HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i)); + } +} + +static void intr_hw_init(struct host1x *host, u32 cpm) +{ +#if HOST1X_HW < 6 + /* disable the ip_busy_timeout. this prevents write drops */ + host1x_sync_writel(host, 0, HOST1X_SYNC_IP_BUSY_TIMEOUT); + + /* + * increase the auto-ack timout to the maximum value. 2d will hang + * otherwise on Tegra2. + */ + host1x_sync_writel(host, 0xff, HOST1X_SYNC_CTXSW_TIMEOUT_CFG); + + /* update host clocks per usec */ + host1x_sync_writel(host, cpm, HOST1X_SYNC_USEC_CLK); +#endif +#if HOST1X_HW >= 8 + u32 id; + + /* + * Program threshold interrupt destination among 8 lines per VM, + * per syncpoint. For now, just direct all to the first interrupt + * line. + */ + for (id = 0; id < host->info->nb_pts; id++) + host1x_sync_writel(host, 0, HOST1X_SYNC_SYNCPT_INTR_DEST(id)); +#endif +} + +static int +host1x_intr_init_host_sync(struct host1x *host, u32 cpm) +{ + int err; + + host1x_hw_intr_disable_all_syncpt_intrs(host); + + err = devm_request_irq(host->dev, host->syncpt_irq, + syncpt_thresh_isr, IRQF_SHARED, + "host1x_syncpt", host); + if (err < 0) + return err; + + intr_hw_init(host, cpm); + + return 0; +} + +static void host1x_intr_set_syncpt_threshold(struct host1x *host, + unsigned int id, + u32 thresh) +{ + host1x_sync_writel(host, thresh, HOST1X_SYNC_SYNCPT_INT_THRESH(id)); +} + +static void host1x_intr_enable_syncpt_intr(struct host1x *host, + unsigned int id) +{ + host1x_sync_writel(host, BIT(id % 32), + HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id / 32)); +} + +static void host1x_intr_disable_syncpt_intr(struct host1x *host, + unsigned int id) +{ + host1x_sync_writel(host, BIT(id % 32), + HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id / 32)); + host1x_sync_writel(host, BIT(id % 32), + HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id / 32)); +} + +static const struct host1x_intr_ops host1x_intr_ops = { + .init_host_sync = host1x_intr_init_host_sync, + .set_syncpt_threshold = host1x_intr_set_syncpt_threshold, + .enable_syncpt_intr = host1x_intr_enable_syncpt_intr, + .disable_syncpt_intr = host1x_intr_disable_syncpt_intr, + .disable_all_syncpt_intrs = host1x_intr_disable_all_syncpt_intrs, +}; diff --git a/drivers/gpu/host1x/hw/opcodes.h b/drivers/gpu/host1x/hw/opcodes.h new file mode 100644 index 0000000000..649614499b --- /dev/null +++ b/drivers/gpu/host1x/hw/opcodes.h @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tegra host1x opcodes + * + * Copyright (c) 2022 NVIDIA Corporation. + */ + +#ifndef __HOST1X_OPCODES_H +#define __HOST1X_OPCODES_H + +#include <linux/types.h> + +static inline u32 host1x_class_host_wait_syncpt( + unsigned indx, unsigned threshold) +{ + return host1x_uclass_wait_syncpt_indx_f(indx) + | host1x_uclass_wait_syncpt_thresh_f(threshold); +} + +static inline u32 host1x_class_host_load_syncpt_base( + unsigned indx, unsigned threshold) +{ + return host1x_uclass_load_syncpt_base_base_indx_f(indx) + | host1x_uclass_load_syncpt_base_value_f(threshold); +} + +static inline u32 host1x_class_host_wait_syncpt_base( + unsigned indx, unsigned base_indx, unsigned offset) +{ + return host1x_uclass_wait_syncpt_base_indx_f(indx) + | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx) + | host1x_uclass_wait_syncpt_base_offset_f(offset); +} + +static inline u32 host1x_class_host_incr_syncpt_base( + unsigned base_indx, unsigned offset) +{ + return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx) + | host1x_uclass_incr_syncpt_base_offset_f(offset); +} + +static inline u32 host1x_class_host_incr_syncpt( + unsigned cond, unsigned indx) +{ + return host1x_uclass_incr_syncpt_cond_f(cond) + | host1x_uclass_incr_syncpt_indx_f(indx); +} + +static inline u32 host1x_class_host_indoff_reg_write( + unsigned mod_id, unsigned offset, bool auto_inc) +{ + u32 v = host1x_uclass_indoff_indbe_f(0xf) + | host1x_uclass_indoff_indmodid_f(mod_id) + | host1x_uclass_indoff_indroffset_f(offset); + if (auto_inc) + v |= host1x_uclass_indoff_autoinc_f(1); + return v; +} + +static inline u32 host1x_class_host_indoff_reg_read( + unsigned mod_id, unsigned offset, bool auto_inc) +{ + u32 v = host1x_uclass_indoff_indmodid_f(mod_id) + | host1x_uclass_indoff_indroffset_f(offset) + | host1x_uclass_indoff_rwn_read_v(); + if (auto_inc) + v |= host1x_uclass_indoff_autoinc_f(1); + return v; +} + +static inline u32 host1x_opcode_setclass( + unsigned class_id, unsigned offset, unsigned mask) +{ + return (0 << 28) | (offset << 16) | (class_id << 6) | mask; +} + +static inline u32 host1x_opcode_incr(unsigned offset, unsigned count) +{ + return (1 << 28) | (offset << 16) | count; +} + +static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count) +{ + return (2 << 28) | (offset << 16) | count; +} + +static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask) +{ + return (3 << 28) | (offset << 16) | mask; +} + +static inline u32 host1x_opcode_imm(unsigned offset, unsigned value) +{ + return (4 << 28) | (offset << 16) | value; +} + +static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx) +{ + return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(), + host1x_class_host_incr_syncpt(cond, indx)); +} + +static inline u32 host1x_opcode_restart(unsigned address) +{ + return (5 << 28) | (address >> 4); +} + +static inline u32 host1x_opcode_gather(unsigned count) +{ + return (6 << 28) | count; +} + +static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count) +{ + return (6 << 28) | (offset << 16) | BIT(15) | count; +} + +static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count) +{ + return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; +} + +static inline u32 host1x_opcode_setstreamid(unsigned streamid) +{ + return (7 << 28) | streamid; +} + +static inline u32 host1x_opcode_setpayload(unsigned payload) +{ + return (9 << 28) | payload; +} + +static inline u32 host1x_opcode_gather_wide(unsigned count) +{ + return (12 << 28) | count; +} + +static inline u32 host1x_opcode_acquire_mlock(unsigned mlock) +{ + return (14 << 28) | (0 << 24) | mlock; +} + +static inline u32 host1x_opcode_release_mlock(unsigned mlock) +{ + return (14 << 28) | (1 << 24) | mlock; +} + +#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) + +#endif diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c new file mode 100644 index 0000000000..8cf35b2eff --- /dev/null +++ b/drivers/gpu/host1x/hw/syncpt_hw.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Tegra host1x Syncpoints + * + * Copyright (c) 2010-2013, NVIDIA Corporation. + */ + +#include <linux/io.h> + +#include "../dev.h" +#include "../syncpt.h" + +/* + * Write the current syncpoint value back to hw. + */ +static void syncpt_restore(struct host1x_syncpt *sp) +{ + u32 min = host1x_syncpt_read_min(sp); + struct host1x *host = sp->host; + + host1x_sync_writel(host, min, HOST1X_SYNC_SYNCPT(sp->id)); +} + +/* + * Write the current waitbase value back to hw. + */ +static void syncpt_restore_wait_base(struct host1x_syncpt *sp) +{ +#if HOST1X_HW < 7 + struct host1x *host = sp->host; + + host1x_sync_writel(host, sp->base_val, + HOST1X_SYNC_SYNCPT_BASE(sp->id)); +#endif +} + +/* + * Read waitbase value from hw. + */ +static void syncpt_read_wait_base(struct host1x_syncpt *sp) +{ +#if HOST1X_HW < 7 + struct host1x *host = sp->host; + + sp->base_val = + host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_BASE(sp->id)); +#endif +} + +/* + * Updates the last value read from hardware. + */ +static u32 syncpt_load(struct host1x_syncpt *sp) +{ + struct host1x *host = sp->host; + u32 old, live; + + /* Loop in case there's a race writing to min_val */ + do { + old = host1x_syncpt_read_min(sp); + live = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT(sp->id)); + } while ((u32)atomic_cmpxchg(&sp->min_val, old, live) != old); + + if (!host1x_syncpt_check_max(sp, live)) + dev_err(host->dev, "%s failed: id=%u, min=%d, max=%d\n", + __func__, sp->id, host1x_syncpt_read_min(sp), + host1x_syncpt_read_max(sp)); + + return live; +} + +/* + * Write a cpu syncpoint increment to the hardware, without touching + * the cache. + */ +static int syncpt_cpu_incr(struct host1x_syncpt *sp) +{ + struct host1x *host = sp->host; + u32 reg_offset = sp->id / 32; + + if (!host1x_syncpt_client_managed(sp) && + host1x_syncpt_idle(sp)) + return -EINVAL; + + host1x_sync_writel(host, BIT(sp->id % 32), + HOST1X_SYNC_SYNCPT_CPU_INCR(reg_offset)); + wmb(); + + return 0; +} + +/** + * syncpt_assign_to_channel() - Assign syncpoint to channel + * @sp: syncpoint + * @ch: channel + * + * On chips with the syncpoint protection feature (Tegra186+), assign @sp to + * @ch, preventing other channels from incrementing the syncpoints. If @ch is + * NULL, unassigns the syncpoint. + * + * On older chips, do nothing. + */ +static void syncpt_assign_to_channel(struct host1x_syncpt *sp, + struct host1x_channel *ch) +{ +#if HOST1X_HW >= 6 + struct host1x *host = sp->host; + + host1x_sync_writel(host, + HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff), + HOST1X_SYNC_SYNCPT_CH_APP(sp->id)); +#endif +} + +/** + * syncpt_enable_protection() - Enable syncpoint protection + * @host: host1x instance + * + * On chips with the syncpoint protection feature (Tegra186+), enable this + * feature. On older chips, do nothing. + */ +static void syncpt_enable_protection(struct host1x *host) +{ +#if HOST1X_HW >= 6 + if (!host->hv_regs) + return; + + host1x_hypervisor_writel(host, HOST1X_HV_SYNCPT_PROT_EN_CH_EN, + HOST1X_HV_SYNCPT_PROT_EN); +#endif +} + +static const struct host1x_syncpt_ops host1x_syncpt_ops = { + .restore = syncpt_restore, + .restore_wait_base = syncpt_restore_wait_base, + .load_wait_base = syncpt_read_wait_base, + .load = syncpt_load, + .cpu_incr = syncpt_cpu_incr, + .assign_to_channel = syncpt_assign_to_channel, + .enable_protection = syncpt_enable_protection, +}; diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c new file mode 100644 index 0000000000..995bfa9808 --- /dev/null +++ b/drivers/gpu/host1x/intr.c @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Tegra host1x Interrupt Management + * + * Copyright (c) 2010-2021, NVIDIA Corporation. + */ + +#include <linux/clk.h> + +#include "dev.h" +#include "fence.h" +#include "intr.h" + +static void host1x_intr_add_fence_to_list(struct host1x_fence_list *list, + struct host1x_syncpt_fence *fence) +{ + struct host1x_syncpt_fence *fence_in_list; + + list_for_each_entry_reverse(fence_in_list, &list->list, list) { + if ((s32)(fence_in_list->threshold - fence->threshold) <= 0) { + /* Fence in list is before us, we can insert here */ + list_add(&fence->list, &fence_in_list->list); + return; + } + } + + /* Add as first in list */ + list_add(&fence->list, &list->list); +} + +static void host1x_intr_update_hw_state(struct host1x *host, struct host1x_syncpt *sp) +{ + struct host1x_syncpt_fence *fence; + + if (!list_empty(&sp->fences.list)) { + fence = list_first_entry(&sp->fences.list, struct host1x_syncpt_fence, list); + + host1x_hw_intr_set_syncpt_threshold(host, sp->id, fence->threshold); + host1x_hw_intr_enable_syncpt_intr(host, sp->id); + } else { + host1x_hw_intr_disable_syncpt_intr(host, sp->id); + } +} + +void host1x_intr_add_fence_locked(struct host1x *host, struct host1x_syncpt_fence *fence) +{ + struct host1x_fence_list *fence_list = &fence->sp->fences; + + INIT_LIST_HEAD(&fence->list); + + host1x_intr_add_fence_to_list(fence_list, fence); + host1x_intr_update_hw_state(host, fence->sp); +} + +bool host1x_intr_remove_fence(struct host1x *host, struct host1x_syncpt_fence *fence) +{ + struct host1x_fence_list *fence_list = &fence->sp->fences; + unsigned long irqflags; + + spin_lock_irqsave(&fence_list->lock, irqflags); + + if (list_empty(&fence->list)) { + spin_unlock_irqrestore(&fence_list->lock, irqflags); + return false; + } + + list_del_init(&fence->list); + host1x_intr_update_hw_state(host, fence->sp); + + spin_unlock_irqrestore(&fence_list->lock, irqflags); + + return true; +} + +void host1x_intr_handle_interrupt(struct host1x *host, unsigned int id) +{ + struct host1x_syncpt *sp = &host->syncpt[id]; + struct host1x_syncpt_fence *fence, *tmp; + unsigned int value; + + value = host1x_syncpt_load(sp); + + spin_lock(&sp->fences.lock); + + list_for_each_entry_safe(fence, tmp, &sp->fences.list, list) { + if (((value - fence->threshold) & 0x80000000U) != 0U) { + /* Fence is not yet expired, we are done */ + break; + } + + list_del_init(&fence->list); + host1x_fence_signal(fence); + } + + /* Re-enable interrupt if necessary */ + host1x_intr_update_hw_state(host, sp); + + spin_unlock(&sp->fences.lock); +} + +int host1x_intr_init(struct host1x *host) +{ + unsigned int id; + + mutex_init(&host->intr_mutex); + + for (id = 0; id < host1x_syncpt_nb_pts(host); ++id) { + struct host1x_syncpt *syncpt = &host->syncpt[id]; + + spin_lock_init(&syncpt->fences.lock); + INIT_LIST_HEAD(&syncpt->fences.list); + } + + return 0; +} + +void host1x_intr_deinit(struct host1x *host) +{ +} + +void host1x_intr_start(struct host1x *host) +{ + u32 hz = clk_get_rate(host->clk); + int err; + + mutex_lock(&host->intr_mutex); + err = host1x_hw_intr_init_host_sync(host, DIV_ROUND_UP(hz, 1000000)); + if (err) { + mutex_unlock(&host->intr_mutex); + return; + } + mutex_unlock(&host->intr_mutex); +} + +void host1x_intr_stop(struct host1x *host) +{ + host1x_hw_intr_disable_all_syncpt_intrs(host); +} diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h new file mode 100644 index 0000000000..3b5610b525 --- /dev/null +++ b/drivers/gpu/host1x/intr.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tegra host1x Interrupt Management + * + * Copyright (c) 2010-2021, NVIDIA Corporation. + */ + +#ifndef __HOST1X_INTR_H +#define __HOST1X_INTR_H + +struct host1x; +struct host1x_syncpt_fence; + +/* Initialize host1x sync point interrupt */ +int host1x_intr_init(struct host1x *host); + +/* Deinitialize host1x sync point interrupt */ +void host1x_intr_deinit(struct host1x *host); + +/* Enable host1x sync point interrupt */ +void host1x_intr_start(struct host1x *host); + +/* Disable host1x sync point interrupt */ +void host1x_intr_stop(struct host1x *host); + +void host1x_intr_handle_interrupt(struct host1x *host, unsigned int id); + +void host1x_intr_add_fence_locked(struct host1x *host, struct host1x_syncpt_fence *fence); + +bool host1x_intr_remove_fence(struct host1x *host, struct host1x_syncpt_fence *fence); + +#endif diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c new file mode 100644 index 0000000000..3ed49e1fd9 --- /dev/null +++ b/drivers/gpu/host1x/job.c @@ -0,0 +1,690 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Tegra host1x Job + * + * Copyright (c) 2010-2015, NVIDIA Corporation. + */ + +#include <linux/dma-mapping.h> +#include <linux/err.h> +#include <linux/host1x.h> +#include <linux/iommu.h> +#include <linux/kref.h> +#include <linux/module.h> +#include <linux/scatterlist.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <trace/events/host1x.h> + +#include "channel.h" +#include "dev.h" +#include "job.h" +#include "syncpt.h" + +#define HOST1X_WAIT_SYNCPT_OFFSET 0x8 + +struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, + u32 num_cmdbufs, u32 num_relocs, + bool skip_firewall) +{ + struct host1x_job *job = NULL; + unsigned int num_unpins = num_relocs; + bool enable_firewall; + u64 total; + void *mem; + + enable_firewall = IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !skip_firewall; + + if (!enable_firewall) + num_unpins += num_cmdbufs; + + /* Check that we're not going to overflow */ + total = sizeof(struct host1x_job) + + (u64)num_relocs * sizeof(struct host1x_reloc) + + (u64)num_unpins * sizeof(struct host1x_job_unpin_data) + + (u64)num_cmdbufs * sizeof(struct host1x_job_cmd) + + (u64)num_unpins * sizeof(dma_addr_t) + + (u64)num_unpins * sizeof(u32 *); + if (total > ULONG_MAX) + return NULL; + + mem = job = kzalloc(total, GFP_KERNEL); + if (!job) + return NULL; + + job->enable_firewall = enable_firewall; + + kref_init(&job->ref); + job->channel = ch; + + /* Redistribute memory to the structs */ + mem += sizeof(struct host1x_job); + job->relocs = num_relocs ? mem : NULL; + mem += num_relocs * sizeof(struct host1x_reloc); + job->unpins = num_unpins ? mem : NULL; + mem += num_unpins * sizeof(struct host1x_job_unpin_data); + job->cmds = num_cmdbufs ? mem : NULL; + mem += num_cmdbufs * sizeof(struct host1x_job_cmd); + job->addr_phys = num_unpins ? mem : NULL; + + job->reloc_addr_phys = job->addr_phys; + job->gather_addr_phys = &job->addr_phys[num_relocs]; + + return job; +} +EXPORT_SYMBOL(host1x_job_alloc); + +struct host1x_job *host1x_job_get(struct host1x_job *job) +{ + kref_get(&job->ref); + return job; +} +EXPORT_SYMBOL(host1x_job_get); + +static void job_free(struct kref *ref) +{ + struct host1x_job *job = container_of(ref, struct host1x_job, ref); + + if (job->release) + job->release(job); + + if (job->fence) { + /* + * remove_callback is atomic w.r.t. fence signaling, so + * after the call returns, we know that the callback is not + * in execution, and the fence can be safely freed. + */ + dma_fence_remove_callback(job->fence, &job->fence_cb); + dma_fence_put(job->fence); + } + + if (job->syncpt) + host1x_syncpt_put(job->syncpt); + + kfree(job); +} + +void host1x_job_put(struct host1x_job *job) +{ + kref_put(&job->ref, job_free); +} +EXPORT_SYMBOL(host1x_job_put); + +void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo, + unsigned int words, unsigned int offset) +{ + struct host1x_job_gather *gather = &job->cmds[job->num_cmds].gather; + + gather->words = words; + gather->bo = bo; + gather->offset = offset; + + job->num_cmds++; +} +EXPORT_SYMBOL(host1x_job_add_gather); + +void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh, + bool relative, u32 next_class) +{ + struct host1x_job_cmd *cmd = &job->cmds[job->num_cmds]; + + cmd->is_wait = true; + cmd->wait.id = id; + cmd->wait.threshold = thresh; + cmd->wait.next_class = next_class; + cmd->wait.relative = relative; + + job->num_cmds++; +} +EXPORT_SYMBOL(host1x_job_add_wait); + +static unsigned int pin_job(struct host1x *host, struct host1x_job *job) +{ + unsigned long mask = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE; + struct host1x_client *client = job->client; + struct device *dev = client->dev; + struct host1x_job_gather *g; + unsigned int i; + int err; + + job->num_unpins = 0; + + for (i = 0; i < job->num_relocs; i++) { + struct host1x_reloc *reloc = &job->relocs[i]; + enum dma_data_direction direction; + struct host1x_bo_mapping *map; + struct host1x_bo *bo; + + reloc->target.bo = host1x_bo_get(reloc->target.bo); + if (!reloc->target.bo) { + err = -EINVAL; + goto unpin; + } + + bo = reloc->target.bo; + + switch (reloc->flags & mask) { + case HOST1X_RELOC_READ: + direction = DMA_TO_DEVICE; + break; + + case HOST1X_RELOC_WRITE: + direction = DMA_FROM_DEVICE; + break; + + case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE: + direction = DMA_BIDIRECTIONAL; + break; + + default: + err = -EINVAL; + goto unpin; + } + + map = host1x_bo_pin(dev, bo, direction, NULL); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto unpin; + } + + /* + * host1x clients are generally not able to do scatter-gather themselves, so fail + * if the buffer is discontiguous and we fail to map its SG table to a single + * contiguous chunk of I/O virtual memory. + */ + if (map->chunks > 1) { + err = -EINVAL; + goto unpin; + } + + job->addr_phys[job->num_unpins] = map->phys; + job->unpins[job->num_unpins].map = map; + job->num_unpins++; + } + + /* + * We will copy gathers BO content later, so there is no need to + * hold and pin them. + */ + if (job->enable_firewall) + return 0; + + for (i = 0; i < job->num_cmds; i++) { + struct host1x_bo_mapping *map; + size_t gather_size = 0; + struct scatterlist *sg; + unsigned long shift; + struct iova *alloc; + unsigned int j; + + if (job->cmds[i].is_wait) + continue; + + g = &job->cmds[i].gather; + + g->bo = host1x_bo_get(g->bo); + if (!g->bo) { + err = -EINVAL; + goto unpin; + } + + map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE, NULL); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto unpin; + } + + if (host->domain) { + for_each_sgtable_sg(map->sgt, sg, j) + gather_size += sg->length; + + gather_size = iova_align(&host->iova, gather_size); + + shift = iova_shift(&host->iova); + alloc = alloc_iova(&host->iova, gather_size >> shift, + host->iova_end >> shift, true); + if (!alloc) { + err = -ENOMEM; + goto put; + } + + err = iommu_map_sgtable(host->domain, iova_dma_addr(&host->iova, alloc), + map->sgt, IOMMU_READ); + if (err == 0) { + __free_iova(&host->iova, alloc); + err = -EINVAL; + goto put; + } + + map->phys = iova_dma_addr(&host->iova, alloc); + map->size = gather_size; + } + + job->addr_phys[job->num_unpins] = map->phys; + job->unpins[job->num_unpins].map = map; + job->num_unpins++; + + job->gather_addr_phys[i] = map->phys; + } + + return 0; + +put: + host1x_bo_put(g->bo); +unpin: + host1x_job_unpin(job); + return err; +} + +static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g) +{ + void *cmdbuf_addr = NULL; + struct host1x_bo *cmdbuf = g->bo; + unsigned int i; + + /* pin & patch the relocs for one gather */ + for (i = 0; i < job->num_relocs; i++) { + struct host1x_reloc *reloc = &job->relocs[i]; + u32 reloc_addr = (job->reloc_addr_phys[i] + + reloc->target.offset) >> reloc->shift; + u32 *target; + + /* skip all other gathers */ + if (cmdbuf != reloc->cmdbuf.bo) + continue; + + if (job->enable_firewall) { + target = (u32 *)job->gather_copy_mapped + + reloc->cmdbuf.offset / sizeof(u32) + + g->offset / sizeof(u32); + goto patch_reloc; + } + + if (!cmdbuf_addr) { + cmdbuf_addr = host1x_bo_mmap(cmdbuf); + + if (unlikely(!cmdbuf_addr)) { + pr_err("Could not map cmdbuf for relocation\n"); + return -ENOMEM; + } + } + + target = cmdbuf_addr + reloc->cmdbuf.offset; +patch_reloc: + *target = reloc_addr; + } + + if (cmdbuf_addr) + host1x_bo_munmap(cmdbuf, cmdbuf_addr); + + return 0; +} + +static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf, + unsigned int offset) +{ + offset *= sizeof(u32); + + if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset) + return false; + + /* relocation shift value validation isn't implemented yet */ + if (reloc->shift) + return false; + + return true; +} + +struct host1x_firewall { + struct host1x_job *job; + struct device *dev; + + unsigned int num_relocs; + struct host1x_reloc *reloc; + + struct host1x_bo *cmdbuf; + unsigned int offset; + + u32 words; + u32 class; + u32 reg; + u32 mask; + u32 count; +}; + +static int check_register(struct host1x_firewall *fw, unsigned long offset) +{ + if (!fw->job->is_addr_reg) + return 0; + + if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) { + if (!fw->num_relocs) + return -EINVAL; + + if (!check_reloc(fw->reloc, fw->cmdbuf, fw->offset)) + return -EINVAL; + + fw->num_relocs--; + fw->reloc++; + } + + return 0; +} + +static int check_class(struct host1x_firewall *fw, u32 class) +{ + if (!fw->job->is_valid_class) { + if (fw->class != class) + return -EINVAL; + } else { + if (!fw->job->is_valid_class(fw->class)) + return -EINVAL; + } + + return 0; +} + +static int check_mask(struct host1x_firewall *fw) +{ + u32 mask = fw->mask; + u32 reg = fw->reg; + int ret; + + while (mask) { + if (fw->words == 0) + return -EINVAL; + + if (mask & 1) { + ret = check_register(fw, reg); + if (ret < 0) + return ret; + + fw->words--; + fw->offset++; + } + mask >>= 1; + reg++; + } + + return 0; +} + +static int check_incr(struct host1x_firewall *fw) +{ + u32 count = fw->count; + u32 reg = fw->reg; + int ret; + + while (count) { + if (fw->words == 0) + return -EINVAL; + + ret = check_register(fw, reg); + if (ret < 0) + return ret; + + reg++; + fw->words--; + fw->offset++; + count--; + } + + return 0; +} + +static int check_nonincr(struct host1x_firewall *fw) +{ + u32 count = fw->count; + int ret; + + while (count) { + if (fw->words == 0) + return -EINVAL; + + ret = check_register(fw, fw->reg); + if (ret < 0) + return ret; + + fw->words--; + fw->offset++; + count--; + } + + return 0; +} + +static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g) +{ + u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped + + (g->offset / sizeof(u32)); + u32 job_class = fw->class; + int err = 0; + + fw->words = g->words; + fw->cmdbuf = g->bo; + fw->offset = 0; + + while (fw->words && !err) { + u32 word = cmdbuf_base[fw->offset]; + u32 opcode = (word & 0xf0000000) >> 28; + + fw->mask = 0; + fw->reg = 0; + fw->count = 0; + fw->words--; + fw->offset++; + + switch (opcode) { + case 0: + fw->class = word >> 6 & 0x3ff; + fw->mask = word & 0x3f; + fw->reg = word >> 16 & 0xfff; + err = check_class(fw, job_class); + if (!err) + err = check_mask(fw); + if (err) + goto out; + break; + case 1: + fw->reg = word >> 16 & 0xfff; + fw->count = word & 0xffff; + err = check_incr(fw); + if (err) + goto out; + break; + + case 2: + fw->reg = word >> 16 & 0xfff; + fw->count = word & 0xffff; + err = check_nonincr(fw); + if (err) + goto out; + break; + + case 3: + fw->mask = word & 0xffff; + fw->reg = word >> 16 & 0xfff; + err = check_mask(fw); + if (err) + goto out; + break; + case 4: + case 14: + break; + default: + err = -EINVAL; + break; + } + } + +out: + return err; +} + +static inline int copy_gathers(struct device *host, struct host1x_job *job, + struct device *dev) +{ + struct host1x_firewall fw; + size_t size = 0; + size_t offset = 0; + unsigned int i; + + fw.job = job; + fw.dev = dev; + fw.reloc = job->relocs; + fw.num_relocs = job->num_relocs; + fw.class = job->class; + + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_gather *g; + + if (job->cmds[i].is_wait) + continue; + + g = &job->cmds[i].gather; + + size += g->words * sizeof(u32); + } + + /* + * Try a non-blocking allocation from a higher priority pools first, + * as awaiting for the allocation here is a major performance hit. + */ + job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy, + GFP_NOWAIT); + + /* the higher priority allocation failed, try the generic-blocking */ + if (!job->gather_copy_mapped) + job->gather_copy_mapped = dma_alloc_wc(host, size, + &job->gather_copy, + GFP_KERNEL); + if (!job->gather_copy_mapped) + return -ENOMEM; + + job->gather_copy_size = size; + + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_gather *g; + void *gather; + + if (job->cmds[i].is_wait) + continue; + g = &job->cmds[i].gather; + + /* Copy the gather */ + gather = host1x_bo_mmap(g->bo); + memcpy(job->gather_copy_mapped + offset, gather + g->offset, + g->words * sizeof(u32)); + host1x_bo_munmap(g->bo, gather); + + /* Store the location in the buffer */ + g->base = job->gather_copy; + g->offset = offset; + + /* Validate the job */ + if (validate(&fw, g)) + return -EINVAL; + + offset += g->words * sizeof(u32); + } + + /* No relocs should remain at this point */ + if (fw.num_relocs) + return -EINVAL; + + return 0; +} + +int host1x_job_pin(struct host1x_job *job, struct device *dev) +{ + int err; + unsigned int i, j; + struct host1x *host = dev_get_drvdata(dev->parent); + + /* pin memory */ + err = pin_job(host, job); + if (err) + goto out; + + if (job->enable_firewall) { + err = copy_gathers(host->dev, job, dev); + if (err) + goto out; + } + + /* patch gathers */ + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_gather *g; + + if (job->cmds[i].is_wait) + continue; + g = &job->cmds[i].gather; + + /* process each gather mem only once */ + if (g->handled) + continue; + + /* copy_gathers() sets gathers base if firewall is enabled */ + if (!job->enable_firewall) + g->base = job->gather_addr_phys[i]; + + for (j = i + 1; j < job->num_cmds; j++) { + if (!job->cmds[j].is_wait && + job->cmds[j].gather.bo == g->bo) { + job->cmds[j].gather.handled = true; + job->cmds[j].gather.base = g->base; + } + } + + err = do_relocs(job, g); + if (err) + break; + } + +out: + if (err) + host1x_job_unpin(job); + wmb(); + + return err; +} +EXPORT_SYMBOL(host1x_job_pin); + +void host1x_job_unpin(struct host1x_job *job) +{ + struct host1x *host = dev_get_drvdata(job->channel->dev->parent); + unsigned int i; + + for (i = 0; i < job->num_unpins; i++) { + struct host1x_bo_mapping *map = job->unpins[i].map; + struct host1x_bo *bo = map->bo; + + if (!job->enable_firewall && map->size && host->domain) { + iommu_unmap(host->domain, job->addr_phys[i], map->size); + free_iova(&host->iova, iova_pfn(&host->iova, job->addr_phys[i])); + } + + host1x_bo_unpin(map); + host1x_bo_put(bo); + } + + job->num_unpins = 0; + + if (job->gather_copy_size) + dma_free_wc(host->dev, job->gather_copy_size, + job->gather_copy_mapped, job->gather_copy); +} +EXPORT_SYMBOL(host1x_job_unpin); + +/* + * Debug routine used to dump job entries + */ +void host1x_job_dump(struct device *dev, struct host1x_job *job) +{ + dev_dbg(dev, " SYNCPT_ID %d\n", job->syncpt->id); + dev_dbg(dev, " SYNCPT_VAL %d\n", job->syncpt_end); + dev_dbg(dev, " FIRST_GET 0x%x\n", job->first_get); + dev_dbg(dev, " TIMEOUT %d\n", job->timeout); + dev_dbg(dev, " NUM_SLOTS %d\n", job->num_slots); + dev_dbg(dev, " NUM_HANDLES %d\n", job->num_unpins); +} diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h new file mode 100644 index 0000000000..dad5a19466 --- /dev/null +++ b/drivers/gpu/host1x/job.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tegra host1x Job + * + * Copyright (c) 2011-2013, NVIDIA Corporation. + */ + +#ifndef __HOST1X_JOB_H +#define __HOST1X_JOB_H + +#include <linux/dma-direction.h> + +struct host1x_job_gather { + unsigned int words; + dma_addr_t base; + struct host1x_bo *bo; + unsigned int offset; + bool handled; +}; + +struct host1x_job_wait { + u32 id; + u32 threshold; + u32 next_class; + bool relative; +}; + +struct host1x_job_cmd { + bool is_wait; + + union { + struct host1x_job_gather gather; + struct host1x_job_wait wait; + }; +}; + +struct host1x_job_unpin_data { + struct host1x_bo_mapping *map; +}; + +/* + * Dump contents of job to debug output. + */ +void host1x_job_dump(struct device *dev, struct host1x_job *job); + +#endif diff --git a/drivers/gpu/host1x/mipi.c b/drivers/gpu/host1x/mipi.c new file mode 100644 index 0000000000..4dcec535ec --- /dev/null +++ b/drivers/gpu/host1x/mipi.c @@ -0,0 +1,554 @@ +/* + * Copyright (C) 2013 NVIDIA Corporation + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that copyright + * notice and this permission notice appear in supporting documentation, and + * that the name of the copyright holders not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. The copyright holders make no representations + * about the suitability of this software for any purpose. It is provided "as + * is" without express or implied warranty. + * + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE + * OF THIS SOFTWARE. + */ + +#include <linux/clk.h> +#include <linux/host1x.h> +#include <linux/io.h> +#include <linux/iopoll.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +#include "dev.h" + +#define MIPI_CAL_CTRL 0x00 +#define MIPI_CAL_CTRL_NOISE_FILTER(x) (((x) & 0xf) << 26) +#define MIPI_CAL_CTRL_PRESCALE(x) (((x) & 0x3) << 24) +#define MIPI_CAL_CTRL_CLKEN_OVR (1 << 4) +#define MIPI_CAL_CTRL_START (1 << 0) + +#define MIPI_CAL_AUTOCAL_CTRL 0x01 + +#define MIPI_CAL_STATUS 0x02 +#define MIPI_CAL_STATUS_DONE (1 << 16) +#define MIPI_CAL_STATUS_ACTIVE (1 << 0) + +#define MIPI_CAL_CONFIG_CSIA 0x05 +#define MIPI_CAL_CONFIG_CSIB 0x06 +#define MIPI_CAL_CONFIG_CSIC 0x07 +#define MIPI_CAL_CONFIG_CSID 0x08 +#define MIPI_CAL_CONFIG_CSIE 0x09 +#define MIPI_CAL_CONFIG_CSIF 0x0a +#define MIPI_CAL_CONFIG_DSIA 0x0e +#define MIPI_CAL_CONFIG_DSIB 0x0f +#define MIPI_CAL_CONFIG_DSIC 0x10 +#define MIPI_CAL_CONFIG_DSID 0x11 + +#define MIPI_CAL_CONFIG_DSIA_CLK 0x19 +#define MIPI_CAL_CONFIG_DSIB_CLK 0x1a +#define MIPI_CAL_CONFIG_CSIAB_CLK 0x1b +#define MIPI_CAL_CONFIG_DSIC_CLK 0x1c +#define MIPI_CAL_CONFIG_CSICD_CLK 0x1c +#define MIPI_CAL_CONFIG_DSID_CLK 0x1d +#define MIPI_CAL_CONFIG_CSIE_CLK 0x1d + +/* for data and clock lanes */ +#define MIPI_CAL_CONFIG_SELECT (1 << 21) + +/* for data lanes */ +#define MIPI_CAL_CONFIG_HSPDOS(x) (((x) & 0x1f) << 16) +#define MIPI_CAL_CONFIG_HSPUOS(x) (((x) & 0x1f) << 8) +#define MIPI_CAL_CONFIG_TERMOS(x) (((x) & 0x1f) << 0) + +/* for clock lanes */ +#define MIPI_CAL_CONFIG_HSCLKPDOSD(x) (((x) & 0x1f) << 8) +#define MIPI_CAL_CONFIG_HSCLKPUOSD(x) (((x) & 0x1f) << 0) + +#define MIPI_CAL_BIAS_PAD_CFG0 0x16 +#define MIPI_CAL_BIAS_PAD_PDVCLAMP (1 << 1) +#define MIPI_CAL_BIAS_PAD_E_VCLAMP_REF (1 << 0) + +#define MIPI_CAL_BIAS_PAD_CFG1 0x17 +#define MIPI_CAL_BIAS_PAD_DRV_DN_REF(x) (((x) & 0x7) << 16) +#define MIPI_CAL_BIAS_PAD_DRV_UP_REF(x) (((x) & 0x7) << 8) + +#define MIPI_CAL_BIAS_PAD_CFG2 0x18 +#define MIPI_CAL_BIAS_PAD_VCLAMP(x) (((x) & 0x7) << 16) +#define MIPI_CAL_BIAS_PAD_VAUXP(x) (((x) & 0x7) << 4) +#define MIPI_CAL_BIAS_PAD_PDVREG (1 << 1) + +struct tegra_mipi_pad { + unsigned long data; + unsigned long clk; +}; + +struct tegra_mipi_soc { + bool has_clk_lane; + const struct tegra_mipi_pad *pads; + unsigned int num_pads; + + bool clock_enable_override; + bool needs_vclamp_ref; + + /* bias pad configuration settings */ + u8 pad_drive_down_ref; + u8 pad_drive_up_ref; + + u8 pad_vclamp_level; + u8 pad_vauxp_level; + + /* calibration settings for data lanes */ + u8 hspdos; + u8 hspuos; + u8 termos; + + /* calibration settings for clock lanes */ + u8 hsclkpdos; + u8 hsclkpuos; +}; + +struct tegra_mipi { + const struct tegra_mipi_soc *soc; + struct device *dev; + void __iomem *regs; + struct mutex lock; + struct clk *clk; + + unsigned long usage_count; +}; + +struct tegra_mipi_device { + struct platform_device *pdev; + struct tegra_mipi *mipi; + struct device *device; + unsigned long pads; +}; + +static inline u32 tegra_mipi_readl(struct tegra_mipi *mipi, + unsigned long offset) +{ + return readl(mipi->regs + (offset << 2)); +} + +static inline void tegra_mipi_writel(struct tegra_mipi *mipi, u32 value, + unsigned long offset) +{ + writel(value, mipi->regs + (offset << 2)); +} + +static int tegra_mipi_power_up(struct tegra_mipi *mipi) +{ + u32 value; + int err; + + err = clk_enable(mipi->clk); + if (err < 0) + return err; + + value = tegra_mipi_readl(mipi, MIPI_CAL_BIAS_PAD_CFG0); + value &= ~MIPI_CAL_BIAS_PAD_PDVCLAMP; + + if (mipi->soc->needs_vclamp_ref) + value |= MIPI_CAL_BIAS_PAD_E_VCLAMP_REF; + + tegra_mipi_writel(mipi, value, MIPI_CAL_BIAS_PAD_CFG0); + + value = tegra_mipi_readl(mipi, MIPI_CAL_BIAS_PAD_CFG2); + value &= ~MIPI_CAL_BIAS_PAD_PDVREG; + tegra_mipi_writel(mipi, value, MIPI_CAL_BIAS_PAD_CFG2); + + clk_disable(mipi->clk); + + return 0; +} + +static int tegra_mipi_power_down(struct tegra_mipi *mipi) +{ + u32 value; + int err; + + err = clk_enable(mipi->clk); + if (err < 0) + return err; + + /* + * The MIPI_CAL_BIAS_PAD_PDVREG controls a voltage regulator that + * supplies the DSI pads. This must be kept enabled until none of the + * DSI lanes are used anymore. + */ + value = tegra_mipi_readl(mipi, MIPI_CAL_BIAS_PAD_CFG2); + value |= MIPI_CAL_BIAS_PAD_PDVREG; + tegra_mipi_writel(mipi, value, MIPI_CAL_BIAS_PAD_CFG2); + + /* + * MIPI_CAL_BIAS_PAD_PDVCLAMP and MIPI_CAL_BIAS_PAD_E_VCLAMP_REF + * control a regulator that supplies current to the pre-driver logic. + * Powering down this regulator causes DSI to fail, so it must remain + * powered on until none of the DSI lanes are used anymore. + */ + value = tegra_mipi_readl(mipi, MIPI_CAL_BIAS_PAD_CFG0); + + if (mipi->soc->needs_vclamp_ref) + value &= ~MIPI_CAL_BIAS_PAD_E_VCLAMP_REF; + + value |= MIPI_CAL_BIAS_PAD_PDVCLAMP; + tegra_mipi_writel(mipi, value, MIPI_CAL_BIAS_PAD_CFG0); + + return 0; +} + +struct tegra_mipi_device *tegra_mipi_request(struct device *device, + struct device_node *np) +{ + struct tegra_mipi_device *dev; + struct of_phandle_args args; + int err; + + err = of_parse_phandle_with_args(np, "nvidia,mipi-calibrate", + "#nvidia,mipi-calibrate-cells", 0, + &args); + if (err < 0) + return ERR_PTR(err); + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) { + err = -ENOMEM; + goto out; + } + + dev->pdev = of_find_device_by_node(args.np); + if (!dev->pdev) { + err = -ENODEV; + goto free; + } + + dev->mipi = platform_get_drvdata(dev->pdev); + if (!dev->mipi) { + err = -EPROBE_DEFER; + goto put; + } + + of_node_put(args.np); + + dev->pads = args.args[0]; + dev->device = device; + + return dev; + +put: + platform_device_put(dev->pdev); +free: + kfree(dev); +out: + of_node_put(args.np); + return ERR_PTR(err); +} +EXPORT_SYMBOL(tegra_mipi_request); + +void tegra_mipi_free(struct tegra_mipi_device *device) +{ + platform_device_put(device->pdev); + kfree(device); +} +EXPORT_SYMBOL(tegra_mipi_free); + +int tegra_mipi_enable(struct tegra_mipi_device *dev) +{ + int err = 0; + + mutex_lock(&dev->mipi->lock); + + if (dev->mipi->usage_count++ == 0) + err = tegra_mipi_power_up(dev->mipi); + + mutex_unlock(&dev->mipi->lock); + + return err; + +} +EXPORT_SYMBOL(tegra_mipi_enable); + +int tegra_mipi_disable(struct tegra_mipi_device *dev) +{ + int err = 0; + + mutex_lock(&dev->mipi->lock); + + if (--dev->mipi->usage_count == 0) + err = tegra_mipi_power_down(dev->mipi); + + mutex_unlock(&dev->mipi->lock); + + return err; + +} +EXPORT_SYMBOL(tegra_mipi_disable); + +int tegra_mipi_finish_calibration(struct tegra_mipi_device *device) +{ + struct tegra_mipi *mipi = device->mipi; + void __iomem *status_reg = mipi->regs + (MIPI_CAL_STATUS << 2); + u32 value; + int err; + + err = readl_relaxed_poll_timeout(status_reg, value, + !(value & MIPI_CAL_STATUS_ACTIVE) && + (value & MIPI_CAL_STATUS_DONE), 50, + 250000); + mutex_unlock(&device->mipi->lock); + clk_disable(device->mipi->clk); + + return err; +} +EXPORT_SYMBOL(tegra_mipi_finish_calibration); + +int tegra_mipi_start_calibration(struct tegra_mipi_device *device) +{ + const struct tegra_mipi_soc *soc = device->mipi->soc; + unsigned int i; + u32 value; + int err; + + err = clk_enable(device->mipi->clk); + if (err < 0) + return err; + + mutex_lock(&device->mipi->lock); + + value = MIPI_CAL_BIAS_PAD_DRV_DN_REF(soc->pad_drive_down_ref) | + MIPI_CAL_BIAS_PAD_DRV_UP_REF(soc->pad_drive_up_ref); + tegra_mipi_writel(device->mipi, value, MIPI_CAL_BIAS_PAD_CFG1); + + value = tegra_mipi_readl(device->mipi, MIPI_CAL_BIAS_PAD_CFG2); + value &= ~MIPI_CAL_BIAS_PAD_VCLAMP(0x7); + value &= ~MIPI_CAL_BIAS_PAD_VAUXP(0x7); + value |= MIPI_CAL_BIAS_PAD_VCLAMP(soc->pad_vclamp_level); + value |= MIPI_CAL_BIAS_PAD_VAUXP(soc->pad_vauxp_level); + tegra_mipi_writel(device->mipi, value, MIPI_CAL_BIAS_PAD_CFG2); + + for (i = 0; i < soc->num_pads; i++) { + u32 clk = 0, data = 0; + + if (device->pads & BIT(i)) { + data = MIPI_CAL_CONFIG_SELECT | + MIPI_CAL_CONFIG_HSPDOS(soc->hspdos) | + MIPI_CAL_CONFIG_HSPUOS(soc->hspuos) | + MIPI_CAL_CONFIG_TERMOS(soc->termos); + clk = MIPI_CAL_CONFIG_SELECT | + MIPI_CAL_CONFIG_HSCLKPDOSD(soc->hsclkpdos) | + MIPI_CAL_CONFIG_HSCLKPUOSD(soc->hsclkpuos); + } + + tegra_mipi_writel(device->mipi, data, soc->pads[i].data); + + if (soc->has_clk_lane && soc->pads[i].clk != 0) + tegra_mipi_writel(device->mipi, clk, soc->pads[i].clk); + } + + value = tegra_mipi_readl(device->mipi, MIPI_CAL_CTRL); + value &= ~MIPI_CAL_CTRL_NOISE_FILTER(0xf); + value &= ~MIPI_CAL_CTRL_PRESCALE(0x3); + value |= MIPI_CAL_CTRL_NOISE_FILTER(0xa); + value |= MIPI_CAL_CTRL_PRESCALE(0x2); + + if (!soc->clock_enable_override) + value &= ~MIPI_CAL_CTRL_CLKEN_OVR; + else + value |= MIPI_CAL_CTRL_CLKEN_OVR; + + tegra_mipi_writel(device->mipi, value, MIPI_CAL_CTRL); + + /* clear any pending status bits */ + value = tegra_mipi_readl(device->mipi, MIPI_CAL_STATUS); + tegra_mipi_writel(device->mipi, value, MIPI_CAL_STATUS); + + value = tegra_mipi_readl(device->mipi, MIPI_CAL_CTRL); + value |= MIPI_CAL_CTRL_START; + tegra_mipi_writel(device->mipi, value, MIPI_CAL_CTRL); + + /* + * Wait for min 72uS to let calibration logic finish calibration + * sequence codes before waiting for pads idle state to apply the + * results. + */ + usleep_range(75, 80); + + return 0; +} +EXPORT_SYMBOL(tegra_mipi_start_calibration); + +static const struct tegra_mipi_pad tegra114_mipi_pads[] = { + { .data = MIPI_CAL_CONFIG_CSIA }, + { .data = MIPI_CAL_CONFIG_CSIB }, + { .data = MIPI_CAL_CONFIG_CSIC }, + { .data = MIPI_CAL_CONFIG_CSID }, + { .data = MIPI_CAL_CONFIG_CSIE }, + { .data = MIPI_CAL_CONFIG_DSIA }, + { .data = MIPI_CAL_CONFIG_DSIB }, + { .data = MIPI_CAL_CONFIG_DSIC }, + { .data = MIPI_CAL_CONFIG_DSID }, +}; + +static const struct tegra_mipi_soc tegra114_mipi_soc = { + .has_clk_lane = false, + .pads = tegra114_mipi_pads, + .num_pads = ARRAY_SIZE(tegra114_mipi_pads), + .clock_enable_override = true, + .needs_vclamp_ref = true, + .pad_drive_down_ref = 0x2, + .pad_drive_up_ref = 0x0, + .pad_vclamp_level = 0x0, + .pad_vauxp_level = 0x0, + .hspdos = 0x0, + .hspuos = 0x4, + .termos = 0x5, + .hsclkpdos = 0x0, + .hsclkpuos = 0x4, +}; + +static const struct tegra_mipi_pad tegra124_mipi_pads[] = { + { .data = MIPI_CAL_CONFIG_CSIA, .clk = MIPI_CAL_CONFIG_CSIAB_CLK }, + { .data = MIPI_CAL_CONFIG_CSIB, .clk = MIPI_CAL_CONFIG_CSIAB_CLK }, + { .data = MIPI_CAL_CONFIG_CSIC, .clk = MIPI_CAL_CONFIG_CSICD_CLK }, + { .data = MIPI_CAL_CONFIG_CSID, .clk = MIPI_CAL_CONFIG_CSICD_CLK }, + { .data = MIPI_CAL_CONFIG_CSIE, .clk = MIPI_CAL_CONFIG_CSIE_CLK }, + { .data = MIPI_CAL_CONFIG_DSIA, .clk = MIPI_CAL_CONFIG_DSIA_CLK }, + { .data = MIPI_CAL_CONFIG_DSIB, .clk = MIPI_CAL_CONFIG_DSIB_CLK }, +}; + +static const struct tegra_mipi_soc tegra124_mipi_soc = { + .has_clk_lane = true, + .pads = tegra124_mipi_pads, + .num_pads = ARRAY_SIZE(tegra124_mipi_pads), + .clock_enable_override = true, + .needs_vclamp_ref = true, + .pad_drive_down_ref = 0x2, + .pad_drive_up_ref = 0x0, + .pad_vclamp_level = 0x0, + .pad_vauxp_level = 0x0, + .hspdos = 0x0, + .hspuos = 0x0, + .termos = 0x0, + .hsclkpdos = 0x1, + .hsclkpuos = 0x2, +}; + +static const struct tegra_mipi_soc tegra132_mipi_soc = { + .has_clk_lane = true, + .pads = tegra124_mipi_pads, + .num_pads = ARRAY_SIZE(tegra124_mipi_pads), + .clock_enable_override = false, + .needs_vclamp_ref = false, + .pad_drive_down_ref = 0x0, + .pad_drive_up_ref = 0x3, + .pad_vclamp_level = 0x0, + .pad_vauxp_level = 0x0, + .hspdos = 0x0, + .hspuos = 0x0, + .termos = 0x0, + .hsclkpdos = 0x3, + .hsclkpuos = 0x2, +}; + +static const struct tegra_mipi_pad tegra210_mipi_pads[] = { + { .data = MIPI_CAL_CONFIG_CSIA, .clk = 0 }, + { .data = MIPI_CAL_CONFIG_CSIB, .clk = 0 }, + { .data = MIPI_CAL_CONFIG_CSIC, .clk = 0 }, + { .data = MIPI_CAL_CONFIG_CSID, .clk = 0 }, + { .data = MIPI_CAL_CONFIG_CSIE, .clk = 0 }, + { .data = MIPI_CAL_CONFIG_CSIF, .clk = 0 }, + { .data = MIPI_CAL_CONFIG_DSIA, .clk = MIPI_CAL_CONFIG_DSIA_CLK }, + { .data = MIPI_CAL_CONFIG_DSIB, .clk = MIPI_CAL_CONFIG_DSIB_CLK }, + { .data = MIPI_CAL_CONFIG_DSIC, .clk = MIPI_CAL_CONFIG_DSIC_CLK }, + { .data = MIPI_CAL_CONFIG_DSID, .clk = MIPI_CAL_CONFIG_DSID_CLK }, +}; + +static const struct tegra_mipi_soc tegra210_mipi_soc = { + .has_clk_lane = true, + .pads = tegra210_mipi_pads, + .num_pads = ARRAY_SIZE(tegra210_mipi_pads), + .clock_enable_override = true, + .needs_vclamp_ref = false, + .pad_drive_down_ref = 0x0, + .pad_drive_up_ref = 0x3, + .pad_vclamp_level = 0x1, + .pad_vauxp_level = 0x1, + .hspdos = 0x0, + .hspuos = 0x2, + .termos = 0x0, + .hsclkpdos = 0x0, + .hsclkpuos = 0x2, +}; + +static const struct of_device_id tegra_mipi_of_match[] = { + { .compatible = "nvidia,tegra114-mipi", .data = &tegra114_mipi_soc }, + { .compatible = "nvidia,tegra124-mipi", .data = &tegra124_mipi_soc }, + { .compatible = "nvidia,tegra132-mipi", .data = &tegra132_mipi_soc }, + { .compatible = "nvidia,tegra210-mipi", .data = &tegra210_mipi_soc }, + { }, +}; + +static int tegra_mipi_probe(struct platform_device *pdev) +{ + const struct of_device_id *match; + struct tegra_mipi *mipi; + int err; + + match = of_match_node(tegra_mipi_of_match, pdev->dev.of_node); + if (!match) + return -ENODEV; + + mipi = devm_kzalloc(&pdev->dev, sizeof(*mipi), GFP_KERNEL); + if (!mipi) + return -ENOMEM; + + mipi->soc = match->data; + mipi->dev = &pdev->dev; + + mipi->regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); + if (IS_ERR(mipi->regs)) + return PTR_ERR(mipi->regs); + + mutex_init(&mipi->lock); + + mipi->clk = devm_clk_get(&pdev->dev, NULL); + if (IS_ERR(mipi->clk)) { + dev_err(&pdev->dev, "failed to get clock\n"); + return PTR_ERR(mipi->clk); + } + + err = clk_prepare(mipi->clk); + if (err < 0) + return err; + + platform_set_drvdata(pdev, mipi); + + return 0; +} + +static int tegra_mipi_remove(struct platform_device *pdev) +{ + struct tegra_mipi *mipi = platform_get_drvdata(pdev); + + clk_unprepare(mipi->clk); + + return 0; +} + +struct platform_driver tegra_mipi_driver = { + .driver = { + .name = "tegra-mipi", + .of_match_table = tegra_mipi_of_match, + }, + .probe = tegra_mipi_probe, + .remove = tegra_mipi_remove, +}; diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c new file mode 100644 index 0000000000..f63d14a57a --- /dev/null +++ b/drivers/gpu/host1x/syncpt.c @@ -0,0 +1,532 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Tegra host1x Syncpoints + * + * Copyright (c) 2010-2015, NVIDIA Corporation. + */ + +#include <linux/module.h> +#include <linux/device.h> +#include <linux/dma-fence.h> +#include <linux/slab.h> + +#include <trace/events/host1x.h> + +#include "syncpt.h" +#include "dev.h" +#include "intr.h" +#include "debug.h" + +#define SYNCPT_CHECK_PERIOD (2 * HZ) +#define MAX_STUCK_CHECK_COUNT 15 + +static struct host1x_syncpt_base * +host1x_syncpt_base_request(struct host1x *host) +{ + struct host1x_syncpt_base *bases = host->bases; + unsigned int i; + + for (i = 0; i < host->info->nb_bases; i++) + if (!bases[i].requested) + break; + + if (i >= host->info->nb_bases) + return NULL; + + bases[i].requested = true; + return &bases[i]; +} + +static void host1x_syncpt_base_free(struct host1x_syncpt_base *base) +{ + if (base) + base->requested = false; +} + +/** + * host1x_syncpt_alloc() - allocate a syncpoint + * @host: host1x device data + * @flags: bitfield of HOST1X_SYNCPT_* flags + * @name: name for the syncpoint for use in debug prints + * + * Allocates a hardware syncpoint for the caller's use. The caller then has + * the sole authority to mutate the syncpoint's value until it is freed again. + * + * If no free syncpoints are available, or a NULL name was specified, returns + * NULL. + */ +struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host, + unsigned long flags, + const char *name) +{ + struct host1x_syncpt *sp = host->syncpt; + char *full_name; + unsigned int i; + + if (!name) + return NULL; + + mutex_lock(&host->syncpt_mutex); + + for (i = 0; i < host->info->nb_pts && kref_read(&sp->ref); i++, sp++) + ; + + if (i >= host->info->nb_pts) + goto unlock; + + if (flags & HOST1X_SYNCPT_HAS_BASE) { + sp->base = host1x_syncpt_base_request(host); + if (!sp->base) + goto unlock; + } + + full_name = kasprintf(GFP_KERNEL, "%u-%s", sp->id, name); + if (!full_name) + goto free_base; + + sp->name = full_name; + + if (flags & HOST1X_SYNCPT_CLIENT_MANAGED) + sp->client_managed = true; + else + sp->client_managed = false; + + kref_init(&sp->ref); + + mutex_unlock(&host->syncpt_mutex); + return sp; + +free_base: + host1x_syncpt_base_free(sp->base); + sp->base = NULL; +unlock: + mutex_unlock(&host->syncpt_mutex); + return NULL; +} +EXPORT_SYMBOL(host1x_syncpt_alloc); + +/** + * host1x_syncpt_id() - retrieve syncpoint ID + * @sp: host1x syncpoint + * + * Given a pointer to a struct host1x_syncpt, retrieves its ID. This ID is + * often used as a value to program into registers that control how hardware + * blocks interact with syncpoints. + */ +u32 host1x_syncpt_id(struct host1x_syncpt *sp) +{ + return sp->id; +} +EXPORT_SYMBOL(host1x_syncpt_id); + +/** + * host1x_syncpt_incr_max() - update the value sent to hardware + * @sp: host1x syncpoint + * @incrs: number of increments + */ +u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs) +{ + return (u32)atomic_add_return(incrs, &sp->max_val); +} +EXPORT_SYMBOL(host1x_syncpt_incr_max); + + /* + * Write cached syncpoint and waitbase values to hardware. + */ +void host1x_syncpt_restore(struct host1x *host) +{ + struct host1x_syncpt *sp_base = host->syncpt; + unsigned int i; + + for (i = 0; i < host1x_syncpt_nb_pts(host); i++) { + /* + * Unassign syncpt from channels for purposes of Tegra186 + * syncpoint protection. This prevents any channel from + * accessing it until it is reassigned. + */ + host1x_hw_syncpt_assign_to_channel(host, sp_base + i, NULL); + host1x_hw_syncpt_restore(host, sp_base + i); + } + + for (i = 0; i < host1x_syncpt_nb_bases(host); i++) + host1x_hw_syncpt_restore_wait_base(host, sp_base + i); + + host1x_hw_syncpt_enable_protection(host); + + wmb(); +} + +/* + * Update the cached syncpoint and waitbase values by reading them + * from the registers. + */ +void host1x_syncpt_save(struct host1x *host) +{ + struct host1x_syncpt *sp_base = host->syncpt; + unsigned int i; + + for (i = 0; i < host1x_syncpt_nb_pts(host); i++) { + if (host1x_syncpt_client_managed(sp_base + i)) + host1x_hw_syncpt_load(host, sp_base + i); + else + WARN_ON(!host1x_syncpt_idle(sp_base + i)); + } + + for (i = 0; i < host1x_syncpt_nb_bases(host); i++) + host1x_hw_syncpt_load_wait_base(host, sp_base + i); +} + +/* + * Updates the cached syncpoint value by reading a new value from the hardware + * register + */ +u32 host1x_syncpt_load(struct host1x_syncpt *sp) +{ + u32 val; + + val = host1x_hw_syncpt_load(sp->host, sp); + trace_host1x_syncpt_load_min(sp->id, val); + + return val; +} + +/* + * Get the current syncpoint base + */ +u32 host1x_syncpt_load_wait_base(struct host1x_syncpt *sp) +{ + host1x_hw_syncpt_load_wait_base(sp->host, sp); + + return sp->base_val; +} + +/** + * host1x_syncpt_incr() - increment syncpoint value from CPU, updating cache + * @sp: host1x syncpoint + */ +int host1x_syncpt_incr(struct host1x_syncpt *sp) +{ + return host1x_hw_syncpt_cpu_incr(sp->host, sp); +} +EXPORT_SYMBOL(host1x_syncpt_incr); + +/** + * host1x_syncpt_wait() - wait for a syncpoint to reach a given value + * @sp: host1x syncpoint + * @thresh: threshold + * @timeout: maximum time to wait for the syncpoint to reach the given value + * @value: return location for the syncpoint value + */ +int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout, + u32 *value) +{ + struct dma_fence *fence; + long wait_err; + + host1x_hw_syncpt_load(sp->host, sp); + + if (value) + *value = host1x_syncpt_load(sp); + + if (host1x_syncpt_is_expired(sp, thresh)) + return 0; + + if (timeout < 0) + timeout = LONG_MAX; + else if (timeout == 0) + return -EAGAIN; + + fence = host1x_fence_create(sp, thresh, false); + if (IS_ERR(fence)) + return PTR_ERR(fence); + + wait_err = dma_fence_wait_timeout(fence, true, timeout); + if (wait_err == 0) + host1x_fence_cancel(fence); + dma_fence_put(fence); + + if (value) + *value = host1x_syncpt_load(sp); + + /* + * Don't rely on dma_fence_wait_timeout return value, + * since it returns zero both on timeout and if the + * wait completed with 0 jiffies left. + */ + host1x_hw_syncpt_load(sp->host, sp); + if (wait_err == 0 && !host1x_syncpt_is_expired(sp, thresh)) + return -EAGAIN; + else if (wait_err < 0) + return wait_err; + else + return 0; +} +EXPORT_SYMBOL(host1x_syncpt_wait); + +/* + * Returns true if syncpoint is expired, false if we may need to wait + */ +bool host1x_syncpt_is_expired(struct host1x_syncpt *sp, u32 thresh) +{ + u32 current_val; + + smp_rmb(); + + current_val = (u32)atomic_read(&sp->min_val); + + return ((current_val - thresh) & 0x80000000U) == 0U; +} + +int host1x_syncpt_init(struct host1x *host) +{ + struct host1x_syncpt_base *bases; + struct host1x_syncpt *syncpt; + unsigned int i; + + syncpt = devm_kcalloc(host->dev, host->info->nb_pts, sizeof(*syncpt), + GFP_KERNEL); + if (!syncpt) + return -ENOMEM; + + bases = devm_kcalloc(host->dev, host->info->nb_bases, sizeof(*bases), + GFP_KERNEL); + if (!bases) + return -ENOMEM; + + for (i = 0; i < host->info->nb_pts; i++) { + syncpt[i].id = i; + syncpt[i].host = host; + } + + for (i = 0; i < host->info->nb_bases; i++) + bases[i].id = i; + + mutex_init(&host->syncpt_mutex); + host->syncpt = syncpt; + host->bases = bases; + + /* Allocate sync point to use for clearing waits for expired fences */ + host->nop_sp = host1x_syncpt_alloc(host, 0, "reserved-nop"); + if (!host->nop_sp) + return -ENOMEM; + + if (host->info->reserve_vblank_syncpts) { + kref_init(&host->syncpt[26].ref); + kref_init(&host->syncpt[27].ref); + } + + return 0; +} + +/** + * host1x_syncpt_request() - request a syncpoint + * @client: client requesting the syncpoint + * @flags: flags + * + * host1x client drivers can use this function to allocate a syncpoint for + * subsequent use. A syncpoint returned by this function will be reserved for + * use by the client exclusively. When no longer using a syncpoint, a host1x + * client driver needs to release it using host1x_syncpt_put(). + */ +struct host1x_syncpt *host1x_syncpt_request(struct host1x_client *client, + unsigned long flags) +{ + struct host1x *host = dev_get_drvdata(client->host->parent); + + return host1x_syncpt_alloc(host, flags, dev_name(client->dev)); +} +EXPORT_SYMBOL(host1x_syncpt_request); + +static void syncpt_release(struct kref *ref) +{ + struct host1x_syncpt *sp = container_of(ref, struct host1x_syncpt, ref); + + atomic_set(&sp->max_val, host1x_syncpt_read(sp)); + + sp->locked = false; + + mutex_lock(&sp->host->syncpt_mutex); + + host1x_syncpt_base_free(sp->base); + kfree(sp->name); + sp->base = NULL; + sp->name = NULL; + sp->client_managed = false; + + mutex_unlock(&sp->host->syncpt_mutex); +} + +/** + * host1x_syncpt_put() - free a requested syncpoint + * @sp: host1x syncpoint + * + * Release a syncpoint previously allocated using host1x_syncpt_request(). A + * host1x client driver should call this when the syncpoint is no longer in + * use. + */ +void host1x_syncpt_put(struct host1x_syncpt *sp) +{ + if (!sp) + return; + + kref_put(&sp->ref, syncpt_release); +} +EXPORT_SYMBOL(host1x_syncpt_put); + +void host1x_syncpt_deinit(struct host1x *host) +{ + struct host1x_syncpt *sp = host->syncpt; + unsigned int i; + + for (i = 0; i < host->info->nb_pts; i++, sp++) + kfree(sp->name); +} + +/** + * host1x_syncpt_read_max() - read maximum syncpoint value + * @sp: host1x syncpoint + * + * The maximum syncpoint value indicates how many operations there are in + * queue, either in channel or in a software thread. + */ +u32 host1x_syncpt_read_max(struct host1x_syncpt *sp) +{ + smp_rmb(); + + return (u32)atomic_read(&sp->max_val); +} +EXPORT_SYMBOL(host1x_syncpt_read_max); + +/** + * host1x_syncpt_read_min() - read minimum syncpoint value + * @sp: host1x syncpoint + * + * The minimum syncpoint value is a shadow of the current sync point value in + * hardware. + */ +u32 host1x_syncpt_read_min(struct host1x_syncpt *sp) +{ + smp_rmb(); + + return (u32)atomic_read(&sp->min_val); +} +EXPORT_SYMBOL(host1x_syncpt_read_min); + +/** + * host1x_syncpt_read() - read the current syncpoint value + * @sp: host1x syncpoint + */ +u32 host1x_syncpt_read(struct host1x_syncpt *sp) +{ + return host1x_syncpt_load(sp); +} +EXPORT_SYMBOL(host1x_syncpt_read); + +unsigned int host1x_syncpt_nb_pts(struct host1x *host) +{ + return host->info->nb_pts; +} + +unsigned int host1x_syncpt_nb_bases(struct host1x *host) +{ + return host->info->nb_bases; +} + +unsigned int host1x_syncpt_nb_mlocks(struct host1x *host) +{ + return host->info->nb_mlocks; +} + +/** + * host1x_syncpt_get_by_id() - obtain a syncpoint by ID + * @host: host1x controller + * @id: syncpoint ID + */ +struct host1x_syncpt *host1x_syncpt_get_by_id(struct host1x *host, + unsigned int id) +{ + if (id >= host->info->nb_pts) + return NULL; + + if (kref_get_unless_zero(&host->syncpt[id].ref)) + return &host->syncpt[id]; + else + return NULL; +} +EXPORT_SYMBOL(host1x_syncpt_get_by_id); + +/** + * host1x_syncpt_get_by_id_noref() - obtain a syncpoint by ID but don't + * increase the refcount. + * @host: host1x controller + * @id: syncpoint ID + */ +struct host1x_syncpt *host1x_syncpt_get_by_id_noref(struct host1x *host, + unsigned int id) +{ + if (id >= host->info->nb_pts) + return NULL; + + return &host->syncpt[id]; +} +EXPORT_SYMBOL(host1x_syncpt_get_by_id_noref); + +/** + * host1x_syncpt_get() - increment syncpoint refcount + * @sp: syncpoint + */ +struct host1x_syncpt *host1x_syncpt_get(struct host1x_syncpt *sp) +{ + kref_get(&sp->ref); + + return sp; +} +EXPORT_SYMBOL(host1x_syncpt_get); + +/** + * host1x_syncpt_get_base() - obtain the wait base associated with a syncpoint + * @sp: host1x syncpoint + */ +struct host1x_syncpt_base *host1x_syncpt_get_base(struct host1x_syncpt *sp) +{ + return sp ? sp->base : NULL; +} +EXPORT_SYMBOL(host1x_syncpt_get_base); + +/** + * host1x_syncpt_base_id() - retrieve the ID of a syncpoint wait base + * @base: host1x syncpoint wait base + */ +u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base) +{ + return base->id; +} +EXPORT_SYMBOL(host1x_syncpt_base_id); + +static void do_nothing(struct kref *ref) +{ +} + +/** + * host1x_syncpt_release_vblank_reservation() - Make VBLANK syncpoint + * available for allocation + * + * @client: host1x bus client + * @syncpt_id: syncpoint ID to make available + * + * Makes VBLANK<i> syncpoint available for allocatation if it was + * reserved at initialization time. This should be called by the display + * driver after it has ensured that any VBLANK increment programming configured + * by the boot chain has been disabled. + */ +void host1x_syncpt_release_vblank_reservation(struct host1x_client *client, + u32 syncpt_id) +{ + struct host1x *host = dev_get_drvdata(client->host->parent); + + if (!host->info->reserve_vblank_syncpts) + return; + + kref_put(&host->syncpt[syncpt_id].ref, do_nothing); +} +EXPORT_SYMBOL(host1x_syncpt_release_vblank_reservation); diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h new file mode 100644 index 0000000000..4c3f3b2f0e --- /dev/null +++ b/drivers/gpu/host1x/syncpt.h @@ -0,0 +1,131 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tegra host1x Syncpoints + * + * Copyright (c) 2010-2013, NVIDIA Corporation. + */ + +#ifndef __HOST1X_SYNCPT_H +#define __HOST1X_SYNCPT_H + +#include <linux/atomic.h> +#include <linux/host1x.h> +#include <linux/kernel.h> +#include <linux/kref.h> +#include <linux/sched.h> + +#include "fence.h" +#include "intr.h" + +struct host1x; + +/* Reserved for replacing an expired wait with a NOP */ +#define HOST1X_SYNCPT_RESERVED 0 + +struct host1x_syncpt_base { + unsigned int id; + bool requested; +}; + +struct host1x_syncpt { + struct kref ref; + + unsigned int id; + atomic_t min_val; + atomic_t max_val; + u32 base_val; + const char *name; + bool client_managed; + struct host1x *host; + struct host1x_syncpt_base *base; + + /* interrupt data */ + struct host1x_fence_list fences; + + /* + * If a submission incrementing this syncpoint fails, lock it so that + * further submission cannot be made until application has handled the + * failure. + */ + bool locked; +}; + +/* Initialize sync point array */ +int host1x_syncpt_init(struct host1x *host); + +/* Free sync point array */ +void host1x_syncpt_deinit(struct host1x *host); + +/* Return number of sync point supported. */ +unsigned int host1x_syncpt_nb_pts(struct host1x *host); + +/* Return number of wait bases supported. */ +unsigned int host1x_syncpt_nb_bases(struct host1x *host); + +/* Return number of mlocks supported. */ +unsigned int host1x_syncpt_nb_mlocks(struct host1x *host); + +/* + * Check sync point sanity. If max is larger than min, there have too many + * sync point increments. + * + * Client managed sync point are not tracked. + * */ +static inline bool host1x_syncpt_check_max(struct host1x_syncpt *sp, u32 real) +{ + u32 max; + if (sp->client_managed) + return true; + max = host1x_syncpt_read_max(sp); + return (s32)(max - real) >= 0; +} + +/* Return true if sync point is client managed. */ +static inline bool host1x_syncpt_client_managed(struct host1x_syncpt *sp) +{ + return sp->client_managed; +} + +/* + * Returns true if syncpoint min == max, which means that there are no + * outstanding operations. + */ +static inline bool host1x_syncpt_idle(struct host1x_syncpt *sp) +{ + int min, max; + smp_rmb(); + min = atomic_read(&sp->min_val); + max = atomic_read(&sp->max_val); + return (min == max); +} + +/* Load current value from hardware to the shadow register. */ +u32 host1x_syncpt_load(struct host1x_syncpt *sp); + +/* Check if the given syncpoint value has already passed */ +bool host1x_syncpt_is_expired(struct host1x_syncpt *sp, u32 thresh); + +/* Save host1x sync point state into shadow registers. */ +void host1x_syncpt_save(struct host1x *host); + +/* Reset host1x sync point state from shadow registers. */ +void host1x_syncpt_restore(struct host1x *host); + +/* Read current wait base value into shadow register and return it. */ +u32 host1x_syncpt_load_wait_base(struct host1x_syncpt *sp); + +/* Indicate future operations by incrementing the sync point max. */ +u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs); + +/* Check if sync point id is valid. */ +static inline int host1x_syncpt_is_valid(struct host1x_syncpt *sp) +{ + return sp->id < host1x_syncpt_nb_pts(sp->host); +} + +static inline void host1x_syncpt_set_locked(struct host1x_syncpt *sp) +{ + sp->locked = true; +} + +#endif |