diff options
Diffstat (limited to 'drivers/gpu/drm/nouveau')
24 files changed, 526 insertions, 433 deletions
diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig index 1e6aaf95ff..ceef470c9f 100644 --- a/drivers/gpu/drm/nouveau/Kconfig +++ b/drivers/gpu/drm/nouveau/Kconfig @@ -100,3 +100,11 @@ config DRM_NOUVEAU_SVM help Say Y here if you want to enable experimental support for Shared Virtual Memory (SVM). + +config DRM_NOUVEAU_GSP_DEFAULT + bool "Use GSP firmware for Turing/Ampere (needs firmware installed)" + depends on DRM_NOUVEAU + default n + help + Say Y here if you want to use the GSP codepaths by default on + Turing and Ampere GPUs. diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index d093549f6e..8d37a694b7 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -38,7 +38,9 @@ #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_edid.h> +#include <drm/drm_eld.h> #include <drm/drm_fb_helper.h> +#include <drm/drm_fixed.h> #include <drm/drm_probe_helper.h> #include <drm/drm_vblank.h> @@ -945,7 +947,8 @@ nv50_msto_prepare(struct drm_atomic_state *state, if (ret == 0) { nvif_outp_dp_mst_vcpi(&mstm->outp->outp, msto->head->base.index, payload->vc_start_slot, payload->time_slots, - payload->pbn, payload->time_slots * mst_state->pbn_div); + payload->pbn, + payload->time_slots * dfixed_trunc(mst_state->pbn_div)); } else { nvif_outp_dp_mst_vcpi(&mstm->outp->outp, msto->head->base.index, 0, 0, 0, 0); } @@ -989,7 +992,7 @@ nv50_msto_atomic_check(struct drm_encoder *encoder, if (IS_ERR(mst_state)) return PTR_ERR(mst_state); - if (!mst_state->pbn_div) { + if (!mst_state->pbn_div.full) { struct nouveau_encoder *outp = mstc->mstm->outp; mst_state->pbn_div = drm_dp_get_vc_payload_bw(&mstm->mgr, diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c index 2edd7bb13f..80f74ee0fc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.c +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c @@ -127,21 +127,16 @@ nouveau_abi16_chan_fini(struct nouveau_abi16 *abi16, { struct nouveau_abi16_ntfy *ntfy, *temp; - /* When a client exits without waiting for it's queued up jobs to - * finish it might happen that we fault the channel. This is due to - * drm_file_free() calling drm_gem_release() before the postclose() - * callback. Hence, we can't tear down this scheduler entity before - * uvmm mappings are unmapped. Currently, we can't detect this case. - * - * However, this should be rare and harmless, since the channel isn't - * needed anymore. - */ - nouveau_sched_entity_fini(&chan->sched_entity); + /* Cancel all jobs from the entity's queue. */ + if (chan->sched) + drm_sched_entity_fini(&chan->sched->entity); - /* wait for all activity to stop before cleaning up */ if (chan->chan) nouveau_channel_idle(chan->chan); + if (chan->sched) + nouveau_sched_destroy(&chan->sched); + /* cleanup notifier state */ list_for_each_entry_safe(ntfy, temp, &chan->notifiers, head) { nouveau_abi16_ntfy_fini(chan, ntfy); @@ -204,6 +199,7 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) struct nouveau_cli *cli = nouveau_cli(file_priv); struct nouveau_drm *drm = nouveau_drm(dev); struct nvif_device *device = &drm->client.device; + struct nvkm_device *nvkm_device = nvxx_device(&drm->client.device); struct nvkm_gr *gr = nvxx_gr(device); struct drm_nouveau_getparam *getparam = data; struct pci_dev *pdev = to_pci_dev(dev->dev); @@ -268,6 +264,14 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS) getparam->value = nouveau_exec_push_max_from_ib_max(ib_max); break; } + case NOUVEAU_GETPARAM_VRAM_BAR_SIZE: + getparam->value = nvkm_device->func->resource_size(nvkm_device, 1); + break; + case NOUVEAU_GETPARAM_VRAM_USED: { + struct ttm_resource_manager *vram_mgr = ttm_manager_type(&drm->ttm.bdev, TTM_PL_VRAM); + getparam->value = (u64)ttm_resource_manager_usage(vram_mgr); + break; + } default: NV_PRINTK(dbg, cli, "unknown parameter %lld\n", getparam->param); return -EINVAL; @@ -344,10 +348,16 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS) if (ret) goto done; - ret = nouveau_sched_entity_init(&chan->sched_entity, &drm->sched, - drm->sched_wq); - if (ret) - goto done; + /* If we're not using the VM_BIND uAPI, we don't need a scheduler. + * + * The client lock is already acquired by nouveau_abi16_get(). + */ + if (nouveau_cli_uvmm(cli)) { + ret = nouveau_sched_create(&chan->sched, drm, drm->sched_wq, + chan->chan->dma.ib_max); + if (ret) + goto done; + } init->channel = chan->chan->chid; diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.h b/drivers/gpu/drm/nouveau/nouveau_abi16.h index 9f538486c1..11c8c4a800 100644 --- a/drivers/gpu/drm/nouveau/nouveau_abi16.h +++ b/drivers/gpu/drm/nouveau/nouveau_abi16.h @@ -26,7 +26,7 @@ struct nouveau_abi16_chan { struct nouveau_bo *ntfy; struct nouveau_vma *ntfy_vma; struct nvkm_mm heap; - struct nouveau_sched_entity sched_entity; + struct nouveau_sched *sched; }; struct nouveau_abi16 { diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c index 479effcf60..79cfab53f8 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bios.c +++ b/drivers/gpu/drm/nouveau/nouveau_bios.c @@ -23,6 +23,7 @@ */ #include "nouveau_drv.h" +#include "nouveau_bios.h" #include "nouveau_reg.h" #include "dispnv04/hw.h" #include "nouveau_encoder.h" @@ -1677,7 +1678,7 @@ apply_dcb_encoder_quirks(struct drm_device *dev, int idx, u32 *conn, u32 *conf) */ if (nv_match_device(dev, 0x0201, 0x1462, 0x8851)) { if (*conn == 0xf2005014 && *conf == 0xffffffff) { - fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 1, 1, 1); + fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 1, 1, DCB_OUTPUT_B); return false; } } @@ -1763,26 +1764,26 @@ fabricate_dcb_encoder_table(struct drm_device *dev, struct nvbios *bios) #ifdef __powerpc__ /* Apple iMac G4 NV17 */ if (of_machine_is_compatible("PowerMac4,5")) { - fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 0, all_heads, 1); - fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, 1, all_heads, 2); + fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, 0, all_heads, DCB_OUTPUT_B); + fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, 1, all_heads, DCB_OUTPUT_C); return; } #endif /* Make up some sane defaults */ fabricate_dcb_output(dcb, DCB_OUTPUT_ANALOG, - bios->legacy.i2c_indices.crt, 1, 1); + bios->legacy.i2c_indices.crt, 1, DCB_OUTPUT_B); if (nv04_tv_identify(dev, bios->legacy.i2c_indices.tv) >= 0) fabricate_dcb_output(dcb, DCB_OUTPUT_TV, bios->legacy.i2c_indices.tv, - all_heads, 0); + all_heads, DCB_OUTPUT_A); else if (bios->tmds.output0_script_ptr || bios->tmds.output1_script_ptr) fabricate_dcb_output(dcb, DCB_OUTPUT_TMDS, bios->legacy.i2c_indices.panel, - all_heads, 1); + all_heads, DCB_OUTPUT_B); } static int diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 254d6c9ef2..5d8ee17295 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -148,10 +148,17 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo) * If nouveau_bo_new() allocated this buffer, the GEM object was never * initialized, so don't attempt to release it. */ - if (bo->base.dev) + if (bo->base.dev) { + /* Gem objects not being shared with other VMs get their + * dma_resv from a root GEM object. + */ + if (nvbo->no_share) + drm_gem_object_put(nvbo->r_obj); + drm_gem_object_release(&bo->base); - else + } else { dma_resv_fini(&bo->base._resv); + } kfree(nvbo); } @@ -1055,17 +1062,18 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, { struct nouveau_drm *drm = nouveau_bdev(bo->bdev); struct nouveau_bo *nvbo = nouveau_bo(bo); + struct drm_gem_object *obj = &bo->base; struct ttm_resource *old_reg = bo->resource; struct nouveau_drm_tile *new_tile = NULL; int ret = 0; - if (new_reg->mem_type == TTM_PL_TT) { ret = nouveau_ttm_tt_bind(bo->bdev, bo->ttm, new_reg); if (ret) return ret; } + drm_gpuvm_bo_gem_evict(obj, evict); nouveau_bo_move_ntfy(bo, new_reg); ret = ttm_bo_wait_ctx(bo, ctx); if (ret) @@ -1130,6 +1138,7 @@ out: out_ntfy: if (ret) { nouveau_bo_move_ntfy(bo, bo->resource); + drm_gpuvm_bo_gem_evict(obj, !evict); } return ret; } diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h index 07f671cf89..70c551921a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.h +++ b/drivers/gpu/drm/nouveau/nouveau_bo.h @@ -26,6 +26,11 @@ struct nouveau_bo { struct list_head entry; int pbbo_index; bool validate_mapped; + + /* Root GEM object we derive the dma_resv of in case this BO is not + * shared between VMs. + */ + struct drm_gem_object *r_obj; bool no_share; /* GPU address space is independent of CPU word size */ diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c index 75545da9d1..a947e1d5f3 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drm.c +++ b/drivers/gpu/drm/nouveau/nouveau_drm.c @@ -190,6 +190,8 @@ nouveau_cli_work_queue(struct nouveau_cli *cli, struct dma_fence *fence, static void nouveau_cli_fini(struct nouveau_cli *cli) { + struct nouveau_uvmm *uvmm = nouveau_cli_uvmm_locked(cli); + /* All our channels are dead now, which means all the fences they * own are signalled, and all callback functions have been called. * @@ -199,8 +201,10 @@ nouveau_cli_fini(struct nouveau_cli *cli) WARN_ON(!list_empty(&cli->worker)); usif_client_fini(cli); - nouveau_uvmm_fini(&cli->uvmm); - nouveau_sched_entity_fini(&cli->sched_entity); + if (cli->sched) + nouveau_sched_destroy(&cli->sched); + if (uvmm) + nouveau_uvmm_fini(uvmm); nouveau_vmm_fini(&cli->svm); nouveau_vmm_fini(&cli->vmm); nvif_mmu_dtor(&cli->mmu); @@ -307,8 +311,17 @@ nouveau_cli_init(struct nouveau_drm *drm, const char *sname, cli->mem = &mems[ret]; - ret = nouveau_sched_entity_init(&cli->sched_entity, &drm->sched, - drm->sched_wq); + /* Don't pass in the (shared) sched_wq in order to let + * nouveau_sched_create() create a dedicated one for VM_BIND jobs. + * + * This is required to ensure that for VM_BIND jobs free_job() work and + * run_job() work can always run concurrently and hence, free_job() work + * can never stall run_job() work. For EXEC jobs we don't have this + * requirement, since EXEC job's free_job() does not require to take any + * locks which indirectly or directly are held for allocations + * elsewhere. + */ + ret = nouveau_sched_create(&cli->sched, drm, NULL, 1); if (ret) goto done; @@ -579,13 +592,16 @@ nouveau_drm_device_init(struct drm_device *dev) nvif_parent_ctor(&nouveau_parent, &drm->parent); drm->master.base.object.parent = &drm->parent; - ret = nouveau_sched_init(drm); - if (ret) + drm->sched_wq = alloc_workqueue("nouveau_sched_wq_shared", 0, + WQ_MAX_ACTIVE); + if (!drm->sched_wq) { + ret = -ENOMEM; goto fail_alloc; + } ret = nouveau_cli_init(drm, "DRM-master", &drm->master); if (ret) - goto fail_sched; + goto fail_wq; ret = nouveau_cli_init(drm, "DRM", &drm->client); if (ret) @@ -655,8 +671,8 @@ fail_ttm: nouveau_cli_fini(&drm->client); fail_master: nouveau_cli_fini(&drm->master); -fail_sched: - nouveau_sched_fini(drm); +fail_wq: + destroy_workqueue(drm->sched_wq); fail_alloc: nvif_parent_dtor(&drm->parent); kfree(drm); @@ -710,9 +726,7 @@ nouveau_drm_device_fini(struct drm_device *dev) nouveau_cli_fini(&drm->client); nouveau_cli_fini(&drm->master); - - nouveau_sched_fini(drm); - + destroy_workqueue(drm->sched_wq); nvif_parent_dtor(&drm->parent); mutex_destroy(&drm->clients_lock); kfree(drm); diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h index e73a233c65..e239c6bf4a 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.h +++ b/drivers/gpu/drm/nouveau/nouveau_drv.h @@ -93,9 +93,12 @@ struct nouveau_cli { struct nvif_mmu mmu; struct nouveau_vmm vmm; struct nouveau_vmm svm; - struct nouveau_uvmm uvmm; + struct { + struct nouveau_uvmm *ptr; + bool disabled; + } uvmm; - struct nouveau_sched_entity sched_entity; + struct nouveau_sched *sched; const struct nvif_mclass *mem; @@ -121,10 +124,7 @@ struct nouveau_cli_work { static inline struct nouveau_uvmm * nouveau_cli_uvmm(struct nouveau_cli *cli) { - if (!cli || !cli->uvmm.vmm.cli) - return NULL; - - return &cli->uvmm; + return cli ? cli->uvmm.ptr : NULL; } static inline struct nouveau_uvmm * @@ -258,6 +258,9 @@ struct nouveau_drm { u64 context_base; } *runl; + /* Workqueue used for channel schedulers. */ + struct workqueue_struct *sched_wq; + /* context for accelerated drm-internal operations */ struct nouveau_channel *cechan; struct nouveau_channel *channel; @@ -298,10 +301,6 @@ struct nouveau_drm { struct mutex lock; bool component_registered; } audio; - - struct drm_gpu_scheduler sched; - struct workqueue_struct *sched_wq; - }; static inline struct nouveau_drm * diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.c b/drivers/gpu/drm/nouveau/nouveau_exec.c index 9a5ef57474..e65c0ef23b 100644 --- a/drivers/gpu/drm/nouveau/nouveau_exec.c +++ b/drivers/gpu/drm/nouveau/nouveau_exec.c @@ -1,7 +1,5 @@ // SPDX-License-Identifier: MIT -#include <drm/drm_exec.h> - #include "nouveau_drv.h" #include "nouveau_gem.h" #include "nouveau_mem.h" @@ -86,14 +84,12 @@ */ static int -nouveau_exec_job_submit(struct nouveau_job *job) +nouveau_exec_job_submit(struct nouveau_job *job, + struct drm_gpuvm_exec *vme) { struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job); struct nouveau_cli *cli = job->cli; struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(cli); - struct drm_exec *exec = &job->exec; - struct drm_gem_object *obj; - unsigned long index; int ret; /* Create a new fence, but do not emit yet. */ @@ -102,52 +98,29 @@ nouveau_exec_job_submit(struct nouveau_job *job) return ret; nouveau_uvmm_lock(uvmm); - drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT | - DRM_EXEC_IGNORE_DUPLICATES); - drm_exec_until_all_locked(exec) { - struct drm_gpuva *va; - - drm_gpuvm_for_each_va(va, &uvmm->base) { - if (unlikely(va == &uvmm->base.kernel_alloc_node)) - continue; - - ret = drm_exec_prepare_obj(exec, va->gem.obj, 1); - drm_exec_retry_on_contention(exec); - if (ret) - goto err_uvmm_unlock; - } + ret = drm_gpuvm_exec_lock(vme); + if (ret) { + nouveau_uvmm_unlock(uvmm); + return ret; } nouveau_uvmm_unlock(uvmm); - drm_exec_for_each_locked_object(exec, index, obj) { - struct nouveau_bo *nvbo = nouveau_gem_object(obj); - - ret = nouveau_bo_validate(nvbo, true, false); - if (ret) - goto err_exec_fini; + ret = drm_gpuvm_exec_validate(vme); + if (ret) { + drm_gpuvm_exec_unlock(vme); + return ret; } return 0; - -err_uvmm_unlock: - nouveau_uvmm_unlock(uvmm); -err_exec_fini: - drm_exec_fini(exec); - return ret; - } static void -nouveau_exec_job_armed_submit(struct nouveau_job *job) +nouveau_exec_job_armed_submit(struct nouveau_job *job, + struct drm_gpuvm_exec *vme) { - struct drm_exec *exec = &job->exec; - struct drm_gem_object *obj; - unsigned long index; - - drm_exec_for_each_locked_object(exec, index, obj) - dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage); - - drm_exec_fini(exec); + drm_gpuvm_exec_resv_add_fence(vme, job->done_fence, + job->resv_usage, job->resv_usage); + drm_gpuvm_exec_unlock(vme); } static struct dma_fence * @@ -192,6 +165,7 @@ nouveau_exec_job_free(struct nouveau_job *job) { struct nouveau_exec_job *exec_job = to_nouveau_exec_job(job); + nouveau_job_done(job); nouveau_job_free(job); kfree(exec_job->fence); @@ -211,8 +185,6 @@ nouveau_exec_job_timeout(struct nouveau_job *job) NV_PRINTK(warn, job->cli, "job timeout, channel %d killed!\n", chan->chid); - nouveau_sched_entity_fini(job->entity); - return DRM_GPU_SCHED_STAT_NOMINAL; } @@ -259,10 +231,12 @@ nouveau_exec_job_init(struct nouveau_exec_job **pjob, } } + args.file_priv = __args->file_priv; job->chan = __args->chan; - args.sched_entity = __args->sched_entity; - args.file_priv = __args->file_priv; + args.sched = __args->sched; + /* Plus one to account for the HW fence. */ + args.credits = job->push.count + 1; args.in_sync.count = __args->in_sync.count; args.in_sync.s = __args->in_sync.s; @@ -415,7 +389,7 @@ nouveau_exec_ioctl_exec(struct drm_device *dev, if (ret) goto out; - args.sched_entity = &chan16->sched_entity; + args.sched = chan16->sched; args.file_priv = file_priv; args.chan = chan; diff --git a/drivers/gpu/drm/nouveau/nouveau_exec.h b/drivers/gpu/drm/nouveau/nouveau_exec.h index 5488d337bc..9b3b151fac 100644 --- a/drivers/gpu/drm/nouveau/nouveau_exec.h +++ b/drivers/gpu/drm/nouveau/nouveau_exec.h @@ -3,16 +3,12 @@ #ifndef __NOUVEAU_EXEC_H__ #define __NOUVEAU_EXEC_H__ -#include <drm/drm_exec.h> - #include "nouveau_drv.h" #include "nouveau_sched.h" struct nouveau_exec_job_args { struct drm_file *file_priv; - struct nouveau_sched_entity *sched_entity; - - struct drm_exec exec; + struct nouveau_sched *sched; struct nouveau_channel *chan; struct { diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 7b69e6df57..5a887d67dc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -111,7 +111,8 @@ nouveau_gem_object_open(struct drm_gem_object *gem, struct drm_file *file_priv) if (vmm->vmm.object.oclass < NVIF_CLASS_VMM_NV50) return 0; - if (nvbo->no_share && uvmm && &uvmm->resv != nvbo->bo.base.resv) + if (nvbo->no_share && uvmm && + drm_gpuvm_resv(&uvmm->base) != nvbo->bo.base.resv) return -EPERM; ret = ttm_bo_reserve(&nvbo->bo, false, false, NULL); @@ -245,7 +246,7 @@ nouveau_gem_new(struct nouveau_cli *cli, u64 size, int align, uint32_t domain, if (unlikely(!uvmm)) return -EINVAL; - resv = &uvmm->resv; + resv = drm_gpuvm_resv(&uvmm->base); } if (!(domain & (NOUVEAU_GEM_DOMAIN_VRAM | NOUVEAU_GEM_DOMAIN_GART))) @@ -288,6 +289,11 @@ nouveau_gem_new(struct nouveau_cli *cli, u64 size, int align, uint32_t domain, if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) nvbo->valid_domains &= domain; + if (nvbo->no_share) { + nvbo->r_obj = drm_gpuvm_resv_obj(&uvmm->base); + drm_gem_object_get(nvbo->r_obj); + } + *pnvbo = nvbo; return 0; } diff --git a/drivers/gpu/drm/nouveau/nouveau_platform.c b/drivers/gpu/drm/nouveau/nouveau_platform.c index 23cd43a7fd..bf2dc7567e 100644 --- a/drivers/gpu/drm/nouveau/nouveau_platform.c +++ b/drivers/gpu/drm/nouveau/nouveau_platform.c @@ -43,11 +43,10 @@ static int nouveau_platform_probe(struct platform_device *pdev) return 0; } -static int nouveau_platform_remove(struct platform_device *pdev) +static void nouveau_platform_remove(struct platform_device *pdev) { struct drm_device *dev = platform_get_drvdata(pdev); nouveau_drm_device_remove(dev); - return 0; } #if IS_ENABLED(CONFIG_OF) @@ -93,5 +92,5 @@ struct platform_driver nouveau_platform_driver = { .of_match_table = of_match_ptr(nouveau_platform_match), }, .probe = nouveau_platform_probe, - .remove = nouveau_platform_remove, + .remove_new = nouveau_platform_remove, }; diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c index 7c376c4ccd..32fa2e2739 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.c +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c @@ -12,30 +12,28 @@ #include "nouveau_abi16.h" #include "nouveau_sched.h" -/* FIXME - * - * We want to make sure that jobs currently executing can't be deferred by - * other jobs competing for the hardware. Otherwise we might end up with job - * timeouts just because of too many clients submitting too many jobs. We don't - * want jobs to time out because of system load, but because of the job being - * too bulky. - * - * For now allow for up to 16 concurrent jobs in flight until we know how many - * rings the hardware can process in parallel. - */ -#define NOUVEAU_SCHED_HW_SUBMISSIONS 16 #define NOUVEAU_SCHED_JOB_TIMEOUT_MS 10000 +/* Starts at 0, since the DRM scheduler interprets those parameters as (initial) + * index to the run-queue array. + */ +enum nouveau_sched_priority { + NOUVEAU_SCHED_PRIORITY_SINGLE = DRM_SCHED_PRIORITY_KERNEL, + NOUVEAU_SCHED_PRIORITY_COUNT, +}; + int nouveau_job_init(struct nouveau_job *job, struct nouveau_job_args *args) { - struct nouveau_sched_entity *entity = args->sched_entity; + struct nouveau_sched *sched = args->sched; int ret; + INIT_LIST_HEAD(&job->entry); + job->file_priv = args->file_priv; job->cli = nouveau_cli(args->file_priv); - job->entity = entity; + job->sched = sched; job->sync = args->sync; job->resv_usage = args->resv_usage; @@ -86,10 +84,10 @@ nouveau_job_init(struct nouveau_job *job, ret = -ENOMEM; goto err_free_objs; } - } - ret = drm_sched_job_init(&job->base, &entity->base, NULL); + ret = drm_sched_job_init(&job->base, &sched->entity, + args->credits, NULL); if (ret) goto err_free_chains; @@ -109,6 +107,27 @@ return ret; } void +nouveau_job_fini(struct nouveau_job *job) +{ + dma_fence_put(job->done_fence); + drm_sched_job_cleanup(&job->base); + + job->ops->free(job); +} + +void +nouveau_job_done(struct nouveau_job *job) +{ + struct nouveau_sched *sched = job->sched; + + spin_lock(&sched->job.list.lock); + list_del(&job->entry); + spin_unlock(&sched->job.list.lock); + + wake_up(&sched->job.wq); +} + +void nouveau_job_free(struct nouveau_job *job) { kfree(job->in_sync.data); @@ -117,13 +136,6 @@ nouveau_job_free(struct nouveau_job *job) kfree(job->out_sync.chains); } -void nouveau_job_fini(struct nouveau_job *job) -{ - dma_fence_put(job->done_fence); - drm_sched_job_cleanup(&job->base); - job->ops->free(job); -} - static int sync_find_fence(struct nouveau_job *job, struct drm_nouveau_sync *sync, @@ -261,8 +273,13 @@ nouveau_job_fence_attach(struct nouveau_job *job) int nouveau_job_submit(struct nouveau_job *job) { - struct nouveau_sched_entity *entity = to_nouveau_sched_entity(job->base.entity); + struct nouveau_sched *sched = job->sched; struct dma_fence *done_fence = NULL; + struct drm_gpuvm_exec vm_exec = { + .vm = &nouveau_cli_uvmm(job->cli)->base, + .flags = DRM_EXEC_IGNORE_DUPLICATES, + .num_fences = 1, + }; int ret; ret = nouveau_job_add_deps(job); @@ -276,46 +293,29 @@ nouveau_job_submit(struct nouveau_job *job) /* Make sure the job appears on the sched_entity's queue in the same * order as it was submitted. */ - mutex_lock(&entity->mutex); + mutex_lock(&sched->mutex); /* Guarantee we won't fail after the submit() callback returned * successfully. */ if (job->ops->submit) { - ret = job->ops->submit(job); + ret = job->ops->submit(job, &vm_exec); if (ret) goto err_cleanup; } + /* Submit was successful; add the job to the schedulers job list. */ + spin_lock(&sched->job.list.lock); + list_add(&job->entry, &sched->job.list.head); + spin_unlock(&sched->job.list.lock); + drm_sched_job_arm(&job->base); job->done_fence = dma_fence_get(&job->base.s_fence->finished); if (job->sync) done_fence = dma_fence_get(job->done_fence); - /* If a sched job depends on a dma-fence from a job from the same GPU - * scheduler instance, but a different scheduler entity, the GPU - * scheduler does only wait for the particular job to be scheduled, - * rather than for the job to fully complete. This is due to the GPU - * scheduler assuming that there is a scheduler instance per ring. - * However, the current implementation, in order to avoid arbitrary - * amounts of kthreads, has a single scheduler instance while scheduler - * entities represent rings. - * - * As a workaround, set the DRM_SCHED_FENCE_DONT_PIPELINE for all - * out-fences in order to force the scheduler to wait for full job - * completion for dependent jobs from different entities and same - * scheduler instance. - * - * There is some work in progress [1] to address the issues of firmware - * schedulers; once it is in-tree the scheduler topology in Nouveau - * should be re-worked accordingly. - * - * [1] https://lore.kernel.org/dri-devel/20230801205103.627779-1-matthew.brost@intel.com/ - */ - set_bit(DRM_SCHED_FENCE_DONT_PIPELINE, &job->done_fence->flags); - if (job->ops->armed_submit) - job->ops->armed_submit(job); + job->ops->armed_submit(job, &vm_exec); nouveau_job_fence_attach(job); @@ -326,7 +326,7 @@ nouveau_job_submit(struct nouveau_job *job) drm_sched_entity_push_job(&job->base); - mutex_unlock(&entity->mutex); + mutex_unlock(&sched->mutex); if (done_fence) { dma_fence_wait(done_fence, true); @@ -336,20 +336,13 @@ nouveau_job_submit(struct nouveau_job *job) return 0; err_cleanup: - mutex_unlock(&entity->mutex); + mutex_unlock(&sched->mutex); nouveau_job_fence_attach_cleanup(job); err: job->state = NOUVEAU_JOB_SUBMIT_FAILED; return ret; } -bool -nouveau_sched_entity_qwork(struct nouveau_sched_entity *entity, - struct work_struct *work) -{ - return queue_work(entity->sched_wq, work); -} - static struct dma_fence * nouveau_job_run(struct nouveau_job *job) { @@ -399,50 +392,116 @@ nouveau_sched_free_job(struct drm_sched_job *sched_job) nouveau_job_fini(job); } -int nouveau_sched_entity_init(struct nouveau_sched_entity *entity, - struct drm_gpu_scheduler *sched, - struct workqueue_struct *sched_wq) -{ - mutex_init(&entity->mutex); - spin_lock_init(&entity->job.list.lock); - INIT_LIST_HEAD(&entity->job.list.head); - init_waitqueue_head(&entity->job.wq); - - entity->sched_wq = sched_wq; - return drm_sched_entity_init(&entity->base, - DRM_SCHED_PRIORITY_NORMAL, - &sched, 1, NULL); -} - -void -nouveau_sched_entity_fini(struct nouveau_sched_entity *entity) -{ - drm_sched_entity_destroy(&entity->base); -} - static const struct drm_sched_backend_ops nouveau_sched_ops = { .run_job = nouveau_sched_run_job, .timedout_job = nouveau_sched_timedout_job, .free_job = nouveau_sched_free_job, }; -int nouveau_sched_init(struct nouveau_drm *drm) +static int +nouveau_sched_init(struct nouveau_sched *sched, struct nouveau_drm *drm, + struct workqueue_struct *wq, u32 credit_limit) { - struct drm_gpu_scheduler *sched = &drm->sched; + struct drm_gpu_scheduler *drm_sched = &sched->base; + struct drm_sched_entity *entity = &sched->entity; long job_hang_limit = msecs_to_jiffies(NOUVEAU_SCHED_JOB_TIMEOUT_MS); + int ret; - drm->sched_wq = create_singlethread_workqueue("nouveau_sched_wq"); - if (!drm->sched_wq) + if (!wq) { + wq = alloc_workqueue("nouveau_sched_wq_%d", 0, WQ_MAX_ACTIVE, + current->pid); + if (!wq) + return -ENOMEM; + + sched->wq = wq; + } + + ret = drm_sched_init(drm_sched, &nouveau_sched_ops, wq, + NOUVEAU_SCHED_PRIORITY_COUNT, + credit_limit, 0, job_hang_limit, + NULL, NULL, "nouveau_sched", drm->dev->dev); + if (ret) + goto fail_wq; + + /* Using DRM_SCHED_PRIORITY_KERNEL, since that's what we're required to use + * when we want to have a single run-queue only. + * + * It's not documented, but one will find out when trying to use any + * other priority running into faults, because the scheduler uses the + * priority as array index. + * + * Can't use NOUVEAU_SCHED_PRIORITY_SINGLE either, because it's not + * matching the enum type used in drm_sched_entity_init(). + */ + ret = drm_sched_entity_init(entity, DRM_SCHED_PRIORITY_KERNEL, + &drm_sched, 1, NULL); + if (ret) + goto fail_sched; + + mutex_init(&sched->mutex); + spin_lock_init(&sched->job.list.lock); + INIT_LIST_HEAD(&sched->job.list.head); + init_waitqueue_head(&sched->job.wq); + + return 0; + +fail_sched: + drm_sched_fini(drm_sched); +fail_wq: + if (sched->wq) + destroy_workqueue(sched->wq); + return ret; +} + +int +nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm, + struct workqueue_struct *wq, u32 credit_limit) +{ + struct nouveau_sched *sched; + int ret; + + sched = kzalloc(sizeof(*sched), GFP_KERNEL); + if (!sched) return -ENOMEM; - return drm_sched_init(sched, &nouveau_sched_ops, - DRM_SCHED_PRIORITY_COUNT, - NOUVEAU_SCHED_HW_SUBMISSIONS, 0, job_hang_limit, - NULL, NULL, "nouveau_sched", drm->dev->dev); + ret = nouveau_sched_init(sched, drm, wq, credit_limit); + if (ret) { + kfree(sched); + return ret; + } + + *psched = sched; + + return 0; +} + + +static void +nouveau_sched_fini(struct nouveau_sched *sched) +{ + struct drm_gpu_scheduler *drm_sched = &sched->base; + struct drm_sched_entity *entity = &sched->entity; + + rmb(); /* for list_empty to work without lock */ + wait_event(sched->job.wq, list_empty(&sched->job.list.head)); + + drm_sched_entity_fini(entity); + drm_sched_fini(drm_sched); + + /* Destroy workqueue after scheduler tear down, otherwise it might still + * be in use. + */ + if (sched->wq) + destroy_workqueue(sched->wq); } -void nouveau_sched_fini(struct nouveau_drm *drm) +void +nouveau_sched_destroy(struct nouveau_sched **psched) { - destroy_workqueue(drm->sched_wq); - drm_sched_fini(&drm->sched); + struct nouveau_sched *sched = *psched; + + nouveau_sched_fini(sched); + kfree(sched); + + *psched = NULL; } diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.h b/drivers/gpu/drm/nouveau/nouveau_sched.h index 27ac197925..e1f01a23e6 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.h +++ b/drivers/gpu/drm/nouveau/nouveau_sched.h @@ -5,7 +5,7 @@ #include <linux/types.h> -#include <drm/drm_exec.h> +#include <drm/drm_gpuvm.h> #include <drm/gpu_scheduler.h> #include "nouveau_drv.h" @@ -26,7 +26,8 @@ enum nouveau_job_state { struct nouveau_job_args { struct drm_file *file_priv; - struct nouveau_sched_entity *sched_entity; + struct nouveau_sched *sched; + u32 credits; enum dma_resv_usage resv_usage; bool sync; @@ -49,12 +50,12 @@ struct nouveau_job { enum nouveau_job_state state; - struct nouveau_sched_entity *entity; + struct nouveau_sched *sched; + struct list_head entry; struct drm_file *file_priv; struct nouveau_cli *cli; - struct drm_exec exec; enum dma_resv_usage resv_usage; struct dma_fence *done_fence; @@ -76,8 +77,8 @@ struct nouveau_job { /* If .submit() returns without any error, it is guaranteed that * armed_submit() is called. */ - int (*submit)(struct nouveau_job *); - void (*armed_submit)(struct nouveau_job *); + int (*submit)(struct nouveau_job *, struct drm_gpuvm_exec *); + void (*armed_submit)(struct nouveau_job *, struct drm_gpuvm_exec *); struct dma_fence *(*run)(struct nouveau_job *); void (*free)(struct nouveau_job *); enum drm_gpu_sched_stat (*timeout)(struct nouveau_job *); @@ -90,20 +91,17 @@ int nouveau_job_ucopy_syncs(struct nouveau_job_args *args, int nouveau_job_init(struct nouveau_job *job, struct nouveau_job_args *args); -void nouveau_job_free(struct nouveau_job *job); - -int nouveau_job_submit(struct nouveau_job *job); void nouveau_job_fini(struct nouveau_job *job); +int nouveau_job_submit(struct nouveau_job *job); +void nouveau_job_done(struct nouveau_job *job); +void nouveau_job_free(struct nouveau_job *job); -#define to_nouveau_sched_entity(entity) \ - container_of((entity), struct nouveau_sched_entity, base) - -struct nouveau_sched_entity { - struct drm_sched_entity base; +struct nouveau_sched { + struct drm_gpu_scheduler base; + struct drm_sched_entity entity; + struct workqueue_struct *wq; struct mutex mutex; - struct workqueue_struct *sched_wq; - struct { struct { struct list_head head; @@ -113,15 +111,8 @@ struct nouveau_sched_entity { } job; }; -int nouveau_sched_entity_init(struct nouveau_sched_entity *entity, - struct drm_gpu_scheduler *sched, - struct workqueue_struct *sched_wq); -void nouveau_sched_entity_fini(struct nouveau_sched_entity *entity); - -bool nouveau_sched_entity_qwork(struct nouveau_sched_entity *entity, - struct work_struct *work); - -int nouveau_sched_init(struct nouveau_drm *drm); -void nouveau_sched_fini(struct nouveau_drm *drm); +int nouveau_sched_create(struct nouveau_sched **psched, struct nouveau_drm *drm, + struct workqueue_struct *wq, u32 credit_limit); +void nouveau_sched_destroy(struct nouveau_sched **psched); #endif diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index 5cf892c50f..ee02cd833c 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -62,6 +62,8 @@ struct bind_job_op { enum vm_bind_op op; u32 flags; + struct drm_gpuvm_bo *vm_bo; + struct { u64 addr; u64 range; @@ -436,8 +438,9 @@ nouveau_uvma_region_complete(struct nouveau_uvma_region *reg) static void op_map_prepare_unwind(struct nouveau_uvma *uvma) { + struct drm_gpuva *va = &uvma->va; nouveau_uvma_gem_put(uvma); - drm_gpuva_remove(&uvma->va); + drm_gpuva_remove(va); nouveau_uvma_free(uvma); } @@ -466,6 +469,7 @@ nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm, break; case DRM_GPUVA_OP_REMAP: { struct drm_gpuva_op_remap *r = &op->remap; + struct drm_gpuva *va = r->unmap->va; if (r->next) op_map_prepare_unwind(new->next); @@ -473,7 +477,7 @@ nouveau_uvmm_sm_prepare_unwind(struct nouveau_uvmm *uvmm, if (r->prev) op_map_prepare_unwind(new->prev); - op_unmap_prepare_unwind(r->unmap->va); + op_unmap_prepare_unwind(va); break; } case DRM_GPUVA_OP_UNMAP: @@ -604,6 +608,9 @@ op_unmap_prepare(struct drm_gpuva_op_unmap *u) drm_gpuva_unmap(u); } +/* + * Note: @args should not be NULL when calling for a map operation. + */ static int nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm, struct nouveau_uvma_prealloc *new, @@ -624,7 +631,7 @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm, if (ret) goto unwind; - if (args && vmm_get_range) { + if (vmm_get_range) { ret = nouveau_uvmm_vmm_get(uvmm, vmm_get_start, vmm_get_range); if (ret) { @@ -632,6 +639,7 @@ nouveau_uvmm_sm_prepare(struct nouveau_uvmm *uvmm, goto unwind; } } + break; } case DRM_GPUVA_OP_REMAP: { @@ -804,15 +812,15 @@ op_remap(struct drm_gpuva_op_remap *r, struct drm_gpuva_op_unmap *u = r->unmap; struct nouveau_uvma *uvma = uvma_from_va(u->va); u64 addr = uvma->va.va.addr; - u64 range = uvma->va.va.range; + u64 end = uvma->va.va.addr + uvma->va.va.range; if (r->prev) addr = r->prev->va.addr + r->prev->va.range; if (r->next) - range = r->next->va.addr - addr; + end = r->next->va.addr; - op_unmap_range(u, addr, range); + op_unmap_range(u, addr, end - addr); } static int @@ -929,25 +937,13 @@ nouveau_uvmm_sm_unmap_cleanup(struct nouveau_uvmm *uvmm, static int nouveau_uvmm_validate_range(struct nouveau_uvmm *uvmm, u64 addr, u64 range) { - u64 end = addr + range; - u64 kernel_managed_end = uvmm->kernel_managed_addr + - uvmm->kernel_managed_size; - if (addr & ~PAGE_MASK) return -EINVAL; if (range & ~PAGE_MASK) return -EINVAL; - if (end <= addr) - return -EINVAL; - - if (addr < NOUVEAU_VA_SPACE_START || - end > NOUVEAU_VA_SPACE_END) - return -EINVAL; - - if (addr < kernel_managed_end && - end > uvmm->kernel_managed_addr) + if (!drm_gpuvm_range_valid(&uvmm->base, addr, range)) return -EINVAL; return 0; @@ -970,6 +966,12 @@ nouveau_uvmm_bind_job_free(struct kref *kref) { struct nouveau_uvmm_bind_job *job = container_of(kref, struct nouveau_uvmm_bind_job, kref); + struct bind_job_op *op, *next; + + list_for_each_op_safe(op, next, &job->ops) { + list_del(&op->entry); + kfree(op); + } nouveau_job_free(&job->base); kfree(job); @@ -1011,14 +1013,16 @@ bind_validate_op(struct nouveau_job *job, static void bind_validate_map_sparse(struct nouveau_job *job, u64 addr, u64 range) { - struct nouveau_uvmm_bind_job *bind_job; - struct nouveau_sched_entity *entity = job->entity; + struct nouveau_sched *sched = job->sched; + struct nouveau_job *__job; struct bind_job_op *op; u64 end = addr + range; again: - spin_lock(&entity->job.list.lock); - list_for_each_entry(bind_job, &entity->job.list.head, entry) { + spin_lock(&sched->job.list.lock); + list_for_each_entry(__job, &sched->job.list.head, entry) { + struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(__job); + list_for_each_op(op, &bind_job->ops) { if (op->op == OP_UNMAP) { u64 op_addr = op->va.addr; @@ -1026,7 +1030,7 @@ again: if (!(end <= op_addr || addr >= op_end)) { nouveau_uvmm_bind_job_get(bind_job); - spin_unlock(&entity->job.list.lock); + spin_unlock(&sched->job.list.lock); wait_for_completion(&bind_job->complete); nouveau_uvmm_bind_job_put(bind_job); goto again; @@ -1034,7 +1038,7 @@ again: } } } - spin_unlock(&entity->job.list.lock); + spin_unlock(&sched->job.list.lock); } static int @@ -1113,22 +1117,28 @@ bind_validate_region(struct nouveau_job *job) } static void -bind_link_gpuvas(struct drm_gpuva_ops *ops, struct nouveau_uvma_prealloc *new) +bind_link_gpuvas(struct bind_job_op *bop) { + struct nouveau_uvma_prealloc *new = &bop->new; + struct drm_gpuvm_bo *vm_bo = bop->vm_bo; + struct drm_gpuva_ops *ops = bop->ops; struct drm_gpuva_op *op; drm_gpuva_for_each_op(op, ops) { switch (op->op) { case DRM_GPUVA_OP_MAP: - drm_gpuva_link(&new->map->va); + drm_gpuva_link(&new->map->va, vm_bo); break; - case DRM_GPUVA_OP_REMAP: + case DRM_GPUVA_OP_REMAP: { + struct drm_gpuva *va = op->remap.unmap->va; + if (op->remap.prev) - drm_gpuva_link(&new->prev->va); + drm_gpuva_link(&new->prev->va, va->vm_bo); if (op->remap.next) - drm_gpuva_link(&new->next->va); - drm_gpuva_unlink(op->remap.unmap->va); + drm_gpuva_link(&new->next->va, va->vm_bo); + drm_gpuva_unlink(va); break; + } case DRM_GPUVA_OP_UNMAP: drm_gpuva_unlink(op->unmap.va); break; @@ -1139,21 +1149,70 @@ bind_link_gpuvas(struct drm_gpuva_ops *ops, struct nouveau_uvma_prealloc *new) } static int -nouveau_uvmm_bind_job_submit(struct nouveau_job *job) +bind_lock_validate(struct nouveau_job *job, struct drm_exec *exec, + unsigned int num_fences) +{ + struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job); + struct bind_job_op *op; + int ret; + + list_for_each_op(op, &bind_job->ops) { + struct drm_gpuva_op *va_op; + + if (!op->ops) + continue; + + drm_gpuva_for_each_op(va_op, op->ops) { + struct drm_gem_object *obj = op_gem_obj(va_op); + + if (unlikely(!obj)) + continue; + + ret = drm_exec_prepare_obj(exec, obj, num_fences); + if (ret) + return ret; + + /* Don't validate GEMs backing mappings we're about to + * unmap, it's not worth the effort. + */ + if (va_op->op == DRM_GPUVA_OP_UNMAP) + continue; + + ret = nouveau_bo_validate(nouveau_gem_object(obj), + true, false); + if (ret) + return ret; + } + } + + return 0; +} + +static int +nouveau_uvmm_bind_job_submit(struct nouveau_job *job, + struct drm_gpuvm_exec *vme) { struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli); struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job); - struct nouveau_sched_entity *entity = job->entity; - struct drm_exec *exec = &job->exec; + struct drm_exec *exec = &vme->exec; struct bind_job_op *op; int ret; list_for_each_op(op, &bind_job->ops) { if (op->op == OP_MAP) { - op->gem.obj = drm_gem_object_lookup(job->file_priv, - op->gem.handle); - if (!op->gem.obj) + struct drm_gem_object *obj = op->gem.obj = + drm_gem_object_lookup(job->file_priv, + op->gem.handle); + if (!obj) return -ENOENT; + + dma_resv_lock(obj->resv, NULL); + op->vm_bo = drm_gpuvm_bo_obtain(&uvmm->base, obj); + dma_resv_unlock(obj->resv); + if (IS_ERR(op->vm_bo)) + return PTR_ERR(op->vm_bo); + + drm_gpuvm_bo_extobj_add(op->vm_bo); } ret = bind_validate_op(job, op); @@ -1176,6 +1235,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job) * unwind all GPU VA space changes on failure. */ nouveau_uvmm_lock(uvmm); + list_for_each_op(op, &bind_job->ops) { switch (op->op) { case OP_MAP_SPARSE: @@ -1287,55 +1347,13 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job) } } - drm_exec_init(exec, DRM_EXEC_INTERRUPTIBLE_WAIT | - DRM_EXEC_IGNORE_DUPLICATES); + drm_exec_init(exec, vme->flags, 0); drm_exec_until_all_locked(exec) { - list_for_each_op(op, &bind_job->ops) { - struct drm_gpuva_op *va_op; - - if (IS_ERR_OR_NULL(op->ops)) - continue; - - drm_gpuva_for_each_op(va_op, op->ops) { - struct drm_gem_object *obj = op_gem_obj(va_op); - - if (unlikely(!obj)) - continue; - - ret = drm_exec_prepare_obj(exec, obj, 1); - drm_exec_retry_on_contention(exec); - if (ret) { - op = list_last_op(&bind_job->ops); - goto unwind; - } - } - } - } - - list_for_each_op(op, &bind_job->ops) { - struct drm_gpuva_op *va_op; - - if (IS_ERR_OR_NULL(op->ops)) - continue; - - drm_gpuva_for_each_op(va_op, op->ops) { - struct drm_gem_object *obj = op_gem_obj(va_op); - - if (unlikely(!obj)) - continue; - - /* Don't validate GEMs backing mappings we're about to - * unmap, it's not worth the effort. - */ - if (unlikely(va_op->op == DRM_GPUVA_OP_UNMAP)) - continue; - - ret = nouveau_bo_validate(nouveau_gem_object(obj), - true, false); - if (ret) { - op = list_last_op(&bind_job->ops); - goto unwind; - } + ret = bind_lock_validate(job, exec, vme->num_fences); + drm_exec_retry_on_contention(exec); + if (ret) { + op = list_last_op(&bind_job->ops); + goto unwind; } } @@ -1364,7 +1382,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job) case OP_UNMAP_SPARSE: case OP_MAP: case OP_UNMAP: - bind_link_gpuvas(op->ops, &op->new); + bind_link_gpuvas(op); break; default: break; @@ -1372,10 +1390,6 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job) } nouveau_uvmm_unlock(uvmm); - spin_lock(&entity->job.list.lock); - list_add(&bind_job->entry, &entity->job.list.head); - spin_unlock(&entity->job.list.lock); - return 0; unwind_continue: @@ -1410,21 +1424,17 @@ unwind: } nouveau_uvmm_unlock(uvmm); - drm_exec_fini(exec); + drm_gpuvm_exec_unlock(vme); return ret; } static void -nouveau_uvmm_bind_job_armed_submit(struct nouveau_job *job) +nouveau_uvmm_bind_job_armed_submit(struct nouveau_job *job, + struct drm_gpuvm_exec *vme) { - struct drm_exec *exec = &job->exec; - struct drm_gem_object *obj; - unsigned long index; - - drm_exec_for_each_locked_object(exec, index, obj) - dma_resv_add_fence(obj->resv, job->done_fence, job->resv_usage); - - drm_exec_fini(exec); + drm_gpuvm_exec_resv_add_fence(vme, job->done_fence, + job->resv_usage, job->resv_usage); + drm_gpuvm_exec_unlock(vme); } static struct dma_fence * @@ -1462,14 +1472,11 @@ out: } static void -nouveau_uvmm_bind_job_free_work_fn(struct work_struct *work) +nouveau_uvmm_bind_job_cleanup(struct nouveau_job *job) { - struct nouveau_uvmm_bind_job *bind_job = - container_of(work, struct nouveau_uvmm_bind_job, work); - struct nouveau_job *job = &bind_job->base; + struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job); struct nouveau_uvmm *uvmm = nouveau_cli_uvmm(job->cli); - struct nouveau_sched_entity *entity = job->entity; - struct bind_job_op *op, *next; + struct bind_job_op *op; list_for_each_op(op, &bind_job->ops) { struct drm_gem_object *obj = op->gem.obj; @@ -1511,42 +1518,27 @@ nouveau_uvmm_bind_job_free_work_fn(struct work_struct *work) if (!IS_ERR_OR_NULL(op->ops)) drm_gpuva_ops_free(&uvmm->base, op->ops); + if (!IS_ERR_OR_NULL(op->vm_bo)) { + dma_resv_lock(obj->resv, NULL); + drm_gpuvm_bo_put(op->vm_bo); + dma_resv_unlock(obj->resv); + } + if (obj) drm_gem_object_put(obj); } - spin_lock(&entity->job.list.lock); - list_del(&bind_job->entry); - spin_unlock(&entity->job.list.lock); - + nouveau_job_done(job); complete_all(&bind_job->complete); - wake_up(&entity->job.wq); - - /* Remove and free ops after removing the bind job from the job list to - * avoid races against bind_validate_map_sparse(). - */ - list_for_each_op_safe(op, next, &bind_job->ops) { - list_del(&op->entry); - kfree(op); - } nouveau_uvmm_bind_job_put(bind_job); } -static void -nouveau_uvmm_bind_job_free_qwork(struct nouveau_job *job) -{ - struct nouveau_uvmm_bind_job *bind_job = to_uvmm_bind_job(job); - struct nouveau_sched_entity *entity = job->entity; - - nouveau_sched_entity_qwork(entity, &bind_job->work); -} - static struct nouveau_job_ops nouveau_bind_job_ops = { .submit = nouveau_uvmm_bind_job_submit, .armed_submit = nouveau_uvmm_bind_job_armed_submit, .run = nouveau_uvmm_bind_job_run, - .free = nouveau_uvmm_bind_job_free_qwork, + .free = nouveau_uvmm_bind_job_cleanup, }; static int @@ -1607,7 +1599,6 @@ nouveau_uvmm_bind_job_init(struct nouveau_uvmm_bind_job **pjob, return ret; INIT_LIST_HEAD(&job->ops); - INIT_LIST_HEAD(&job->entry); for (i = 0; i < __args->op.count; i++) { ret = bind_job_op_from_uop(&op, &__args->op.s[i]); @@ -1618,11 +1609,12 @@ nouveau_uvmm_bind_job_init(struct nouveau_uvmm_bind_job **pjob, } init_completion(&job->complete); - INIT_WORK(&job->work, nouveau_uvmm_bind_job_free_work_fn); - args.sched_entity = __args->sched_entity; args.file_priv = __args->file_priv; + args.sched = __args->sched; + args.credits = 1; + args.in_sync.count = __args->in_sync.count; args.in_sync.s = __args->in_sync.s; @@ -1648,18 +1640,6 @@ err_free: return ret; } -int -nouveau_uvmm_ioctl_vm_init(struct drm_device *dev, - void *data, - struct drm_file *file_priv) -{ - struct nouveau_cli *cli = nouveau_cli(file_priv); - struct drm_nouveau_vm_init *init = data; - - return nouveau_uvmm_init(&cli->uvmm, cli, init->kernel_managed_addr, - init->kernel_managed_size); -} - static int nouveau_uvmm_vm_bind(struct nouveau_uvmm_bind_job_args *args) { @@ -1760,7 +1740,7 @@ nouveau_uvmm_ioctl_vm_bind(struct drm_device *dev, if (ret) return ret; - args.sched_entity = &cli->sched_entity; + args.sched = cli->sched; args.file_priv = file_priv; ret = nouveau_uvmm_vm_bind(&args); @@ -1776,15 +1756,18 @@ void nouveau_uvmm_bo_map_all(struct nouveau_bo *nvbo, struct nouveau_mem *mem) { struct drm_gem_object *obj = &nvbo->bo.base; + struct drm_gpuvm_bo *vm_bo; struct drm_gpuva *va; dma_resv_assert_held(obj->resv); - drm_gem_for_each_gpuva(va, obj) { - struct nouveau_uvma *uvma = uvma_from_va(va); + drm_gem_for_each_gpuvm_bo(vm_bo, obj) { + drm_gpuvm_bo_for_each_va(va, vm_bo) { + struct nouveau_uvma *uvma = uvma_from_va(va); - nouveau_uvma_map(uvma, mem); - drm_gpuva_invalidate(va, false); + nouveau_uvma_map(uvma, mem); + drm_gpuva_invalidate(va, false); + } } } @@ -1792,29 +1775,62 @@ void nouveau_uvmm_bo_unmap_all(struct nouveau_bo *nvbo) { struct drm_gem_object *obj = &nvbo->bo.base; + struct drm_gpuvm_bo *vm_bo; struct drm_gpuva *va; dma_resv_assert_held(obj->resv); - drm_gem_for_each_gpuva(va, obj) { - struct nouveau_uvma *uvma = uvma_from_va(va); + drm_gem_for_each_gpuvm_bo(vm_bo, obj) { + drm_gpuvm_bo_for_each_va(va, vm_bo) { + struct nouveau_uvma *uvma = uvma_from_va(va); - nouveau_uvma_unmap(uvma); - drm_gpuva_invalidate(va, true); + nouveau_uvma_unmap(uvma); + drm_gpuva_invalidate(va, true); + } } } +static void +nouveau_uvmm_free(struct drm_gpuvm *gpuvm) +{ + struct nouveau_uvmm *uvmm = uvmm_from_gpuvm(gpuvm); + + kfree(uvmm); +} + +static int +nouveau_uvmm_bo_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) +{ + struct nouveau_bo *nvbo = nouveau_gem_object(vm_bo->obj); + + return nouveau_bo_validate(nvbo, true, false); +} + +static const struct drm_gpuvm_ops gpuvm_ops = { + .vm_free = nouveau_uvmm_free, + .vm_bo_validate = nouveau_uvmm_bo_validate, +}; + int -nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli, - u64 kernel_managed_addr, u64 kernel_managed_size) +nouveau_uvmm_ioctl_vm_init(struct drm_device *dev, + void *data, + struct drm_file *file_priv) { + struct nouveau_uvmm *uvmm; + struct nouveau_cli *cli = nouveau_cli(file_priv); + struct drm_device *drm = cli->drm->dev; + struct drm_gem_object *r_obj; + struct drm_nouveau_vm_init *init = data; + u64 kernel_managed_end; int ret; - u64 kernel_managed_end = kernel_managed_addr + kernel_managed_size; - mutex_init(&uvmm->mutex); - dma_resv_init(&uvmm->resv); - mt_init_flags(&uvmm->region_mt, MT_FLAGS_LOCK_EXTERN); - mt_set_external_lock(&uvmm->region_mt, &uvmm->mutex); + if (check_add_overflow(init->kernel_managed_addr, + init->kernel_managed_size, + &kernel_managed_end)) + return -EINVAL; + + if (kernel_managed_end > NOUVEAU_VA_SPACE_END) + return -EINVAL; mutex_lock(&cli->mutex); @@ -1823,39 +1839,48 @@ nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli, goto out_unlock; } - if (kernel_managed_end <= kernel_managed_addr) { - ret = -EINVAL; + uvmm = kzalloc(sizeof(*uvmm), GFP_KERNEL); + if (!uvmm) { + ret = -ENOMEM; goto out_unlock; } - if (kernel_managed_end > NOUVEAU_VA_SPACE_END) { - ret = -EINVAL; + r_obj = drm_gpuvm_resv_object_alloc(drm); + if (!r_obj) { + kfree(uvmm); + ret = -ENOMEM; goto out_unlock; } - uvmm->kernel_managed_addr = kernel_managed_addr; - uvmm->kernel_managed_size = kernel_managed_size; + mutex_init(&uvmm->mutex); + mt_init_flags(&uvmm->region_mt, MT_FLAGS_LOCK_EXTERN); + mt_set_external_lock(&uvmm->region_mt, &uvmm->mutex); - drm_gpuvm_init(&uvmm->base, cli->name, + drm_gpuvm_init(&uvmm->base, cli->name, 0, drm, r_obj, NOUVEAU_VA_SPACE_START, NOUVEAU_VA_SPACE_END, - kernel_managed_addr, kernel_managed_size, - NULL); + init->kernel_managed_addr, + init->kernel_managed_size, + &gpuvm_ops); + /* GPUVM takes care from here on. */ + drm_gem_object_put(r_obj); ret = nvif_vmm_ctor(&cli->mmu, "uvmm", cli->vmm.vmm.object.oclass, RAW, - kernel_managed_addr, kernel_managed_size, - NULL, 0, &cli->uvmm.vmm.vmm); + init->kernel_managed_addr, + init->kernel_managed_size, + NULL, 0, &uvmm->vmm.vmm); if (ret) - goto out_free_gpuva_mgr; + goto out_gpuvm_fini; - cli->uvmm.vmm.cli = cli; + uvmm->vmm.cli = cli; + cli->uvmm.ptr = uvmm; mutex_unlock(&cli->mutex); return 0; -out_free_gpuva_mgr: - drm_gpuvm_destroy(&uvmm->base); +out_gpuvm_fini: + drm_gpuvm_put(&uvmm->base); out_unlock: mutex_unlock(&cli->mutex); return ret; @@ -1867,15 +1892,8 @@ nouveau_uvmm_fini(struct nouveau_uvmm *uvmm) MA_STATE(mas, &uvmm->region_mt, 0, 0); struct nouveau_uvma_region *reg; struct nouveau_cli *cli = uvmm->vmm.cli; - struct nouveau_sched_entity *entity = &cli->sched_entity; struct drm_gpuva *va, *next; - if (!cli) - return; - - rmb(); /* for list_empty to work without lock */ - wait_event(entity->job.wq, list_empty(&entity->job.list.head)); - nouveau_uvmm_lock(uvmm); drm_gpuvm_for_each_va_safe(va, next, &uvmm->base) { struct nouveau_uvma *uvma = uvma_from_va(va); @@ -1910,8 +1928,6 @@ nouveau_uvmm_fini(struct nouveau_uvmm *uvmm) mutex_lock(&cli->mutex); nouveau_vmm_fini(&uvmm->vmm); - drm_gpuvm_destroy(&uvmm->base); + drm_gpuvm_put(&uvmm->base); mutex_unlock(&cli->mutex); - - dma_resv_fini(&uvmm->resv); } diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.h b/drivers/gpu/drm/nouveau/nouveau_uvmm.h index a308c59760..9d3c348581 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.h +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.h @@ -12,12 +12,6 @@ struct nouveau_uvmm { struct nouveau_vmm vmm; struct maple_tree region_mt; struct mutex mutex; - struct dma_resv resv; - - u64 kernel_managed_addr; - u64 kernel_managed_size; - - bool disabled; }; struct nouveau_uvma_region { @@ -50,8 +44,6 @@ struct nouveau_uvmm_bind_job { struct nouveau_job base; struct kref kref; - struct list_head entry; - struct work_struct work; struct completion complete; /* struct bind_job_op */ @@ -60,7 +52,7 @@ struct nouveau_uvmm_bind_job { struct nouveau_uvmm_bind_job_args { struct drm_file *file_priv; - struct nouveau_sched_entity *sched_entity; + struct nouveau_sched *sched; unsigned int flags; @@ -82,8 +74,6 @@ struct nouveau_uvmm_bind_job_args { #define to_uvmm_bind_job(job) container_of((job), struct nouveau_uvmm_bind_job, base) -int nouveau_uvmm_init(struct nouveau_uvmm *uvmm, struct nouveau_cli *cli, - u64 kernel_managed_addr, u64 kernel_managed_size); void nouveau_uvmm_fini(struct nouveau_uvmm *uvmm); void nouveau_uvmm_bo_map_all(struct nouveau_bo *nvbov, struct nouveau_mem *mem); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c index 87a62d4ff4..7d4716dcd5 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c @@ -24,7 +24,6 @@ #include "chan.h" #include "chid.h" #include "cgrp.h" -#include "chid.h" #include "runl.h" #include "priv.h" diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c index 4bf486b571..cb05f7f48a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowof.c @@ -66,11 +66,16 @@ of_init(struct nvkm_bios *bios, const char *name) return ERR_PTR(-EINVAL); } +static void of_fini(void *p) +{ + kfree(p); +} + const struct nvbios_source nvbios_of = { .name = "OpenFirmware", .init = of_init, - .fini = (void(*)(void *))kfree, + .fini = of_fini, .read = of_read, .size = of_size, .rw = false, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c index 7bcbc4895e..271bfa038f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm107.c @@ -25,6 +25,7 @@ #include <subdev/bios.h> #include <subdev/bios/init.h> +#include <subdev/gsp.h> void gm107_devinit_disable(struct nvkm_devinit *init) @@ -33,10 +34,13 @@ gm107_devinit_disable(struct nvkm_devinit *init) u32 r021c00 = nvkm_rd32(device, 0x021c00); u32 r021c04 = nvkm_rd32(device, 0x021c04); - if (r021c00 & 0x00000001) - nvkm_subdev_disable(device, NVKM_ENGINE_CE, 0); - if (r021c00 & 0x00000004) - nvkm_subdev_disable(device, NVKM_ENGINE_CE, 2); + /* gsp only wants to enable/disable display */ + if (!nvkm_gsp_rm(device->gsp)) { + if (r021c00 & 0x00000001) + nvkm_subdev_disable(device, NVKM_ENGINE_CE, 0); + if (r021c00 & 0x00000004) + nvkm_subdev_disable(device, NVKM_ENGINE_CE, 2); + } if (r021c04 & 0x00000001) nvkm_subdev_disable(device, NVKM_ENGINE_DISP, 0); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c index 11b4c9c274..666eb93b17 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/r535.c @@ -41,6 +41,7 @@ r535_devinit_new(const struct nvkm_devinit_func *hw, rm->dtor = r535_devinit_dtor; rm->post = hw->post; + rm->disable = hw->disable; ret = nv50_devinit_new_(rm, device, type, inst, pdevinit); if (ret) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c index d66fc35706..a73a5b5897 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/r535.c @@ -2312,8 +2312,12 @@ r535_gsp_load(struct nvkm_gsp *gsp, int ver, const struct nvkm_gsp_fwif *fwif) { struct nvkm_subdev *subdev = &gsp->subdev; int ret; + bool enable_gsp = fwif->enable; - if (!nvkm_boolopt(subdev->device->cfgopt, "NvGspRm", fwif->enable)) +#if IS_ENABLED(CONFIG_DRM_NOUVEAU_GSP_DEFAULT) + enable_gsp = true; +#endif + if (!nvkm_boolopt(subdev->device->cfgopt, "NvGspRm", enable_gsp)) return -EINVAL; if ((ret = r535_gsp_load_fw(gsp, "gsp", fwif->ver, &gsp->fws.rm)) || diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c index a7f3fc342d..dd5b5a17ec 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c @@ -222,8 +222,11 @@ nv50_instobj_acquire(struct nvkm_memory *memory) void __iomem *map = NULL; /* Already mapped? */ - if (refcount_inc_not_zero(&iobj->maps)) + if (refcount_inc_not_zero(&iobj->maps)) { + /* read barrier match the wmb on refcount set */ + smp_rmb(); return iobj->map; + } /* Take the lock, and re-check that another thread hasn't * already mapped the object in the meantime. @@ -250,6 +253,8 @@ nv50_instobj_acquire(struct nvkm_memory *memory) iobj->base.memory.ptrs = &nv50_instobj_fast; else iobj->base.memory.ptrs = &nv50_instobj_slow; + /* barrier to ensure the ptrs are written before refcount is set */ + smp_wmb(); refcount_set(&iobj->maps, 1); } diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c index e7e8fdf3ad..29682722b0 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c @@ -28,19 +28,14 @@ static void gp10b_ltc_init(struct nvkm_ltc *ltc) { struct nvkm_device *device = ltc->subdev.device; - struct iommu_fwspec *spec; + u32 sid; nvkm_wr32(device, 0x17e27c, ltc->ltc_nr); nvkm_wr32(device, 0x17e000, ltc->ltc_nr); nvkm_wr32(device, 0x100800, ltc->ltc_nr); - spec = dev_iommu_fwspec_get(device->dev); - if (spec) { - u32 sid = spec->ids[0] & 0xffff; - - /* stream ID */ + if (tegra_dev_iommu_get_stream_id(device->dev, &sid)) nvkm_wr32(device, 0x160000, sid << 2); - } } static const struct nvkm_ltc_func |