Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c')
-rw-r--r--   drivers/gpu/drm/amd/amdkfd/kfd_svm.c   135
1 file changed, 84 insertions(+), 51 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 8e368e465..9af1d0943 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -231,7 +231,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
return r;
}
-void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
+void svm_range_dma_unmap_dev(struct device *dev, dma_addr_t *dma_addr,
unsigned long offset, unsigned long npages)
{
enum dma_data_direction dir = DMA_BIDIRECTIONAL;
@@ -249,7 +249,7 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
}
}
-void svm_range_free_dma_mappings(struct svm_range *prange, bool unmap_dma)
+void svm_range_dma_unmap(struct svm_range *prange)
{
struct kfd_process_device *pdd;
dma_addr_t *dma_addr;
@@ -270,10 +270,8 @@ void svm_range_free_dma_mappings(struct svm_range *prange, bool unmap_dma)
continue;
}
dev = &pdd->dev->adev->pdev->dev;
- if (unmap_dma)
- svm_range_dma_unmap(dev, dma_addr, 0, prange->npages);
- kvfree(dma_addr);
- prange->dma_addr[gpuidx] = NULL;
+
+ svm_range_dma_unmap_dev(dev, dma_addr, 0, prange->npages);
}
}
@@ -281,18 +279,29 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap)
{
uint64_t size = (prange->last - prange->start + 1) << PAGE_SHIFT;
struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
+ uint32_t gpuidx;
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange,
prange->start, prange->last);
svm_range_vram_node_free(prange);
- svm_range_free_dma_mappings(prange, do_unmap);
+ if (do_unmap)
+ svm_range_dma_unmap(prange);
if (do_unmap && !p->xnack_enabled) {
pr_debug("unreserve prange 0x%p size: 0x%llx\n", prange, size);
amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
}
+
+ /* free dma_addr array for each gpu */
+ for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) {
+ if (prange->dma_addr[gpuidx]) {
+ kvfree(prange->dma_addr[gpuidx]);
+ prange->dma_addr[gpuidx] = NULL;
+ }
+ }
+
mutex_destroy(&prange->lock);
mutex_destroy(&prange->migrate_mutex);
kfree(prange);
@@ -391,14 +400,9 @@ static void svm_range_bo_release(struct kref *kref)
spin_lock(&svm_bo->list_lock);
}
spin_unlock(&svm_bo->list_lock);
- if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base)) {
- /* We're not in the eviction worker.
- * Signal the fence and synchronize with any
- * pending eviction work.
- */
+ if (!dma_fence_is_signaled(&svm_bo->eviction_fence->base))
+ /* We're not in the eviction worker. Signal the fence. */
dma_fence_signal(&svm_bo->eviction_fence->base);
- cancel_work_sync(&svm_bo->eviction_work);
- }
dma_fence_put(&svm_bo->eviction_fence->base);
amdgpu_bo_unref(&svm_bo->bo);
kfree(svm_bo);
@@ -1085,26 +1089,32 @@ svm_range_split(struct svm_range *prange, uint64_t start, uint64_t last,
}
static int
-svm_range_split_tail(struct svm_range *prange,
- uint64_t new_last, struct list_head *insert_list)
+svm_range_split_tail(struct svm_range *prange, uint64_t new_last,
+ struct list_head *insert_list, struct list_head *remap_list)
{
struct svm_range *tail;
int r = svm_range_split(prange, prange->start, new_last, &tail);
- if (!r)
+ if (!r) {
list_add(&tail->list, insert_list);
+ if (!IS_ALIGNED(new_last + 1, 1UL << prange->granularity))
+ list_add(&tail->update_list, remap_list);
+ }
return r;
}
static int
-svm_range_split_head(struct svm_range *prange,
- uint64_t new_start, struct list_head *insert_list)
+svm_range_split_head(struct svm_range *prange, uint64_t new_start,
+ struct list_head *insert_list, struct list_head *remap_list)
{
struct svm_range *head;
int r = svm_range_split(prange, new_start, prange->last, &head);
- if (!r)
+ if (!r) {
list_add(&head->list, insert_list);
+ if (!IS_ALIGNED(new_start, 1UL << prange->granularity))
+ list_add(&head->update_list, remap_list);
+ }
return r;
}
@@ -1196,14 +1206,15 @@ svm_range_get_pte_flags(struct kfd_node *node,
uint32_t mapping_flags = 0;
uint64_t pte_flags;
bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
- bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
+ bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT | KFD_IOCTL_SVM_FLAG_EXT_COHERENT);
+ bool ext_coherent = flags & KFD_IOCTL_SVM_FLAG_EXT_COHERENT;
bool uncached = false; /*flags & KFD_IOCTL_SVM_FLAG_UNCACHED;*/
unsigned int mtype_local;
if (domain == SVM_RANGE_VRAM_DOMAIN)
bo_node = prange->svm_bo->node;
- switch (node->adev->ip_versions[GC_HWIP][0]) {
+ switch (amdgpu_ip_version(node->adev, GC_HWIP, 0)) {
case IP_VERSION(9, 4, 1):
if (domain == SVM_RANGE_VRAM_DOMAIN) {
if (bo_node == node) {
@@ -1239,8 +1250,11 @@ svm_range_get_pte_flags(struct kfd_node *node,
}
break;
case IP_VERSION(9, 4, 3):
- mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC :
- (amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW);
+ if (ext_coherent)
+ mtype_local = node->adev->rev_id ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_UC;
+ else
+ mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC :
+ amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
snoop = true;
if (uncached) {
mapping_flags |= AMDGPU_VM_MTYPE_UC;
@@ -1249,10 +1263,12 @@ svm_range_get_pte_flags(struct kfd_node *node,
if (bo_node->adev == node->adev &&
(!bo_node->xcp || !node->xcp || bo_node->xcp->mem_id == node->xcp->mem_id))
mapping_flags |= mtype_local;
- /* local HBM region far from partition or remote XGMI GPU */
- else if (svm_nodes_in_same_hive(bo_node, node))
+ /* local HBM region far from partition or remote XGMI GPU
+ * with regular system scope coherence
+ */
+ else if (svm_nodes_in_same_hive(bo_node, node) && !ext_coherent)
mapping_flags |= AMDGPU_VM_MTYPE_NC;
- /* PCIe P2P */
+ /* PCIe P2P or extended system scope coherence */
else
mapping_flags |= AMDGPU_VM_MTYPE_UC;
/* system memory accessed by the APU */
@@ -1263,7 +1279,7 @@ svm_range_get_pte_flags(struct kfd_node *node,
if (num_possible_nodes() <= 1)
mapping_flags |= mtype_local;
else
- mapping_flags |= AMDGPU_VM_MTYPE_NC;
+ mapping_flags |= ext_coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
/* system memory accessed by the dGPU */
} else {
mapping_flags |= AMDGPU_VM_MTYPE_UC;
@@ -1298,7 +1314,7 @@ svm_range_unmap_from_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
pr_debug("[0x%llx 0x%llx]\n", start, last);
- return amdgpu_vm_update_range(adev, vm, false, true, true, NULL, start,
+ return amdgpu_vm_update_range(adev, vm, false, true, true, false, NULL, start,
last, init_pte_value, 0, 0, NULL, NULL,
fence);
}
@@ -1405,8 +1421,8 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange,
* different memory partition based on fpfn/lpfn, we should use
* same vm_manager.vram_base_offset regardless memory partition.
*/
- r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL,
- last_start, prange->start + i,
+ r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, true,
+ NULL, last_start, prange->start + i,
pte_flags,
(last_start - prange->start) << PAGE_SHIFT,
bo_adev ? bo_adev->vm_manager.vram_base_offset : 0,
@@ -2051,6 +2067,7 @@ svm_range_split_new(struct svm_range_list *svms, uint64_t start, uint64_t last,
* @update_list: output, the ranges need validate and update GPU mapping
* @insert_list: output, the ranges need insert to svms
* @remove_list: output, the ranges are replaced and need remove from svms
+ * @remap_list: output, remap unaligned svm ranges
*
* Check if the virtual address range has overlap with any existing ranges,
* split partly overlapping ranges and add new ranges in the gaps. All changes
@@ -2074,7 +2091,7 @@ static int
svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs,
struct list_head *update_list, struct list_head *insert_list,
- struct list_head *remove_list)
+ struct list_head *remove_list, struct list_head *remap_list)
{
unsigned long last = start + size - 1UL;
struct svm_range_list *svms = &p->svms;
@@ -2090,6 +2107,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
INIT_LIST_HEAD(insert_list);
INIT_LIST_HEAD(remove_list);
INIT_LIST_HEAD(&new_list);
+ INIT_LIST_HEAD(remap_list);
node = interval_tree_iter_first(&svms->objects, start, last);
while (node) {
@@ -2126,14 +2144,14 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
if (node->start < start) {
pr_debug("change old range start\n");
r = svm_range_split_head(prange, start,
- insert_list);
+ insert_list, remap_list);
if (r)
goto out;
}
if (node->last > last) {
pr_debug("change old range last\n");
r = svm_range_split_tail(prange, last,
- insert_list);
+ insert_list, remap_list);
if (r)
goto out;
}
@@ -2348,8 +2366,10 @@ retry:
mutex_unlock(&svms->lock);
mmap_write_unlock(mm);
- /* Pairs with mmget in svm_range_add_list_work */
- mmput(mm);
+ /* Pairs with mmget in svm_range_add_list_work. If dropping the
+ * last mm refcount, schedule release work to avoid circular locking
+ */
+ mmput_async(mm);
spin_lock(&svms->deferred_list_lock);
}
@@ -2660,6 +2680,7 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
{
struct vm_area_struct *vma;
struct interval_tree_node *node;
+ struct rb_node *rb_node;
unsigned long start_limit, end_limit;
vma = vma_lookup(p->mm, addr << PAGE_SHIFT);
@@ -2679,16 +2700,15 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
if (node) {
end_limit = min(end_limit, node->start);
/* Last range that ends before the fault address */
- node = container_of(rb_prev(&node->rb),
- struct interval_tree_node, rb);
+ rb_node = rb_prev(&node->rb);
} else {
/* Last range must end before addr because
* there was no range after addr
*/
- node = container_of(rb_last(&p->svms.objects.rb_root),
- struct interval_tree_node, rb);
+ rb_node = rb_last(&p->svms.objects.rb_root);
}
- if (node) {
+ if (rb_node) {
+ node = container_of(rb_node, struct interval_tree_node, rb);
if (node->last >= addr) {
WARN(1, "Overlap with prev node and page fault addr\n");
return -EFAULT;
@@ -3424,13 +3444,14 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence)
{
- if (!fence)
- return -EINVAL;
-
- if (dma_fence_is_signaled(&fence->base))
- return 0;
-
- if (fence->svm_bo) {
+ /* Dereferencing fence->svm_bo is safe here because the fence hasn't
+ * signaled yet and we're under the protection of the fence->lock.
+ * After the fence is signaled in svm_range_bo_release, we cannot get
+ * here any more.
+ *
+ * Reference is dropped in svm_range_evict_svm_bo_worker.
+ */
+ if (svm_bo_ref_unless_zero(fence->svm_bo)) {
WRITE_ONCE(fence->svm_bo->evicting, 1);
schedule_work(&fence->svm_bo->eviction_work);
}
@@ -3445,8 +3466,6 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
int r = 0;
svm_bo = container_of(work, struct svm_range_bo, eviction_work);
- if (!svm_bo_ref_unless_zero(svm_bo))
- return; /* svm_bo was freed while eviction was pending */
if (mmget_not_zero(svm_bo->eviction_fence->mm)) {
mm = svm_bo->eviction_fence->mm;
@@ -3509,6 +3528,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
struct list_head update_list;
struct list_head insert_list;
struct list_head remove_list;
+ struct list_head remap_list;
struct svm_range_list *svms;
struct svm_range *prange;
struct svm_range *next;
@@ -3540,7 +3560,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
/* Add new range and split existing ranges as needed */
r = svm_range_add(p, start, size, nattr, attrs, &update_list,
- &insert_list, &remove_list);
+ &insert_list, &remove_list, &remap_list);
if (r) {
mutex_unlock(&svms->lock);
mmap_write_unlock(mm);
@@ -3605,6 +3625,19 @@ out_unlock_range:
ret = r;
}
+ list_for_each_entry(prange, &remap_list, update_list) {
+ pr_debug("Remapping prange 0x%p [0x%lx 0x%lx]\n",
+ prange, prange->start, prange->last);
+ mutex_lock(&prange->migrate_mutex);
+ r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE,
+ true, true, prange->mapped_to_gpu);
+ if (r)
+ pr_debug("failed %d on remap svm range\n", r);
+ mutex_unlock(&prange->migrate_mutex);
+ if (r)
+ ret = r;
+ }
+
dynamic_svm_range_dump(svms);
mutex_unlock(&svms->lock);