drivers/gpu/drm/xe/xe_bb.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112

// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_bb.h"

#include "instructions/xe_mi_commands.h"
#include "regs/xe_gpu_commands.h"
#include "xe_device.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_hw_fence.h"
#include "xe_sa.h"
#include "xe_sched_job.h"
#include "xe_vm_types.h"

/*
 * Number of bytes of slack to reserve after a batch buffer for hardware
 * prefetch on the given GT.
 *
 * Non-media GTs on graphics version 12.50 and later get 1K (RCS and CCS
 * require 1K, although other engines would be okay with 512); everything
 * else gets 512 bytes.
 */
static int bb_prefetch(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);

	/* Older graphics IP and media GTs only need the smaller window. */
	if (GRAPHICS_VERx100(xe) < 1250 || xe_gt_is_media_type(gt))
		return SZ_512;

	return SZ_1K;
}

/**
 * xe_bb_new() - Allocate a batch buffer descriptor and its backing storage
 * @gt: GT the batch buffer is created for
 * @dwords: number of command dwords the caller intends to emit
 * @usm: when true, suballocate from gt->usm.bb_pool; otherwise from the
 *       tile's kernel_bb_pool
 *
 * The returned buffer starts out empty (len == 0) with cs pointing at the
 * CPU address of the suballocation.
 *
 * Return: the new batch buffer, ERR_PTR(-ENOMEM) if the descriptor cannot
 * be allocated, or the error pointer value reported by xe_sa_bo_new().
 */
struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
{
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_bb *bb;
	u32 bytes;
	int err;

	bb = kmalloc(sizeof(*bb), GFP_KERNEL);
	if (!bb)
		return ERR_PTR(-ENOMEM);

	/*
	 * Reserve room for the requested dwords, one extra dword for the
	 * terminating MI_BATCH_BUFFER_END, and trailing padding to
	 * accommodate the platform-specific hardware prefetch requirements.
	 */
	bytes = 4 * (dwords + 1) + bb_prefetch(gt);

	bb->bo = xe_sa_bo_new(usm ? gt->usm.bb_pool : tile->mem.kernel_bb_pool,
			      bytes);
	if (IS_ERR(bb->bo)) {
		/* Grab the error code before the descriptor is released. */
		err = PTR_ERR(bb->bo);
		kfree(bb);
		return ERR_PTR(err);
	}

	bb->len = 0;
	bb->cs = xe_sa_bo_cpu_addr(bb->bo);

	return bb;
}

/*
 * Common tail of job creation: terminate the batch with
 * MI_BATCH_BUFFER_END, check that the emitted commands plus the prefetch
 * padding still fit in the suballocation, flush the CPU writes and create
 * the scheduler job on @q with the batch address(es) in @addr.
 *
 * Returns whatever xe_sched_job_create() returns.
 */
static struct xe_sched_job *
__xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr)
{
	u32 capacity = drm_suballoc_size(bb->bo);

	/* Append the terminator; it counts towards the batch length. */
	bb->cs[bb->len] = MI_BATCH_BUFFER_END;
	bb->len++;

	/* Commands (4 bytes per dword) plus prefetch slack must fit. */
	xe_gt_assert(q->gt, bb->len * 4 + bb_prefetch(q->gt) <= capacity);

	xe_sa_bo_flush_write(bb->bo);

	return xe_sched_job_create(q, addr);
}

/**
 * xe_bb_create_migration_job() - Create a job with two batch entry points
 * @q: exec queue to create the job on; asserted to be a migration queue
 *     of width 1
 * @bb: batch buffer holding the commands
 * @batch_base_ofs: base offset added to the suballocation offset of @bb
 * @second_idx: dword index within @bb of the second entry point; asserted
 *              to be no larger than the current batch length
 *
 * The first address is @batch_base_ofs plus the suballocation offset of
 * @bb; the second is that address plus @second_idx dwords (4 bytes each).
 *
 * Return: the result of the underlying job creation.
 */
struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,
						struct xe_bb *bb,
						u64 batch_base_ofs,
						u32 second_idx)
{
	u64 first = batch_base_ofs + drm_suballoc_soffset(bb->bo);
	u64 addr[2] = {
		first,
		first + 4 * second_idx,
	};

	xe_gt_assert(q->gt, second_idx <= bb->len);
	xe_gt_assert(q->gt, xe_sched_job_is_migration(q));
	xe_gt_assert(q->gt, q->width == 1);

	return __xe_bb_create_job(q, bb, addr);
}

/**
 * xe_bb_create_job() - Create a job that executes a batch buffer
 * @q: exec queue to create the job on; asserted to be a non-migration
 *     queue of width 1
 * @bb: batch buffer holding the commands
 *
 * The batch address handed to the job is the GPU address of the
 * suballocation backing @bb.
 *
 * Return: the result of the underlying job creation.
 */
struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
				      struct xe_bb *bb)
{
	u64 batch_addr;

	batch_addr = xe_sa_bo_gpu_addr(bb->bo);

	xe_gt_assert(q->gt, !xe_sched_job_is_migration(q));
	xe_gt_assert(q->gt, q->width == 1);

	return __xe_bb_create_job(q, bb, &batch_addr);
}

/**
 * xe_bb_free() - Release a batch buffer
 * @bb: batch buffer to release; NULL is accepted and ignored
 * @fence: passed through to xe_sa_bo_free() together with the backing
 *         suballocation
 *
 * Hands the backing suballocation to xe_sa_bo_free() and then frees the
 * descriptor itself.
 */
void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence)
{
	if (bb) {
		xe_sa_bo_free(bb->bo, fence);
		kfree(bb);
	}
}