diff options
Diffstat (limited to '')
39 files changed, 15532 insertions, 0 deletions
diff --git a/intel/.gitignore b/intel/.gitignore new file mode 100644 index 0000000..528b408 --- /dev/null +++ b/intel/.gitignore @@ -0,0 +1 @@ +test_decode diff --git a/intel/Android.mk b/intel/Android.mk new file mode 100644 index 0000000..f45312d --- /dev/null +++ b/intel/Android.mk @@ -0,0 +1,38 @@ +# +# Copyright © 2011 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# + +LOCAL_PATH := $(call my-dir) +include $(CLEAR_VARS) + +# Import variables LIBDRM_INTEL_FILES, LIBDRM_INTEL_H_FILES +include $(LOCAL_PATH)/Makefile.sources + +LOCAL_MODULE := libdrm_intel + +LOCAL_SRC_FILES := $(LIBDRM_INTEL_FILES) + +LOCAL_SHARED_LIBRARIES := \ + libdrm + +include $(LIBDRM_COMMON_MK) +include $(BUILD_SHARED_LIBRARY) diff --git a/intel/Makefile.sources b/intel/Makefile.sources new file mode 100644 index 0000000..6947ab7 --- /dev/null +++ b/intel/Makefile.sources @@ -0,0 +1,15 @@ +LIBDRM_INTEL_FILES := \ + intel_bufmgr.c \ + intel_bufmgr_priv.h \ + intel_bufmgr_fake.c \ + intel_bufmgr_gem.c \ + intel_decode.c \ + intel_chipset.h \ + mm.c \ + mm.h \ + uthash.h + +LIBDRM_INTEL_H_FILES := \ + intel_bufmgr.h \ + intel_aub.h \ + intel_debug.h diff --git a/intel/intel-symbols.txt b/intel/intel-symbols.txt new file mode 100644 index 0000000..132df96 --- /dev/null +++ b/intel/intel-symbols.txt @@ -0,0 +1,83 @@ +drm_intel_bo_alloc +drm_intel_bo_alloc_for_render +drm_intel_bo_alloc_tiled +drm_intel_bo_alloc_userptr +drm_intel_bo_busy +drm_intel_bo_disable_reuse +drm_intel_bo_emit_reloc +drm_intel_bo_emit_reloc_fence +drm_intel_bo_exec +drm_intel_bo_fake_alloc_static +drm_intel_bo_fake_disable_backing_store +drm_intel_bo_flink +drm_intel_bo_gem_create_from_name +drm_intel_bo_gem_create_from_prime +drm_intel_bo_gem_export_to_prime +drm_intel_bo_get_subdata +drm_intel_bo_get_tiling +drm_intel_bo_is_reusable +drm_intel_bo_madvise +drm_intel_bo_map +drm_intel_bo_mrb_exec +drm_intel_bo_pin +drm_intel_bo_reference +drm_intel_bo_references +drm_intel_bo_set_softpin_offset +drm_intel_bo_set_tiling +drm_intel_bo_subdata +drm_intel_bo_unmap +drm_intel_bo_unpin +drm_intel_bo_unreference +drm_intel_bo_use_48b_address_range +drm_intel_bo_wait_rendering +drm_intel_bufmgr_check_aperture_space +drm_intel_bufmgr_destroy +drm_intel_bufmgr_fake_contended_lock_take +drm_intel_bufmgr_fake_evict_all +drm_intel_bufmgr_fake_init +drm_intel_bufmgr_fake_set_exec_callback +drm_intel_bufmgr_fake_set_fence_callback +drm_intel_bufmgr_fake_set_last_dispatch +drm_intel_bufmgr_gem_can_disable_implicit_sync +drm_intel_bufmgr_gem_enable_fenced_relocs +drm_intel_bufmgr_gem_enable_reuse +drm_intel_bufmgr_gem_get_devid +drm_intel_bufmgr_gem_init +drm_intel_bufmgr_gem_set_aub_annotations +drm_intel_bufmgr_gem_set_aub_dump +drm_intel_bufmgr_gem_set_aub_filename +drm_intel_bufmgr_gem_set_vma_cache_size +drm_intel_bufmgr_set_debug +drm_intel_decode +drm_intel_decode_context_alloc +drm_intel_decode_context_free +drm_intel_decode_set_batch_pointer +drm_intel_decode_set_dump_past_end +drm_intel_decode_set_head_tail +drm_intel_decode_set_output_file +drm_intel_gem_bo_aub_dump_bmp +drm_intel_gem_bo_clear_relocs +drm_intel_gem_bo_context_exec +drm_intel_gem_bo_disable_implicit_sync +drm_intel_gem_bo_enable_implicit_sync +drm_intel_gem_bo_fence_exec +drm_intel_gem_bo_get_reloc_count +drm_intel_gem_bo_map__cpu +drm_intel_gem_bo_map__gtt +drm_intel_gem_bo_map__wc +drm_intel_gem_bo_map_gtt +drm_intel_gem_bo_map_unsynchronized +drm_intel_gem_bo_start_gtt_access +drm_intel_gem_bo_unmap_gtt +drm_intel_gem_bo_wait +drm_intel_gem_context_create +drm_intel_gem_context_destroy +drm_intel_gem_context_get_id +drm_intel_get_aperture_sizes +drm_intel_get_eu_total +drm_intel_get_min_eu_in_pool +drm_intel_get_pipe_from_crtc_id +drm_intel_get_pooled_eu +drm_intel_get_reset_stats +drm_intel_get_subslice_total +drm_intel_reg_read diff --git a/intel/intel_aub.h b/intel/intel_aub.h new file mode 100644 index 0000000..5f0aba8 --- /dev/null +++ b/intel/intel_aub.h @@ -0,0 +1,153 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +/** @file intel_aub.h + * + * The AUB file is a file format used by Intel's internal simulation + * and other validation tools. It can be used at various levels by a + * driver to input state to the simulated hardware or a replaying + * debugger. + * + * We choose to dump AUB files using the trace block format for ease + * of implementation -- dump out the blocks of memory as plain blobs + * and insert ring commands to execute the batchbuffer blob. + */ + +#ifndef _INTEL_AUB_H +#define _INTEL_AUB_H + +#define AUB_MI_NOOP (0) +#define AUB_MI_BATCH_BUFFER_START (0x31 << 23) +#define AUB_PIPE_CONTROL (0x7a000002) + +/* DW0: instruction type. */ + +#define CMD_AUB (7 << 29) + +#define CMD_AUB_HEADER (CMD_AUB | (1 << 23) | (0x05 << 16)) +/* DW1 */ +# define AUB_HEADER_MAJOR_SHIFT 24 +# define AUB_HEADER_MINOR_SHIFT 16 + +#define CMD_AUB_TRACE_HEADER_BLOCK (CMD_AUB | (1 << 23) | (0x41 << 16)) +#define CMD_AUB_DUMP_BMP (CMD_AUB | (1 << 23) | (0x9e << 16)) + +/* DW1 */ +#define AUB_TRACE_OPERATION_MASK 0x000000ff +#define AUB_TRACE_OP_COMMENT 0x00000000 +#define AUB_TRACE_OP_DATA_WRITE 0x00000001 +#define AUB_TRACE_OP_COMMAND_WRITE 0x00000002 +#define AUB_TRACE_OP_MMIO_WRITE 0x00000003 +// operation = TRACE_DATA_WRITE, Type +#define AUB_TRACE_TYPE_MASK 0x0000ff00 +#define AUB_TRACE_TYPE_NOTYPE (0 << 8) +#define AUB_TRACE_TYPE_BATCH (1 << 8) +#define AUB_TRACE_TYPE_VERTEX_BUFFER (5 << 8) +#define AUB_TRACE_TYPE_2D_MAP (6 << 8) +#define AUB_TRACE_TYPE_CUBE_MAP (7 << 8) +#define AUB_TRACE_TYPE_VOLUME_MAP (9 << 8) +#define AUB_TRACE_TYPE_1D_MAP (10 << 8) +#define AUB_TRACE_TYPE_CONSTANT_BUFFER (11 << 8) +#define AUB_TRACE_TYPE_CONSTANT_URB (12 << 8) +#define AUB_TRACE_TYPE_INDEX_BUFFER (13 << 8) +#define AUB_TRACE_TYPE_GENERAL (14 << 8) +#define AUB_TRACE_TYPE_SURFACE (15 << 8) + + +// operation = TRACE_COMMAND_WRITE, Type = +#define AUB_TRACE_TYPE_RING_HWB (1 << 8) +#define AUB_TRACE_TYPE_RING_PRB0 (2 << 8) +#define AUB_TRACE_TYPE_RING_PRB1 (3 << 8) +#define AUB_TRACE_TYPE_RING_PRB2 (4 << 8) + +// Address space +#define AUB_TRACE_ADDRESS_SPACE_MASK 0x00ff0000 +#define AUB_TRACE_MEMTYPE_GTT (0 << 16) +#define AUB_TRACE_MEMTYPE_LOCAL (1 << 16) +#define AUB_TRACE_MEMTYPE_NONLOCAL (2 << 16) +#define AUB_TRACE_MEMTYPE_PCI (3 << 16) +#define AUB_TRACE_MEMTYPE_GTT_ENTRY (4 << 16) + +/* DW2 */ + +/** + * aub_state_struct_type enum values are encoded with the top 16 bits + * representing the type to be delivered to the .aub file, and the bottom 16 + * bits representing the subtype. This macro performs the encoding. + */ +#define ENCODE_SS_TYPE(type, subtype) (((type) << 16) | (subtype)) + +enum aub_state_struct_type { + AUB_TRACE_VS_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 1), + AUB_TRACE_GS_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 2), + AUB_TRACE_CLIP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 3), + AUB_TRACE_SF_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 4), + AUB_TRACE_WM_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 5), + AUB_TRACE_CC_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 6), + AUB_TRACE_CLIP_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 7), + AUB_TRACE_SF_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 8), + AUB_TRACE_CC_VP_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x9), + AUB_TRACE_SAMPLER_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xa), + AUB_TRACE_KERNEL_INSTRUCTIONS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xb), + AUB_TRACE_SCRATCH_SPACE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xc), + AUB_TRACE_SAMPLER_DEFAULT_COLOR = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0xd), + + AUB_TRACE_SCISSOR_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x15), + AUB_TRACE_BLEND_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x16), + AUB_TRACE_DEPTH_STENCIL_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_GENERAL, 0x17), + + AUB_TRACE_VERTEX_BUFFER = ENCODE_SS_TYPE(AUB_TRACE_TYPE_VERTEX_BUFFER, 0), + AUB_TRACE_BINDING_TABLE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_SURFACE, 0x100), + AUB_TRACE_SURFACE_STATE = ENCODE_SS_TYPE(AUB_TRACE_TYPE_SURFACE, 0x200), + AUB_TRACE_VS_CONSTANTS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_CONSTANT_BUFFER, 0), + AUB_TRACE_WM_CONSTANTS = ENCODE_SS_TYPE(AUB_TRACE_TYPE_CONSTANT_BUFFER, 1), +}; + +#undef ENCODE_SS_TYPE + +/** + * Decode a aub_state_struct_type value to determine the type that should be + * stored in the .aub file. + */ +static inline uint32_t AUB_TRACE_TYPE(enum aub_state_struct_type ss_type) +{ + return (ss_type & 0xFFFF0000) >> 16; +} + +/** + * Decode a state_struct_type value to determine the subtype that should be + * stored in the .aub file. + */ +static inline uint32_t AUB_TRACE_SUBTYPE(enum aub_state_struct_type ss_type) +{ + return ss_type & 0xFFFF; +} + +/* DW3: address */ +/* DW4: len */ + +#endif /* _INTEL_AUB_H */ diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c new file mode 100644 index 0000000..68d97c0 --- /dev/null +++ b/intel/intel_bufmgr.c @@ -0,0 +1,382 @@ +/* + * Copyright © 2007 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include <string.h> +#include <stdlib.h> +#include <stdint.h> +#include <assert.h> +#include <errno.h> +#include <drm.h> +#include <i915_drm.h> +#ifndef __ANDROID__ +#include <pciaccess.h> +#endif +#include "libdrm_macros.h" +#include "intel_bufmgr.h" +#include "intel_bufmgr_priv.h" +#include "xf86drm.h" + +/** @file intel_bufmgr.c + * + * Convenience functions for buffer management methods. + */ + +drm_public drm_intel_bo * +drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name, + unsigned long size, unsigned int alignment) +{ + return bufmgr->bo_alloc(bufmgr, name, size, alignment); +} + +drm_public drm_intel_bo * +drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, const char *name, + unsigned long size, unsigned int alignment) +{ + return bufmgr->bo_alloc_for_render(bufmgr, name, size, alignment); +} + +drm_public drm_intel_bo * +drm_intel_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, + const char *name, void *addr, + uint32_t tiling_mode, + uint32_t stride, + unsigned long size, + unsigned long flags) +{ + if (bufmgr->bo_alloc_userptr) + return bufmgr->bo_alloc_userptr(bufmgr, name, addr, tiling_mode, + stride, size, flags); + return NULL; +} + +drm_public drm_intel_bo * +drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, + int x, int y, int cpp, uint32_t *tiling_mode, + unsigned long *pitch, unsigned long flags) +{ + return bufmgr->bo_alloc_tiled(bufmgr, name, x, y, cpp, + tiling_mode, pitch, flags); +} + +drm_public void +drm_intel_bo_reference(drm_intel_bo *bo) +{ + bo->bufmgr->bo_reference(bo); +} + +drm_public void +drm_intel_bo_unreference(drm_intel_bo *bo) +{ + if (bo == NULL) + return; + + bo->bufmgr->bo_unreference(bo); +} + +drm_public int +drm_intel_bo_map(drm_intel_bo *buf, int write_enable) +{ + return buf->bufmgr->bo_map(buf, write_enable); +} + +drm_public int +drm_intel_bo_unmap(drm_intel_bo *buf) +{ + return buf->bufmgr->bo_unmap(buf); +} + +drm_public int +drm_intel_bo_subdata(drm_intel_bo *bo, unsigned long offset, + unsigned long size, const void *data) +{ + return bo->bufmgr->bo_subdata(bo, offset, size, data); +} + +drm_public int +drm_intel_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, + unsigned long size, void *data) +{ + int ret; + if (bo->bufmgr->bo_get_subdata) + return bo->bufmgr->bo_get_subdata(bo, offset, size, data); + + if (size == 0 || data == NULL) + return 0; + + ret = drm_intel_bo_map(bo, 0); + if (ret) + return ret; + memcpy(data, (unsigned char *)bo->virtual + offset, size); + drm_intel_bo_unmap(bo); + return 0; +} + +drm_public void +drm_intel_bo_wait_rendering(drm_intel_bo *bo) +{ + bo->bufmgr->bo_wait_rendering(bo); +} + +drm_public void +drm_intel_bufmgr_destroy(drm_intel_bufmgr *bufmgr) +{ + bufmgr->destroy(bufmgr); +} + +drm_public int +drm_intel_bo_exec(drm_intel_bo *bo, int used, + drm_clip_rect_t * cliprects, int num_cliprects, int DR4) +{ + return bo->bufmgr->bo_exec(bo, used, cliprects, num_cliprects, DR4); +} + +drm_public int +drm_intel_bo_mrb_exec(drm_intel_bo *bo, int used, + drm_clip_rect_t *cliprects, int num_cliprects, int DR4, + unsigned int rings) +{ + if (bo->bufmgr->bo_mrb_exec) + return bo->bufmgr->bo_mrb_exec(bo, used, + cliprects, num_cliprects, DR4, + rings); + + switch (rings) { + case I915_EXEC_DEFAULT: + case I915_EXEC_RENDER: + return bo->bufmgr->bo_exec(bo, used, + cliprects, num_cliprects, DR4); + default: + return -ENODEV; + } +} + +drm_public void +drm_intel_bufmgr_set_debug(drm_intel_bufmgr *bufmgr, int enable_debug) +{ + bufmgr->debug = enable_debug; +} + +drm_public int +drm_intel_bufmgr_check_aperture_space(drm_intel_bo ** bo_array, int count) +{ + return bo_array[0]->bufmgr->check_aperture_space(bo_array, count); +} + +drm_public int +drm_intel_bo_flink(drm_intel_bo *bo, uint32_t * name) +{ + if (bo->bufmgr->bo_flink) + return bo->bufmgr->bo_flink(bo, name); + + return -ENODEV; +} + +drm_public int +drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, + drm_intel_bo *target_bo, uint32_t target_offset, + uint32_t read_domains, uint32_t write_domain) +{ + return bo->bufmgr->bo_emit_reloc(bo, offset, + target_bo, target_offset, + read_domains, write_domain); +} + +/* For fence registers, not GL fences */ +drm_public int +drm_intel_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, + drm_intel_bo *target_bo, uint32_t target_offset, + uint32_t read_domains, uint32_t write_domain) +{ + return bo->bufmgr->bo_emit_reloc_fence(bo, offset, + target_bo, target_offset, + read_domains, write_domain); +} + + +drm_public int +drm_intel_bo_pin(drm_intel_bo *bo, uint32_t alignment) +{ + if (bo->bufmgr->bo_pin) + return bo->bufmgr->bo_pin(bo, alignment); + + return -ENODEV; +} + +drm_public int +drm_intel_bo_unpin(drm_intel_bo *bo) +{ + if (bo->bufmgr->bo_unpin) + return bo->bufmgr->bo_unpin(bo); + + return -ENODEV; +} + +drm_public int +drm_intel_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, + uint32_t stride) +{ + if (bo->bufmgr->bo_set_tiling) + return bo->bufmgr->bo_set_tiling(bo, tiling_mode, stride); + + *tiling_mode = I915_TILING_NONE; + return 0; +} + +drm_public int +drm_intel_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, + uint32_t * swizzle_mode) +{ + if (bo->bufmgr->bo_get_tiling) + return bo->bufmgr->bo_get_tiling(bo, tiling_mode, swizzle_mode); + + *tiling_mode = I915_TILING_NONE; + *swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + return 0; +} + +drm_public int +drm_intel_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset) +{ + if (bo->bufmgr->bo_set_softpin_offset) + return bo->bufmgr->bo_set_softpin_offset(bo, offset); + + return -ENODEV; +} + +drm_public int +drm_intel_bo_disable_reuse(drm_intel_bo *bo) +{ + if (bo->bufmgr->bo_disable_reuse) + return bo->bufmgr->bo_disable_reuse(bo); + return 0; +} + +drm_public int +drm_intel_bo_is_reusable(drm_intel_bo *bo) +{ + if (bo->bufmgr->bo_is_reusable) + return bo->bufmgr->bo_is_reusable(bo); + return 0; +} + +drm_public int +drm_intel_bo_busy(drm_intel_bo *bo) +{ + if (bo->bufmgr->bo_busy) + return bo->bufmgr->bo_busy(bo); + return 0; +} + +drm_public int +drm_intel_bo_madvise(drm_intel_bo *bo, int madv) +{ + if (bo->bufmgr->bo_madvise) + return bo->bufmgr->bo_madvise(bo, madv); + return -1; +} + +drm_public int +drm_intel_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable) +{ + if (bo->bufmgr->bo_use_48b_address_range) { + bo->bufmgr->bo_use_48b_address_range(bo, enable); + return 0; + } + + return -ENODEV; +} + +drm_public int +drm_intel_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) +{ + return bo->bufmgr->bo_references(bo, target_bo); +} + +drm_public int +drm_intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id) +{ + if (bufmgr->get_pipe_from_crtc_id) + return bufmgr->get_pipe_from_crtc_id(bufmgr, crtc_id); + return -1; +} + +#ifndef __ANDROID__ +static size_t +drm_intel_probe_agp_aperture_size(int fd) +{ + struct pci_device *pci_dev; + size_t size = 0; + int ret; + + ret = pci_system_init(); + if (ret) + goto err; + + /* XXX handle multiple adaptors? */ + pci_dev = pci_device_find_by_slot(0, 0, 2, 0); + if (pci_dev == NULL) + goto err; + + ret = pci_device_probe(pci_dev); + if (ret) + goto err; + + size = pci_dev->regions[2].size; +err: + pci_system_cleanup (); + return size; +} +#else +static size_t +drm_intel_probe_agp_aperture_size(int fd) +{ + /* Nothing seems to rely on this value on Android anyway... */ + fprintf(stderr, "%s: Mappable aperture size hardcoded to 64MiB\n", __func__); + return 64 * 1024 * 1024; +} +#endif + +drm_public int +drm_intel_get_aperture_sizes(int fd, size_t *mappable, size_t *total) +{ + + struct drm_i915_gem_get_aperture aperture; + int ret; + + ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); + if (ret) + return ret; + + *mappable = 0; + /* XXX add a query for the kernel value? */ + if (*mappable == 0) + *mappable = drm_intel_probe_agp_aperture_size(fd); + if (*mappable == 0) + *mappable = 64 * 1024 * 1024; /* minimum possible value */ + *total = aperture.aper_size; + return 0; +} diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h new file mode 100644 index 0000000..693472a --- /dev/null +++ b/intel/intel_bufmgr.h @@ -0,0 +1,341 @@ +/* + * Copyright © 2008-2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +/** + * @file intel_bufmgr.h + * + * Public definitions of Intel-specific bufmgr functions. + */ + +#ifndef INTEL_BUFMGR_H +#define INTEL_BUFMGR_H + +#include <stdio.h> +#include <stdint.h> +#include <stdio.h> + +#if defined(__cplusplus) +extern "C" { +#endif + +struct drm_clip_rect; + +typedef struct _drm_intel_bufmgr drm_intel_bufmgr; +typedef struct _drm_intel_context drm_intel_context; +typedef struct _drm_intel_bo drm_intel_bo; + +struct _drm_intel_bo { + /** + * Size in bytes of the buffer object. + * + * The size may be larger than the size originally requested for the + * allocation, such as being aligned to page size. + */ + unsigned long size; + + /** + * Alignment requirement for object + * + * Used for GTT mapping & pinning the object. + */ + unsigned long align; + + /** + * Deprecated field containing (possibly the low 32-bits of) the last + * seen virtual card address. Use offset64 instead. + */ + unsigned long offset; + + /** + * Virtual address for accessing the buffer data. Only valid while + * mapped. + */ +#ifdef __cplusplus + void *virt; +#else + void *virtual; +#endif + + /** Buffer manager context associated with this buffer object */ + drm_intel_bufmgr *bufmgr; + + /** + * MM-specific handle for accessing object + */ + int handle; + + /** + * Last seen card virtual address (offset from the beginning of the + * aperture) for the object. This should be used to fill relocation + * entries when calling drm_intel_bo_emit_reloc() + */ + uint64_t offset64; +}; + +enum aub_dump_bmp_format { + AUB_DUMP_BMP_FORMAT_8BIT = 1, + AUB_DUMP_BMP_FORMAT_ARGB_4444 = 4, + AUB_DUMP_BMP_FORMAT_ARGB_0888 = 6, + AUB_DUMP_BMP_FORMAT_ARGB_8888 = 7, +}; + +typedef struct _drm_intel_aub_annotation { + uint32_t type; + uint32_t subtype; + uint32_t ending_offset; +} drm_intel_aub_annotation; + +#define BO_ALLOC_FOR_RENDER (1<<0) + +drm_intel_bo *drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name, + unsigned long size, unsigned int alignment); +drm_intel_bo *drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, + const char *name, + unsigned long size, + unsigned int alignment); +drm_intel_bo *drm_intel_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, + const char *name, + void *addr, uint32_t tiling_mode, + uint32_t stride, unsigned long size, + unsigned long flags); +drm_intel_bo *drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, + const char *name, + int x, int y, int cpp, + uint32_t *tiling_mode, + unsigned long *pitch, + unsigned long flags); +void drm_intel_bo_reference(drm_intel_bo *bo); +void drm_intel_bo_unreference(drm_intel_bo *bo); +int drm_intel_bo_map(drm_intel_bo *bo, int write_enable); +int drm_intel_bo_unmap(drm_intel_bo *bo); + +int drm_intel_bo_subdata(drm_intel_bo *bo, unsigned long offset, + unsigned long size, const void *data); +int drm_intel_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, + unsigned long size, void *data); +void drm_intel_bo_wait_rendering(drm_intel_bo *bo); + +void drm_intel_bufmgr_set_debug(drm_intel_bufmgr *bufmgr, int enable_debug); +void drm_intel_bufmgr_destroy(drm_intel_bufmgr *bufmgr); +int drm_intel_bo_exec(drm_intel_bo *bo, int used, + struct drm_clip_rect *cliprects, int num_cliprects, int DR4); +int drm_intel_bo_mrb_exec(drm_intel_bo *bo, int used, + struct drm_clip_rect *cliprects, int num_cliprects, int DR4, + unsigned int flags); +int drm_intel_bufmgr_check_aperture_space(drm_intel_bo ** bo_array, int count); + +int drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, + drm_intel_bo *target_bo, uint32_t target_offset, + uint32_t read_domains, uint32_t write_domain); +int drm_intel_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, + drm_intel_bo *target_bo, + uint32_t target_offset, + uint32_t read_domains, uint32_t write_domain); +int drm_intel_bo_pin(drm_intel_bo *bo, uint32_t alignment); +int drm_intel_bo_unpin(drm_intel_bo *bo); +int drm_intel_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, + uint32_t stride); +int drm_intel_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, + uint32_t * swizzle_mode); +int drm_intel_bo_flink(drm_intel_bo *bo, uint32_t * name); +int drm_intel_bo_busy(drm_intel_bo *bo); +int drm_intel_bo_madvise(drm_intel_bo *bo, int madv); +int drm_intel_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable); +int drm_intel_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset); + +int drm_intel_bo_disable_reuse(drm_intel_bo *bo); +int drm_intel_bo_is_reusable(drm_intel_bo *bo); +int drm_intel_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo); + +/* drm_intel_bufmgr_gem.c */ +drm_intel_bufmgr *drm_intel_bufmgr_gem_init(int fd, int batch_size); +drm_intel_bo *drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, + const char *name, + unsigned int handle); +void drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr); +void drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr); +void drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, + int limit); +int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo); +int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo); +int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo); + +#define HAVE_DRM_INTEL_GEM_BO_DISABLE_IMPLICIT_SYNC 1 +int drm_intel_bufmgr_gem_can_disable_implicit_sync(drm_intel_bufmgr *bufmgr); +void drm_intel_gem_bo_disable_implicit_sync(drm_intel_bo *bo); +void drm_intel_gem_bo_enable_implicit_sync(drm_intel_bo *bo); + +void *drm_intel_gem_bo_map__cpu(drm_intel_bo *bo); +void *drm_intel_gem_bo_map__gtt(drm_intel_bo *bo); +void *drm_intel_gem_bo_map__wc(drm_intel_bo *bo); + +int drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo); +void drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start); +void drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable); + +void +drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr, + const char *filename); +void drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable); +void drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, + int x1, int y1, int width, int height, + enum aub_dump_bmp_format format, + int pitch, int offset); +void +drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo, + drm_intel_aub_annotation *annotations, + unsigned count); + +int drm_intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id); + +int drm_intel_get_aperture_sizes(int fd, size_t *mappable, size_t *total); +int drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr); +int drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns); + +drm_intel_context *drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr); +int drm_intel_gem_context_get_id(drm_intel_context *ctx, + uint32_t *ctx_id); +void drm_intel_gem_context_destroy(drm_intel_context *ctx); +int drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx, + int used, unsigned int flags); +int drm_intel_gem_bo_fence_exec(drm_intel_bo *bo, + drm_intel_context *ctx, + int used, + int in_fence, + int *out_fence, + unsigned int flags); + +int drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd); +drm_intel_bo *drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, + int prime_fd, int size); + +/* drm_intel_bufmgr_fake.c */ +drm_intel_bufmgr *drm_intel_bufmgr_fake_init(int fd, + unsigned long low_offset, + void *low_virtual, + unsigned long size, + volatile unsigned int + *last_dispatch); +void drm_intel_bufmgr_fake_set_last_dispatch(drm_intel_bufmgr *bufmgr, + volatile unsigned int + *last_dispatch); +void drm_intel_bufmgr_fake_set_exec_callback(drm_intel_bufmgr *bufmgr, + int (*exec) (drm_intel_bo *bo, + unsigned int used, + void *priv), + void *priv); +void drm_intel_bufmgr_fake_set_fence_callback(drm_intel_bufmgr *bufmgr, + unsigned int (*emit) (void *priv), + void (*wait) (unsigned int fence, + void *priv), + void *priv); +drm_intel_bo *drm_intel_bo_fake_alloc_static(drm_intel_bufmgr *bufmgr, + const char *name, + unsigned long offset, + unsigned long size, void *virt); +void drm_intel_bo_fake_disable_backing_store(drm_intel_bo *bo, + void (*invalidate_cb) (drm_intel_bo + * bo, + void *ptr), + void *ptr); + +void drm_intel_bufmgr_fake_contended_lock_take(drm_intel_bufmgr *bufmgr); +void drm_intel_bufmgr_fake_evict_all(drm_intel_bufmgr *bufmgr); + +struct drm_intel_decode *drm_intel_decode_context_alloc(uint32_t devid); +void drm_intel_decode_context_free(struct drm_intel_decode *ctx); +void drm_intel_decode_set_batch_pointer(struct drm_intel_decode *ctx, + void *data, uint32_t hw_offset, + int count); +void drm_intel_decode_set_dump_past_end(struct drm_intel_decode *ctx, + int dump_past_end); +void drm_intel_decode_set_head_tail(struct drm_intel_decode *ctx, + uint32_t head, uint32_t tail); +void drm_intel_decode_set_output_file(struct drm_intel_decode *ctx, FILE *out); +void drm_intel_decode(struct drm_intel_decode *ctx); + +int drm_intel_reg_read(drm_intel_bufmgr *bufmgr, + uint32_t offset, + uint64_t *result); + +int drm_intel_get_reset_stats(drm_intel_context *ctx, + uint32_t *reset_count, + uint32_t *active, + uint32_t *pending); + +int drm_intel_get_subslice_total(int fd, unsigned int *subslice_total); +int drm_intel_get_eu_total(int fd, unsigned int *eu_total); + +int drm_intel_get_pooled_eu(int fd); +int drm_intel_get_min_eu_in_pool(int fd); + +/** @{ Compatibility defines to keep old code building despite the symbol rename + * from dri_* to drm_intel_* + */ +#define dri_bo drm_intel_bo +#define dri_bufmgr drm_intel_bufmgr +#define dri_bo_alloc drm_intel_bo_alloc +#define dri_bo_reference drm_intel_bo_reference +#define dri_bo_unreference drm_intel_bo_unreference +#define dri_bo_map drm_intel_bo_map +#define dri_bo_unmap drm_intel_bo_unmap +#define dri_bo_subdata drm_intel_bo_subdata +#define dri_bo_get_subdata drm_intel_bo_get_subdata +#define dri_bo_wait_rendering drm_intel_bo_wait_rendering +#define dri_bufmgr_set_debug drm_intel_bufmgr_set_debug +#define dri_bufmgr_destroy drm_intel_bufmgr_destroy +#define dri_bo_exec drm_intel_bo_exec +#define dri_bufmgr_check_aperture_space drm_intel_bufmgr_check_aperture_space +#define dri_bo_emit_reloc(reloc_bo, read, write, target_offset, \ + reloc_offset, target_bo) \ + drm_intel_bo_emit_reloc(reloc_bo, reloc_offset, \ + target_bo, target_offset, \ + read, write); +#define dri_bo_pin drm_intel_bo_pin +#define dri_bo_unpin drm_intel_bo_unpin +#define dri_bo_get_tiling drm_intel_bo_get_tiling +#define dri_bo_set_tiling(bo, mode) drm_intel_bo_set_tiling(bo, mode, 0) +#define dri_bo_flink drm_intel_bo_flink +#define intel_bufmgr_gem_init drm_intel_bufmgr_gem_init +#define intel_bo_gem_create_from_name drm_intel_bo_gem_create_from_name +#define intel_bufmgr_gem_enable_reuse drm_intel_bufmgr_gem_enable_reuse +#define intel_bufmgr_fake_init drm_intel_bufmgr_fake_init +#define intel_bufmgr_fake_set_last_dispatch drm_intel_bufmgr_fake_set_last_dispatch +#define intel_bufmgr_fake_set_exec_callback drm_intel_bufmgr_fake_set_exec_callback +#define intel_bufmgr_fake_set_fence_callback drm_intel_bufmgr_fake_set_fence_callback +#define intel_bo_fake_alloc_static drm_intel_bo_fake_alloc_static +#define intel_bo_fake_disable_backing_store drm_intel_bo_fake_disable_backing_store +#define intel_bufmgr_fake_contended_lock_take drm_intel_bufmgr_fake_contended_lock_take +#define intel_bufmgr_fake_evict_all drm_intel_bufmgr_fake_evict_all + +/** @{ */ + +#if defined(__cplusplus) +} +#endif + +#endif /* INTEL_BUFMGR_H */ diff --git a/intel/intel_bufmgr_fake.c b/intel/intel_bufmgr_fake.c new file mode 100644 index 0000000..0cec51f --- /dev/null +++ b/intel/intel_bufmgr_fake.c @@ -0,0 +1,1626 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Originally a fake version of the buffer manager so that we can + * prototype the changes in a driver fairly quickly, has been fleshed + * out to a fully functional interim solution. + * + * Basically wraps the old style memory management in the new + * programming interface, but is more expressive and avoids many of + * the bugs in the old texture manager. + */ + +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <errno.h> +#include <strings.h> +#include <xf86drm.h> +#include <pthread.h> +#include "intel_bufmgr.h" +#include "intel_bufmgr_priv.h" +#include "drm.h" +#include "i915_drm.h" +#include "mm.h" +#include "libdrm_macros.h" +#include "libdrm_lists.h" + +#define DBG(...) do { \ + if (bufmgr_fake->bufmgr.debug) \ + drmMsg(__VA_ARGS__); \ +} while (0) + +/* Internal flags: + */ +#define BM_NO_BACKING_STORE 0x00000001 +#define BM_NO_FENCE_SUBDATA 0x00000002 +#define BM_PINNED 0x00000004 + +/* Wrapper around mm.c's mem_block, which understands that you must + * wait for fences to expire before memory can be freed. This is + * specific to our use of memcpy for uploads - an upload that was + * processed through the command queue wouldn't need to care about + * fences. + */ +#define MAX_RELOCS 4096 + +struct fake_buffer_reloc { + /** Buffer object that the relocation points at. */ + drm_intel_bo *target_buf; + /** Offset of the relocation entry within reloc_buf. */ + uint32_t offset; + /** + * Cached value of the offset when we last performed this relocation. + */ + uint32_t last_target_offset; + /** Value added to target_buf's offset to get the relocation entry. */ + uint32_t delta; + /** Cache domains the target buffer is read into. */ + uint32_t read_domains; + /** Cache domain the target buffer will have dirty cachelines in. */ + uint32_t write_domain; +}; + +struct block { + struct block *next, *prev; + struct mem_block *mem; /* BM_MEM_AGP */ + + /** + * Marks that the block is currently in the aperture and has yet to be + * fenced. + */ + unsigned on_hardware:1; + /** + * Marks that the block is currently fenced (being used by rendering) + * and can't be freed until @fence is passed. + */ + unsigned fenced:1; + + /** Fence cookie for the block. */ + unsigned fence; /* Split to read_fence, write_fence */ + + drm_intel_bo *bo; + void *virtual; +}; + +typedef struct _bufmgr_fake { + drm_intel_bufmgr bufmgr; + + pthread_mutex_t lock; + + unsigned long low_offset; + unsigned long size; + void *virtual; + + struct mem_block *heap; + + unsigned buf_nr; /* for generating ids */ + + /** + * List of blocks which are currently in the GART but haven't been + * fenced yet. + */ + struct block on_hardware; + /** + * List of blocks which are in the GART and have an active fence on + * them. + */ + struct block fenced; + /** + * List of blocks which have an expired fence and are ready to be + * evicted. + */ + struct block lru; + + unsigned int last_fence; + + unsigned fail:1; + unsigned need_fence:1; + int thrashing; + + /** + * Driver callback to emit a fence, returning the cookie. + * + * This allows the driver to hook in a replacement for the DRM usage in + * bufmgr_fake. + * + * Currently, this also requires that a write flush be emitted before + * emitting the fence, but this should change. + */ + unsigned int (*fence_emit) (void *private); + /** Driver callback to wait for a fence cookie to have passed. */ + void (*fence_wait) (unsigned int fence, void *private); + void *fence_priv; + + /** + * Driver callback to execute a buffer. + * + * This allows the driver to hook in a replacement for the DRM usage in + * bufmgr_fake. + */ + int (*exec) (drm_intel_bo *bo, unsigned int used, void *priv); + void *exec_priv; + + /** Driver-supplied argument to driver callbacks */ + void *driver_priv; + /** + * Pointer to kernel-updated sarea data for the last completed user irq + */ + volatile int *last_dispatch; + + int fd; + + int debug; + + int performed_rendering; +} drm_intel_bufmgr_fake; + +typedef struct _drm_intel_bo_fake { + drm_intel_bo bo; + + unsigned id; /* debug only */ + const char *name; + + unsigned dirty:1; + /** + * has the card written to this buffer - we make need to copy it back + */ + unsigned card_dirty:1; + unsigned int refcount; + /* Flags may consist of any of the DRM_BO flags, plus + * DRM_BO_NO_BACKING_STORE and BM_NO_FENCE_SUBDATA, which are the + * first two driver private flags. + */ + uint64_t flags; + /** Cache domains the target buffer is read into. */ + uint32_t read_domains; + /** Cache domain the target buffer will have dirty cachelines in. */ + uint32_t write_domain; + + unsigned int alignment; + int is_static, validated; + unsigned int map_count; + + /** relocation list */ + struct fake_buffer_reloc *relocs; + int nr_relocs; + /** + * Total size of the target_bos of this buffer. + * + * Used for estimation in check_aperture. + */ + unsigned int child_size; + + struct block *block; + void *backing_store; + void (*invalidate_cb) (drm_intel_bo *bo, void *ptr); + void *invalidate_ptr; +} drm_intel_bo_fake; + +static int clear_fenced(drm_intel_bufmgr_fake *bufmgr_fake, + unsigned int fence_cookie); + +#define MAXFENCE 0x7fffffff + +static int +FENCE_LTE(unsigned a, unsigned b) +{ + if (a == b) + return 1; + + if (a < b && b - a < (1 << 24)) + return 1; + + if (a > b && MAXFENCE - a + b < (1 << 24)) + return 1; + + return 0; +} + +drm_public void +drm_intel_bufmgr_fake_set_fence_callback(drm_intel_bufmgr *bufmgr, + unsigned int (*emit) (void *priv), + void (*wait) (unsigned int fence, + void *priv), + void *priv) +{ + drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr; + + bufmgr_fake->fence_emit = emit; + bufmgr_fake->fence_wait = wait; + bufmgr_fake->fence_priv = priv; +} + +static unsigned int +_fence_emit_internal(drm_intel_bufmgr_fake *bufmgr_fake) +{ + struct drm_i915_irq_emit ie; + int ret, seq = 1; + + if (bufmgr_fake->fence_emit != NULL) { + seq = bufmgr_fake->fence_emit(bufmgr_fake->fence_priv); + return seq; + } + + ie.irq_seq = &seq; + ret = drmCommandWriteRead(bufmgr_fake->fd, DRM_I915_IRQ_EMIT, + &ie, sizeof(ie)); + if (ret) { + drmMsg("%s: drm_i915_irq_emit: %d\n", __func__, ret); + abort(); + } + + DBG("emit 0x%08x\n", seq); + return seq; +} + +static void +_fence_wait_internal(drm_intel_bufmgr_fake *bufmgr_fake, int seq) +{ + struct drm_i915_irq_wait iw; + int hw_seq, busy_count = 0; + int ret; + int kernel_lied; + + if (bufmgr_fake->fence_wait != NULL) { + bufmgr_fake->fence_wait(seq, bufmgr_fake->fence_priv); + clear_fenced(bufmgr_fake, seq); + return; + } + + iw.irq_seq = seq; + + DBG("wait 0x%08x\n", iw.irq_seq); + + /* The kernel IRQ_WAIT implementation is all sorts of broken. + * 1) It returns 1 to 0x7fffffff instead of using the full 32-bit + * unsigned range. + * 2) It returns 0 if hw_seq >= seq, not seq - hw_seq < 0 on the 32-bit + * signed range. + * 3) It waits if seq < hw_seq, not seq - hw_seq > 0 on the 32-bit + * signed range. + * 4) It returns -EBUSY in 3 seconds even if the hardware is still + * successfully chewing through buffers. + * + * Assume that in userland we treat sequence numbers as ints, which + * makes some of the comparisons convenient, since the sequence + * numbers are all positive signed integers. + * + * From this we get several cases we need to handle. Here's a timeline. + * 0x2 0x7 0x7ffffff8 0x7ffffffd + * | | | | + * ------------------------------------------------------------ + * + * A) Normal wait for hw to catch up + * hw_seq seq + * | | + * ------------------------------------------------------------ + * seq - hw_seq = 5. If we call IRQ_WAIT, it will wait for hw to + * catch up. + * + * B) Normal wait for a sequence number that's already passed. + * seq hw_seq + * | | + * ------------------------------------------------------------ + * seq - hw_seq = -5. If we call IRQ_WAIT, it returns 0 quickly. + * + * C) Hardware has already wrapped around ahead of us + * hw_seq seq + * | | + * ------------------------------------------------------------ + * seq - hw_seq = 0x80000000 - 5. If we called IRQ_WAIT, it would wait + * for hw_seq >= seq, which may never occur. Thus, we want to catch + * this in userland and return 0. + * + * D) We've wrapped around ahead of the hardware. + * seq hw_seq + * | | + * ------------------------------------------------------------ + * seq - hw_seq = -(0x80000000 - 5). If we called IRQ_WAIT, it would + * return 0 quickly because hw_seq >= seq, even though the hardware + * isn't caught up. Thus, we need to catch this early return in + * userland and bother the kernel until the hardware really does + * catch up. + * + * E) Hardware might wrap after we test in userland. + * hw_seq seq + * | | + * ------------------------------------------------------------ + * seq - hw_seq = 5. If we call IRQ_WAIT, it will likely see seq >= + * hw_seq and wait. However, suppose hw_seq wraps before we make it + * into the kernel. The kernel sees hw_seq >= seq and waits for 3 + * seconds then returns -EBUSY. This is case C). We should catch + * this and then return successfully. + * + * F) Hardware might take a long time on a buffer. + * hw_seq seq + * | | + * ------------------------------------------------------------------- + * seq - hw_seq = 5. If we call IRQ_WAIT, if sequence 2 through 5 + * take too long, it will return -EBUSY. Batchbuffers in the + * gltestperf demo were seen to take up to 7 seconds. We should + * catch early -EBUSY return and keep trying. + */ + + do { + /* Keep a copy of last_dispatch so that if the wait -EBUSYs + * because the hardware didn't catch up in 3 seconds, we can + * see if it at least made progress and retry. + */ + hw_seq = *bufmgr_fake->last_dispatch; + + /* Catch case C */ + if (seq - hw_seq > 0x40000000) + return; + + ret = drmCommandWrite(bufmgr_fake->fd, DRM_I915_IRQ_WAIT, + &iw, sizeof(iw)); + /* Catch case D */ + kernel_lied = (ret == 0) && (seq - *bufmgr_fake->last_dispatch < + -0x40000000); + + /* Catch case E */ + if (ret == -EBUSY + && (seq - *bufmgr_fake->last_dispatch > 0x40000000)) + ret = 0; + + /* Catch case F: Allow up to 15 seconds chewing on one buffer. */ + if ((ret == -EBUSY) && (hw_seq != *bufmgr_fake->last_dispatch)) + busy_count = 0; + else + busy_count++; + } while (kernel_lied || ret == -EAGAIN || ret == -EINTR || + (ret == -EBUSY && busy_count < 5)); + + if (ret != 0) { + drmMsg("%s:%d: Error waiting for fence: %s.\n", __FILE__, + __LINE__, strerror(-ret)); + abort(); + } + clear_fenced(bufmgr_fake, seq); +} + +static int +_fence_test(drm_intel_bufmgr_fake *bufmgr_fake, unsigned fence) +{ + /* Slight problem with wrap-around: + */ + return fence == 0 || FENCE_LTE(fence, bufmgr_fake->last_fence); +} + +/** + * Allocate a memory manager block for the buffer. + */ +static int +alloc_block(drm_intel_bo *bo) +{ + drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo; + drm_intel_bufmgr_fake *bufmgr_fake = + (drm_intel_bufmgr_fake *) bo->bufmgr; + struct block *block = (struct block *)calloc(sizeof *block, 1); + unsigned int align_log2 = ffs(bo_fake->alignment) - 1; + unsigned int sz; + + if (!block) + return 1; + + sz = (bo->size + bo_fake->alignment - 1) & ~(bo_fake->alignment - 1); + + block->mem = mmAllocMem(bufmgr_fake->heap, sz, align_log2, 0); + if (!block->mem) { + free(block); + return 0; + } + + DRMINITLISTHEAD(block); + + /* Insert at head or at tail??? */ + DRMLISTADDTAIL(block, &bufmgr_fake->lru); + + block->virtual = (uint8_t *) bufmgr_fake->virtual + + block->mem->ofs - bufmgr_fake->low_offset; + block->bo = bo; + + bo_fake->block = block; + + return 1; +} + +/* Release the card storage associated with buf: + */ +static void +free_block(drm_intel_bufmgr_fake *bufmgr_fake, struct block *block, + int skip_dirty_copy) +{ + drm_intel_bo_fake *bo_fake; + DBG("free block %p %08x %d %d\n", block, block->mem->ofs, + block->on_hardware, block->fenced); + + if (!block) + return; + + bo_fake = (drm_intel_bo_fake *) block->bo; + + if (bo_fake->flags & (BM_PINNED | BM_NO_BACKING_STORE)) + skip_dirty_copy = 1; + + if (!skip_dirty_copy && (bo_fake->card_dirty == 1)) { + memcpy(bo_fake->backing_store, block->virtual, block->bo->size); + bo_fake->card_dirty = 0; + bo_fake->dirty = 1; + } + + if (block->on_hardware) { + block->bo = NULL; + } else if (block->fenced) { + block->bo = NULL; + } else { + DBG(" - free immediately\n"); + DRMLISTDEL(block); + + mmFreeMem(block->mem); + free(block); + } +} + +static void +alloc_backing_store(drm_intel_bo *bo) +{ + drm_intel_bufmgr_fake *bufmgr_fake = + (drm_intel_bufmgr_fake *) bo->bufmgr; + drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo; + assert(!bo_fake->backing_store); + assert(!(bo_fake->flags & (BM_PINNED | BM_NO_BACKING_STORE))); + + bo_fake->backing_store = malloc(bo->size); + + DBG("alloc_backing - buf %d %p %lu\n", bo_fake->id, + bo_fake->backing_store, bo->size); + assert(bo_fake->backing_store); +} + +static void +free_backing_store(drm_intel_bo *bo) +{ + drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo; + + if (bo_fake->backing_store) { + assert(!(bo_fake->flags & (BM_PINNED | BM_NO_BACKING_STORE))); + free(bo_fake->backing_store); + bo_fake->backing_store = NULL; + } +} + +static void +set_dirty(drm_intel_bo *bo) +{ + drm_intel_bufmgr_fake *bufmgr_fake = + (drm_intel_bufmgr_fake *) bo->bufmgr; + drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo; + + if (bo_fake->flags & BM_NO_BACKING_STORE + && bo_fake->invalidate_cb != NULL) + bo_fake->invalidate_cb(bo, bo_fake->invalidate_ptr); + + assert(!(bo_fake->flags & BM_PINNED)); + + DBG("set_dirty - buf %d\n", bo_fake->id); + bo_fake->dirty = 1; +} + +static int +evict_lru(drm_intel_bufmgr_fake *bufmgr_fake, unsigned int max_fence) +{ + struct block *block, *tmp; + + DBG("%s\n", __func__); + + DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->lru) { + drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) block->bo; + + if (bo_fake != NULL && (bo_fake->flags & BM_NO_FENCE_SUBDATA)) + continue; + + if (block->fence && max_fence && !FENCE_LTE(block->fence, + max_fence)) + return 0; + + set_dirty(&bo_fake->bo); + bo_fake->block = NULL; + + free_block(bufmgr_fake, block, 0); + return 1; + } + + return 0; +} + +static int +evict_mru(drm_intel_bufmgr_fake *bufmgr_fake) +{ + struct block *block, *tmp; + + DBG("%s\n", __func__); + + DRMLISTFOREACHSAFEREVERSE(block, tmp, &bufmgr_fake->lru) { + drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) block->bo; + + if (bo_fake && (bo_fake->flags & BM_NO_FENCE_SUBDATA)) + continue; + + set_dirty(&bo_fake->bo); + bo_fake->block = NULL; + + free_block(bufmgr_fake, block, 0); + return 1; + } + + return 0; +} + +/** + * Removes all objects from the fenced list older than the given fence. + */ +static int +clear_fenced(drm_intel_bufmgr_fake *bufmgr_fake, unsigned int fence_cookie) +{ + struct block *block, *tmp; + int ret = 0; + + bufmgr_fake->last_fence = fence_cookie; + DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->fenced) { + assert(block->fenced); + + if (_fence_test(bufmgr_fake, block->fence)) { + + block->fenced = 0; + + if (!block->bo) { + DBG("delayed free: offset %x sz %x\n", + block->mem->ofs, block->mem->size); + DRMLISTDEL(block); + mmFreeMem(block->mem); + free(block); + } else { + DBG("return to lru: offset %x sz %x\n", + block->mem->ofs, block->mem->size); + DRMLISTDEL(block); + DRMLISTADDTAIL(block, &bufmgr_fake->lru); + } + + ret = 1; + } else { + /* Blocks are ordered by fence, so if one fails, all + * from here will fail also: + */ + DBG("fence not passed: offset %x sz %x %d %d \n", + block->mem->ofs, block->mem->size, block->fence, + bufmgr_fake->last_fence); + break; + } + } + + DBG("%s: %d\n", __func__, ret); + return ret; +} + +static void +fence_blocks(drm_intel_bufmgr_fake *bufmgr_fake, unsigned fence) +{ + struct block *block, *tmp; + + DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->on_hardware) { + DBG("Fence block %p (sz 0x%x ofs %x buf %p) with fence %d\n", + block, block->mem->size, block->mem->ofs, block->bo, fence); + block->fence = fence; + + block->on_hardware = 0; + block->fenced = 1; + + /* Move to tail of pending list here + */ + DRMLISTDEL(block); + DRMLISTADDTAIL(block, &bufmgr_fake->fenced); + } + + assert(DRMLISTEMPTY(&bufmgr_fake->on_hardware)); +} + +static int +evict_and_alloc_block(drm_intel_bo *bo) +{ + drm_intel_bufmgr_fake *bufmgr_fake = + (drm_intel_bufmgr_fake *) bo->bufmgr; + drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo; + + assert(bo_fake->block == NULL); + + /* Search for already free memory: + */ + if (alloc_block(bo)) + return 1; + + /* If we're not thrashing, allow lru eviction to dig deeper into + * recently used textures. We'll probably be thrashing soon: + */ + if (!bufmgr_fake->thrashing) { + while (evict_lru(bufmgr_fake, 0)) + if (alloc_block(bo)) + return 1; + } + + /* Keep thrashing counter alive? + */ + if (bufmgr_fake->thrashing) + bufmgr_fake->thrashing = 20; + + /* Wait on any already pending fences - here we are waiting for any + * freed memory that has been submitted to hardware and fenced to + * become available: + */ + while (!DRMLISTEMPTY(&bufmgr_fake->fenced)) { + uint32_t fence = bufmgr_fake->fenced.next->fence; + _fence_wait_internal(bufmgr_fake, fence); + + if (alloc_block(bo)) + return 1; + } + + if (!DRMLISTEMPTY(&bufmgr_fake->on_hardware)) { + while (!DRMLISTEMPTY(&bufmgr_fake->fenced)) { + uint32_t fence = bufmgr_fake->fenced.next->fence; + _fence_wait_internal(bufmgr_fake, fence); + } + + if (!bufmgr_fake->thrashing) { + DBG("thrashing\n"); + } + bufmgr_fake->thrashing = 20; + + if (alloc_block(bo)) + return 1; + } + + while (evict_mru(bufmgr_fake)) + if (alloc_block(bo)) + return 1; + + DBG("%s 0x%lx bytes failed\n", __func__, bo->size); + + return 0; +} + +/*********************************************************************** + * Public functions + */ + +/** + * Wait for hardware idle by emitting a fence and waiting for it. + */ +static void +drm_intel_bufmgr_fake_wait_idle(drm_intel_bufmgr_fake *bufmgr_fake) +{ + unsigned int cookie; + + cookie = _fence_emit_internal(bufmgr_fake); + _fence_wait_internal(bufmgr_fake, cookie); +} + +/** + * Wait for rendering to a buffer to complete. + * + * It is assumed that the batchbuffer which performed the rendering included + * the necessary flushing. + */ +static void +drm_intel_fake_bo_wait_rendering_locked(drm_intel_bo *bo) +{ + drm_intel_bufmgr_fake *bufmgr_fake = + (drm_intel_bufmgr_fake *) bo->bufmgr; + drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo; + + if (bo_fake->block == NULL || !bo_fake->block->fenced) + return; + + _fence_wait_internal(bufmgr_fake, bo_fake->block->fence); +} + +static void +drm_intel_fake_bo_wait_rendering(drm_intel_bo *bo) +{ + drm_intel_bufmgr_fake *bufmgr_fake = + (drm_intel_bufmgr_fake *) bo->bufmgr; + + pthread_mutex_lock(&bufmgr_fake->lock); + drm_intel_fake_bo_wait_rendering_locked(bo); + pthread_mutex_unlock(&bufmgr_fake->lock); +} + +/* Specifically ignore texture memory sharing. + * -- just evict everything + * -- and wait for idle + */ +drm_public void +drm_intel_bufmgr_fake_contended_lock_take(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr; + struct block *block, *tmp; + + pthread_mutex_lock(&bufmgr_fake->lock); + + bufmgr_fake->need_fence = 1; + bufmgr_fake->fail = 0; + + /* Wait for hardware idle. We don't know where acceleration has been + * happening, so we'll need to wait anyway before letting anything get + * put on the card again. + */ + drm_intel_bufmgr_fake_wait_idle(bufmgr_fake); + + /* Check that we hadn't released the lock without having fenced the last + * set of buffers. + */ + assert(DRMLISTEMPTY(&bufmgr_fake->fenced)); + assert(DRMLISTEMPTY(&bufmgr_fake->on_hardware)); + + DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->lru) { + assert(_fence_test(bufmgr_fake, block->fence)); + set_dirty(block->bo); + } + + pthread_mutex_unlock(&bufmgr_fake->lock); +} + +static drm_intel_bo * +drm_intel_fake_bo_alloc(drm_intel_bufmgr *bufmgr, + const char *name, + unsigned long size, + unsigned int alignment) +{ + drm_intel_bufmgr_fake *bufmgr_fake; + drm_intel_bo_fake *bo_fake; + + bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr; + + assert(size != 0); + + bo_fake = calloc(1, sizeof(*bo_fake)); + if (!bo_fake) + return NULL; + + bo_fake->bo.size = size; + bo_fake->bo.offset = -1; + bo_fake->bo.virtual = NULL; + bo_fake->bo.bufmgr = bufmgr; + bo_fake->refcount = 1; + + /* Alignment must be a power of two */ + assert((alignment & (alignment - 1)) == 0); + if (alignment == 0) + alignment = 1; + bo_fake->alignment = alignment; + bo_fake->id = ++bufmgr_fake->buf_nr; + bo_fake->name = name; + bo_fake->flags = 0; + bo_fake->is_static = 0; + + DBG("drm_bo_alloc: (buf %d: %s, %lu kb)\n", bo_fake->id, bo_fake->name, + bo_fake->bo.size / 1024); + + return &bo_fake->bo; +} + +static drm_intel_bo * +drm_intel_fake_bo_alloc_tiled(drm_intel_bufmgr * bufmgr, + const char *name, + int x, int y, int cpp, + uint32_t *tiling_mode, + unsigned long *pitch, + unsigned long flags) +{ + unsigned long stride, aligned_y; + + /* No runtime tiling support for fake. */ + *tiling_mode = I915_TILING_NONE; + + /* Align it for being a render target. Shouldn't need anything else. */ + stride = x * cpp; + stride = ROUND_UP_TO(stride, 64); + + /* 965 subspan loading alignment */ + aligned_y = ALIGN(y, 2); + + *pitch = stride; + + return drm_intel_fake_bo_alloc(bufmgr, name, stride * aligned_y, + 4096); +} + +drm_public drm_intel_bo * +drm_intel_bo_fake_alloc_static(drm_intel_bufmgr *bufmgr, + const char *name, + unsigned long offset, + unsigned long size, void *virtual) +{ + drm_intel_bufmgr_fake *bufmgr_fake; + drm_intel_bo_fake *bo_fake; + + bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr; + + assert(size != 0); + + bo_fake = calloc(1, sizeof(*bo_fake)); + if (!bo_fake) + return NULL; + + bo_fake->bo.size = size; + bo_fake->bo.offset = offset; + bo_fake->bo.virtual = virtual; + bo_fake->bo.bufmgr = bufmgr; + bo_fake->refcount = 1; + bo_fake->id = ++bufmgr_fake->buf_nr; + bo_fake->name = name; + bo_fake->flags = BM_PINNED; + bo_fake->is_static = 1; + + DBG("drm_bo_alloc_static: (buf %d: %s, %lu kb)\n", bo_fake->id, + bo_fake->name, bo_fake->bo.size / 1024); + + return &bo_fake->bo; +} + +static void +drm_intel_fake_bo_reference(drm_intel_bo *bo) +{ + drm_intel_bufmgr_fake *bufmgr_fake = + (drm_intel_bufmgr_fake *) bo->bufmgr; + drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo; + + pthread_mutex_lock(&bufmgr_fake->lock); + bo_fake->refcount++; + pthread_mutex_unlock(&bufmgr_fake->lock); +} + +static void +drm_intel_fake_bo_reference_locked(drm_intel_bo *bo) +{ + drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo; + + bo_fake->refcount++; +} + +static void +drm_intel_fake_bo_unreference_locked(drm_intel_bo *bo) +{ + drm_intel_bufmgr_fake *bufmgr_fake = + (drm_intel_bufmgr_fake *) bo->bufmgr; + drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo; + int i; + + if (--bo_fake->refcount == 0) { + assert(bo_fake->map_count == 0); + /* No remaining references, so free it */ + if (bo_fake->block) + free_block(bufmgr_fake, bo_fake->block, 1); + free_backing_store(bo); + + for (i = 0; i < bo_fake->nr_relocs; i++) + drm_intel_fake_bo_unreference_locked(bo_fake->relocs[i]. + target_buf); + + DBG("drm_bo_unreference: free buf %d %s\n", bo_fake->id, + bo_fake->name); + + free(bo_fake->relocs); + free(bo); + } +} + +static void +drm_intel_fake_bo_unreference(drm_intel_bo *bo) +{ + drm_intel_bufmgr_fake *bufmgr_fake = + (drm_intel_bufmgr_fake *) bo->bufmgr; + + pthread_mutex_lock(&bufmgr_fake->lock); + drm_intel_fake_bo_unreference_locked(bo); + pthread_mutex_unlock(&bufmgr_fake->lock); +} + +/** + * Set the buffer as not requiring backing store, and instead get the callback + * invoked whenever it would be set dirty. + */ +drm_public void +drm_intel_bo_fake_disable_backing_store(drm_intel_bo *bo, + void (*invalidate_cb) (drm_intel_bo *bo, + void *ptr), + void *ptr) +{ + drm_intel_bufmgr_fake *bufmgr_fake = + (drm_intel_bufmgr_fake *) bo->bufmgr; + drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo; + + pthread_mutex_lock(&bufmgr_fake->lock); + + if (bo_fake->backing_store) + free_backing_store(bo); + + bo_fake->flags |= BM_NO_BACKING_STORE; + + DBG("disable_backing_store set buf %d dirty\n", bo_fake->id); + bo_fake->dirty = 1; + bo_fake->invalidate_cb = invalidate_cb; + bo_fake->invalidate_ptr = ptr; + + /* Note that it is invalid right from the start. Also note + * invalidate_cb is called with the bufmgr locked, so cannot + * itself make bufmgr calls. + */ + if (invalidate_cb != NULL) + invalidate_cb(bo, ptr); + + pthread_mutex_unlock(&bufmgr_fake->lock); +} + +/** + * Map a buffer into bo->virtual, allocating either card memory space (If + * BM_NO_BACKING_STORE or BM_PINNED) or backing store, as necessary. + */ +static int + drm_intel_fake_bo_map_locked(drm_intel_bo *bo, int write_enable) +{ + drm_intel_bufmgr_fake *bufmgr_fake = + (drm_intel_bufmgr_fake *) bo->bufmgr; + drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo; + + /* Static buffers are always mapped. */ + if (bo_fake->is_static) { + if (bo_fake->card_dirty) { + drm_intel_bufmgr_fake_wait_idle(bufmgr_fake); + bo_fake->card_dirty = 0; + } + return 0; + } + + /* Allow recursive mapping. Mesa may recursively map buffers with + * nested display loops, and it is used internally in bufmgr_fake + * for relocation. + */ + if (bo_fake->map_count++ != 0) + return 0; + + { + DBG("drm_bo_map: (buf %d: %s, %lu kb)\n", bo_fake->id, + bo_fake->name, bo_fake->bo.size / 1024); + + if (bo->virtual != NULL) { + drmMsg("%s: already mapped\n", __func__); + abort(); + } else if (bo_fake->flags & (BM_NO_BACKING_STORE | BM_PINNED)) { + + if (!bo_fake->block && !evict_and_alloc_block(bo)) { + DBG("%s: alloc failed\n", __func__); + bufmgr_fake->fail = 1; + return 1; + } else { + assert(bo_fake->block); + bo_fake->dirty = 0; + + if (!(bo_fake->flags & BM_NO_FENCE_SUBDATA) && + bo_fake->block->fenced) { + drm_intel_fake_bo_wait_rendering_locked + (bo); + } + + bo->virtual = bo_fake->block->virtual; + } + } else { + if (write_enable) + set_dirty(bo); + + if (bo_fake->backing_store == 0) + alloc_backing_store(bo); + + if ((bo_fake->card_dirty == 1) && bo_fake->block) { + if (bo_fake->block->fenced) + drm_intel_fake_bo_wait_rendering_locked + (bo); + + memcpy(bo_fake->backing_store, + bo_fake->block->virtual, + bo_fake->block->bo->size); + bo_fake->card_dirty = 0; + } + + bo->virtual = bo_fake->backing_store; + } + } + + return 0; +} + +static int + drm_intel_fake_bo_map(drm_intel_bo *bo, int write_enable) +{ + drm_intel_bufmgr_fake *bufmgr_fake = + (drm_intel_bufmgr_fake *) bo->bufmgr; + int ret; + + pthread_mutex_lock(&bufmgr_fake->lock); + ret = drm_intel_fake_bo_map_locked(bo, write_enable); + pthread_mutex_unlock(&bufmgr_fake->lock); + + return ret; +} + +static int + drm_intel_fake_bo_unmap_locked(drm_intel_bo *bo) +{ + drm_intel_bufmgr_fake *bufmgr_fake = + (drm_intel_bufmgr_fake *) bo->bufmgr; + drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo; + + /* Static buffers are always mapped. */ + if (bo_fake->is_static) + return 0; + + assert(bo_fake->map_count != 0); + if (--bo_fake->map_count != 0) + return 0; + + DBG("drm_bo_unmap: (buf %d: %s, %lu kb)\n", bo_fake->id, bo_fake->name, + bo_fake->bo.size / 1024); + + bo->virtual = NULL; + + return 0; +} + +static int drm_intel_fake_bo_unmap(drm_intel_bo *bo) +{ + drm_intel_bufmgr_fake *bufmgr_fake = + (drm_intel_bufmgr_fake *) bo->bufmgr; + int ret; + + pthread_mutex_lock(&bufmgr_fake->lock); + ret = drm_intel_fake_bo_unmap_locked(bo); + pthread_mutex_unlock(&bufmgr_fake->lock); + + return ret; +} + +static int +drm_intel_fake_bo_subdata(drm_intel_bo *bo, unsigned long offset, + unsigned long size, const void *data) +{ + int ret; + + if (size == 0 || data == NULL) + return 0; + + ret = drm_intel_bo_map(bo, 1); + if (ret) + return ret; + memcpy((unsigned char *)bo->virtual + offset, data, size); + drm_intel_bo_unmap(bo); + return 0; +} + +static void + drm_intel_fake_kick_all_locked(drm_intel_bufmgr_fake *bufmgr_fake) +{ + struct block *block, *tmp; + + bufmgr_fake->performed_rendering = 0; + /* okay for ever BO that is on the HW kick it off. + seriously not afraid of the POLICE right now */ + DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->on_hardware) { + drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) block->bo; + + block->on_hardware = 0; + free_block(bufmgr_fake, block, 0); + bo_fake->block = NULL; + bo_fake->validated = 0; + if (!(bo_fake->flags & BM_NO_BACKING_STORE)) + bo_fake->dirty = 1; + } + +} + +static int + drm_intel_fake_bo_validate(drm_intel_bo *bo) +{ + drm_intel_bufmgr_fake *bufmgr_fake; + drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo; + + bufmgr_fake = (drm_intel_bufmgr_fake *) bo->bufmgr; + + DBG("drm_bo_validate: (buf %d: %s, %lu kb)\n", bo_fake->id, + bo_fake->name, bo_fake->bo.size / 1024); + + /* Sanity check: Buffers should be unmapped before being validated. + * This is not so much of a problem for bufmgr_fake, but TTM refuses, + * and the problem is harder to debug there. + */ + assert(bo_fake->map_count == 0); + + if (bo_fake->is_static) { + /* Add it to the needs-fence list */ + bufmgr_fake->need_fence = 1; + return 0; + } + + /* Allocate the card memory */ + if (!bo_fake->block && !evict_and_alloc_block(bo)) { + bufmgr_fake->fail = 1; + DBG("Failed to validate buf %d:%s\n", bo_fake->id, + bo_fake->name); + return -1; + } + + assert(bo_fake->block); + assert(bo_fake->block->bo == &bo_fake->bo); + + bo->offset = bo_fake->block->mem->ofs; + + /* Upload the buffer contents if necessary */ + if (bo_fake->dirty) { + DBG("Upload dirty buf %d:%s, sz %lu offset 0x%x\n", bo_fake->id, + bo_fake->name, bo->size, bo_fake->block->mem->ofs); + + assert(!(bo_fake->flags & (BM_NO_BACKING_STORE | BM_PINNED))); + + /* Actually, should be able to just wait for a fence on the + * memory, which we would be tracking when we free it. Waiting + * for idle is a sufficiently large hammer for now. + */ + drm_intel_bufmgr_fake_wait_idle(bufmgr_fake); + + /* we may never have mapped this BO so it might not have any + * backing store if this happens it should be rare, but 0 the + * card memory in any case */ + if (bo_fake->backing_store) + memcpy(bo_fake->block->virtual, bo_fake->backing_store, + bo->size); + else + memset(bo_fake->block->virtual, 0, bo->size); + + bo_fake->dirty = 0; + } + + bo_fake->block->fenced = 0; + bo_fake->block->on_hardware = 1; + DRMLISTDEL(bo_fake->block); + DRMLISTADDTAIL(bo_fake->block, &bufmgr_fake->on_hardware); + + bo_fake->validated = 1; + bufmgr_fake->need_fence = 1; + + return 0; +} + +static void +drm_intel_fake_fence_validated(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr; + unsigned int cookie; + + cookie = _fence_emit_internal(bufmgr_fake); + fence_blocks(bufmgr_fake, cookie); + + DBG("drm_fence_validated: 0x%08x cookie\n", cookie); +} + +static void +drm_intel_fake_destroy(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr; + + pthread_mutex_destroy(&bufmgr_fake->lock); + mmDestroy(bufmgr_fake->heap); + free(bufmgr); +} + +static int +drm_intel_fake_emit_reloc(drm_intel_bo *bo, uint32_t offset, + drm_intel_bo *target_bo, uint32_t target_offset, + uint32_t read_domains, uint32_t write_domain) +{ + drm_intel_bufmgr_fake *bufmgr_fake = + (drm_intel_bufmgr_fake *) bo->bufmgr; + struct fake_buffer_reloc *r; + drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo; + drm_intel_bo_fake *target_fake = (drm_intel_bo_fake *) target_bo; + int i; + + pthread_mutex_lock(&bufmgr_fake->lock); + + assert(bo); + assert(target_bo); + + if (bo_fake->relocs == NULL) { + bo_fake->relocs = + malloc(sizeof(struct fake_buffer_reloc) * MAX_RELOCS); + } + + r = &bo_fake->relocs[bo_fake->nr_relocs++]; + + assert(bo_fake->nr_relocs <= MAX_RELOCS); + + drm_intel_fake_bo_reference_locked(target_bo); + + if (!target_fake->is_static) { + bo_fake->child_size += + ALIGN(target_bo->size, target_fake->alignment); + bo_fake->child_size += target_fake->child_size; + } + r->target_buf = target_bo; + r->offset = offset; + r->last_target_offset = target_bo->offset; + r->delta = target_offset; + r->read_domains = read_domains; + r->write_domain = write_domain; + + if (bufmgr_fake->debug) { + /* Check that a conflicting relocation hasn't already been + * emitted. + */ + for (i = 0; i < bo_fake->nr_relocs - 1; i++) { + struct fake_buffer_reloc *r2 = &bo_fake->relocs[i]; + + assert(r->offset != r2->offset); + } + } + + pthread_mutex_unlock(&bufmgr_fake->lock); + + return 0; +} + +/** + * Incorporates the validation flags associated with each relocation into + * the combined validation flags for the buffer on this batchbuffer submission. + */ +static void +drm_intel_fake_calculate_domains(drm_intel_bo *bo) +{ + drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo; + int i; + + for (i = 0; i < bo_fake->nr_relocs; i++) { + struct fake_buffer_reloc *r = &bo_fake->relocs[i]; + drm_intel_bo_fake *target_fake = + (drm_intel_bo_fake *) r->target_buf; + + /* Do the same for the tree of buffers we depend on */ + drm_intel_fake_calculate_domains(r->target_buf); + + target_fake->read_domains |= r->read_domains; + target_fake->write_domain |= r->write_domain; + } +} + +static int +drm_intel_fake_reloc_and_validate_buffer(drm_intel_bo *bo) +{ + drm_intel_bufmgr_fake *bufmgr_fake = + (drm_intel_bufmgr_fake *) bo->bufmgr; + drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo; + int i, ret; + + assert(bo_fake->map_count == 0); + + for (i = 0; i < bo_fake->nr_relocs; i++) { + struct fake_buffer_reloc *r = &bo_fake->relocs[i]; + drm_intel_bo_fake *target_fake = + (drm_intel_bo_fake *) r->target_buf; + uint32_t reloc_data; + + /* Validate the target buffer if that hasn't been done. */ + if (!target_fake->validated) { + ret = + drm_intel_fake_reloc_and_validate_buffer(r->target_buf); + if (ret != 0) { + if (bo->virtual != NULL) + drm_intel_fake_bo_unmap_locked(bo); + return ret; + } + } + + /* Calculate the value of the relocation entry. */ + if (r->target_buf->offset != r->last_target_offset) { + reloc_data = r->target_buf->offset + r->delta; + + if (bo->virtual == NULL) + drm_intel_fake_bo_map_locked(bo, 1); + + *(uint32_t *) ((uint8_t *) bo->virtual + r->offset) = + reloc_data; + + r->last_target_offset = r->target_buf->offset; + } + } + + if (bo->virtual != NULL) + drm_intel_fake_bo_unmap_locked(bo); + + if (bo_fake->write_domain != 0) { + if (!(bo_fake->flags & (BM_NO_BACKING_STORE | BM_PINNED))) { + if (bo_fake->backing_store == 0) + alloc_backing_store(bo); + } + bo_fake->card_dirty = 1; + bufmgr_fake->performed_rendering = 1; + } + + return drm_intel_fake_bo_validate(bo); +} + +static void +drm_intel_bo_fake_post_submit(drm_intel_bo *bo) +{ + drm_intel_bufmgr_fake *bufmgr_fake = + (drm_intel_bufmgr_fake *) bo->bufmgr; + drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo; + int i; + + for (i = 0; i < bo_fake->nr_relocs; i++) { + struct fake_buffer_reloc *r = &bo_fake->relocs[i]; + drm_intel_bo_fake *target_fake = + (drm_intel_bo_fake *) r->target_buf; + + if (target_fake->validated) + drm_intel_bo_fake_post_submit(r->target_buf); + + DBG("%s@0x%08x + 0x%08x -> %s@0x%08x + 0x%08x\n", + bo_fake->name, (uint32_t) bo->offset, r->offset, + target_fake->name, (uint32_t) r->target_buf->offset, + r->delta); + } + + assert(bo_fake->map_count == 0); + bo_fake->validated = 0; + bo_fake->read_domains = 0; + bo_fake->write_domain = 0; +} + +drm_public void +drm_intel_bufmgr_fake_set_exec_callback(drm_intel_bufmgr *bufmgr, + int (*exec) (drm_intel_bo *bo, + unsigned int used, + void *priv), + void *priv) +{ + drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr; + + bufmgr_fake->exec = exec; + bufmgr_fake->exec_priv = priv; +} + +static int +drm_intel_fake_bo_exec(drm_intel_bo *bo, int used, + drm_clip_rect_t * cliprects, int num_cliprects, int DR4) +{ + drm_intel_bufmgr_fake *bufmgr_fake = + (drm_intel_bufmgr_fake *) bo->bufmgr; + drm_intel_bo_fake *batch_fake = (drm_intel_bo_fake *) bo; + struct drm_i915_batchbuffer batch; + int ret; + int retry_count = 0; + + pthread_mutex_lock(&bufmgr_fake->lock); + + bufmgr_fake->performed_rendering = 0; + + drm_intel_fake_calculate_domains(bo); + + batch_fake->read_domains = I915_GEM_DOMAIN_COMMAND; + + /* we've ran out of RAM so blow the whole lot away and retry */ +restart: + ret = drm_intel_fake_reloc_and_validate_buffer(bo); + if (bufmgr_fake->fail == 1) { + if (retry_count == 0) { + retry_count++; + drm_intel_fake_kick_all_locked(bufmgr_fake); + bufmgr_fake->fail = 0; + goto restart; + } else /* dump out the memory here */ + mmDumpMemInfo(bufmgr_fake->heap); + } + + assert(ret == 0); + + if (bufmgr_fake->exec != NULL) { + ret = bufmgr_fake->exec(bo, used, bufmgr_fake->exec_priv); + if (ret != 0) { + pthread_mutex_unlock(&bufmgr_fake->lock); + return ret; + } + } else { + batch.start = bo->offset; + batch.used = used; + batch.cliprects = cliprects; + batch.num_cliprects = num_cliprects; + batch.DR1 = 0; + batch.DR4 = DR4; + + if (drmCommandWrite + (bufmgr_fake->fd, DRM_I915_BATCHBUFFER, &batch, + sizeof(batch))) { + drmMsg("DRM_I915_BATCHBUFFER: %d\n", -errno); + pthread_mutex_unlock(&bufmgr_fake->lock); + return -errno; + } + } + + drm_intel_fake_fence_validated(bo->bufmgr); + + drm_intel_bo_fake_post_submit(bo); + + pthread_mutex_unlock(&bufmgr_fake->lock); + + return 0; +} + +/** + * Return an error if the list of BOs will exceed the aperture size. + * + * This is a rough guess and likely to fail, as during the validate sequence we + * may place a buffer in an inopportune spot early on and then fail to fit + * a set smaller than the aperture. + */ +static int +drm_intel_fake_check_aperture_space(drm_intel_bo ** bo_array, int count) +{ + drm_intel_bufmgr_fake *bufmgr_fake = + (drm_intel_bufmgr_fake *) bo_array[0]->bufmgr; + unsigned int sz = 0; + int i; + + for (i = 0; i < count; i++) { + drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo_array[i]; + + if (bo_fake == NULL) + continue; + + if (!bo_fake->is_static) + sz += ALIGN(bo_array[i]->size, bo_fake->alignment); + sz += bo_fake->child_size; + } + + if (sz > bufmgr_fake->size) { + DBG("check_space: overflowed bufmgr size, %ukb vs %lukb\n", + sz / 1024, bufmgr_fake->size / 1024); + return -1; + } + + DBG("drm_check_space: sz %ukb vs bufgr %lukb\n", sz / 1024, + bufmgr_fake->size / 1024); + return 0; +} + +/** + * Evicts all buffers, waiting for fences to pass and copying contents out + * as necessary. + * + * Used by the X Server on LeaveVT, when the card memory is no longer our + * own. + */ +drm_public void +drm_intel_bufmgr_fake_evict_all(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr; + struct block *block, *tmp; + + pthread_mutex_lock(&bufmgr_fake->lock); + + bufmgr_fake->need_fence = 1; + bufmgr_fake->fail = 0; + + /* Wait for hardware idle. We don't know where acceleration has been + * happening, so we'll need to wait anyway before letting anything get + * put on the card again. + */ + drm_intel_bufmgr_fake_wait_idle(bufmgr_fake); + + /* Check that we hadn't released the lock without having fenced the last + * set of buffers. + */ + assert(DRMLISTEMPTY(&bufmgr_fake->fenced)); + assert(DRMLISTEMPTY(&bufmgr_fake->on_hardware)); + + DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->lru) { + drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) block->bo; + /* Releases the memory, and memcpys dirty contents out if + * necessary. + */ + free_block(bufmgr_fake, block, 0); + bo_fake->block = NULL; + } + + pthread_mutex_unlock(&bufmgr_fake->lock); +} + +drm_public void +drm_intel_bufmgr_fake_set_last_dispatch(drm_intel_bufmgr *bufmgr, + volatile unsigned int + *last_dispatch) +{ + drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr; + + bufmgr_fake->last_dispatch = (volatile int *)last_dispatch; +} + +drm_public drm_intel_bufmgr * +drm_intel_bufmgr_fake_init(int fd, unsigned long low_offset, + void *low_virtual, unsigned long size, + volatile unsigned int *last_dispatch) +{ + drm_intel_bufmgr_fake *bufmgr_fake; + + bufmgr_fake = calloc(1, sizeof(*bufmgr_fake)); + + if (pthread_mutex_init(&bufmgr_fake->lock, NULL) != 0) { + free(bufmgr_fake); + return NULL; + } + + /* Initialize allocator */ + DRMINITLISTHEAD(&bufmgr_fake->fenced); + DRMINITLISTHEAD(&bufmgr_fake->on_hardware); + DRMINITLISTHEAD(&bufmgr_fake->lru); + + bufmgr_fake->low_offset = low_offset; + bufmgr_fake->virtual = low_virtual; + bufmgr_fake->size = size; + bufmgr_fake->heap = mmInit(low_offset, size); + + /* Hook in methods */ + bufmgr_fake->bufmgr.bo_alloc = drm_intel_fake_bo_alloc; + bufmgr_fake->bufmgr.bo_alloc_for_render = drm_intel_fake_bo_alloc; + bufmgr_fake->bufmgr.bo_alloc_tiled = drm_intel_fake_bo_alloc_tiled; + bufmgr_fake->bufmgr.bo_reference = drm_intel_fake_bo_reference; + bufmgr_fake->bufmgr.bo_unreference = drm_intel_fake_bo_unreference; + bufmgr_fake->bufmgr.bo_map = drm_intel_fake_bo_map; + bufmgr_fake->bufmgr.bo_unmap = drm_intel_fake_bo_unmap; + bufmgr_fake->bufmgr.bo_subdata = drm_intel_fake_bo_subdata; + bufmgr_fake->bufmgr.bo_wait_rendering = + drm_intel_fake_bo_wait_rendering; + bufmgr_fake->bufmgr.bo_emit_reloc = drm_intel_fake_emit_reloc; + bufmgr_fake->bufmgr.destroy = drm_intel_fake_destroy; + bufmgr_fake->bufmgr.bo_exec = drm_intel_fake_bo_exec; + bufmgr_fake->bufmgr.check_aperture_space = + drm_intel_fake_check_aperture_space; + bufmgr_fake->bufmgr.debug = 0; + + bufmgr_fake->fd = fd; + bufmgr_fake->last_dispatch = (volatile int *)last_dispatch; + + return &bufmgr_fake->bufmgr; +} diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c new file mode 100644 index 0000000..4f17667 --- /dev/null +++ b/intel/intel_bufmgr_gem.c @@ -0,0 +1,3744 @@ +/************************************************************************** + * + * Copyright © 2007 Red Hat Inc. + * Copyright © 2007-2012 Intel Corporation + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ +/* + * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> + * Keith Whitwell <keithw-at-tungstengraphics-dot-com> + * Eric Anholt <eric@anholt.net> + * Dave Airlie <airlied@linux.ie> + */ + +#include <xf86drm.h> +#include <xf86atomic.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <assert.h> +#include <pthread.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <stdbool.h> + +#include "errno.h" +#ifndef ETIME +#define ETIME ETIMEDOUT +#endif +#include "libdrm_macros.h" +#include "libdrm_lists.h" +#include "intel_bufmgr.h" +#include "intel_bufmgr_priv.h" +#include "intel_chipset.h" +#include "string.h" + +#include "i915_drm.h" +#include "uthash.h" + +#if HAVE_VALGRIND +#include <valgrind.h> +#include <memcheck.h> +#define VG(x) x +#else +#define VG(x) +#endif + +#define memclear(s) memset(&s, 0, sizeof(s)) + +#define DBG(...) do { \ + if (bufmgr_gem->bufmgr.debug) \ + fprintf(stderr, __VA_ARGS__); \ +} while (0) + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#define MAX2(A, B) ((A) > (B) ? (A) : (B)) + +/** + * upper_32_bits - return bits 32-63 of a number + * @n: the number we're accessing + * + * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress + * the "right shift count >= width of type" warning when that quantity is + * 32-bits. + */ +#define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16)) + +/** + * lower_32_bits - return bits 0-31 of a number + * @n: the number we're accessing + */ +#define lower_32_bits(n) ((__u32)(n)) + +typedef struct _drm_intel_bo_gem drm_intel_bo_gem; + +struct drm_intel_gem_bo_bucket { + drmMMListHead head; + unsigned long size; +}; + +typedef struct _drm_intel_bufmgr_gem { + drm_intel_bufmgr bufmgr; + + atomic_t refcount; + + int fd; + + int max_relocs; + + pthread_mutex_t lock; + + struct drm_i915_gem_exec_object2 *exec2_objects; + drm_intel_bo **exec_bos; + int exec_size; + int exec_count; + + /** Array of lists of cached gem objects of power-of-two sizes */ + struct drm_intel_gem_bo_bucket cache_bucket[14 * 4]; + int num_buckets; + time_t time; + + drmMMListHead managers; + + drm_intel_bo_gem *name_table; + drm_intel_bo_gem *handle_table; + + drmMMListHead vma_cache; + int vma_count, vma_open, vma_max; + + uint64_t gtt_size; + int available_fences; + int pci_device; + int gen; + unsigned int has_bsd : 1; + unsigned int has_blt : 1; + unsigned int has_relaxed_fencing : 1; + unsigned int has_llc : 1; + unsigned int has_wait_timeout : 1; + unsigned int bo_reuse : 1; + unsigned int no_exec : 1; + unsigned int has_vebox : 1; + unsigned int has_exec_async : 1; + bool fenced_relocs; + + struct { + void *ptr; + uint32_t handle; + } userptr_active; + +} drm_intel_bufmgr_gem; + +#define DRM_INTEL_RELOC_FENCE (1<<0) + +typedef struct _drm_intel_reloc_target_info { + drm_intel_bo *bo; + int flags; +} drm_intel_reloc_target; + +struct _drm_intel_bo_gem { + drm_intel_bo bo; + + atomic_t refcount; + uint32_t gem_handle; + const char *name; + + /** + * Kenel-assigned global name for this object + * + * List contains both flink named and prime fd'd objects + */ + unsigned int global_name; + + UT_hash_handle handle_hh; + UT_hash_handle name_hh; + + /** + * Index of the buffer within the validation list while preparing a + * batchbuffer execution. + */ + int validate_index; + + /** + * Current tiling mode + */ + uint32_t tiling_mode; + uint32_t swizzle_mode; + unsigned long stride; + + unsigned long kflags; + + time_t free_time; + + /** Array passed to the DRM containing relocation information. */ + struct drm_i915_gem_relocation_entry *relocs; + /** + * Array of info structs corresponding to relocs[i].target_handle etc + */ + drm_intel_reloc_target *reloc_target_info; + /** Number of entries in relocs */ + int reloc_count; + /** Array of BOs that are referenced by this buffer and will be softpinned */ + drm_intel_bo **softpin_target; + /** Number softpinned BOs that are referenced by this buffer */ + int softpin_target_count; + /** Maximum amount of softpinned BOs that are referenced by this buffer */ + int softpin_target_size; + + /** Mapped address for the buffer, saved across map/unmap cycles */ + void *mem_virtual; + /** GTT virtual address for the buffer, saved across map/unmap cycles */ + void *gtt_virtual; + /** WC CPU address for the buffer, saved across map/unmap cycles */ + void *wc_virtual; + /** + * Virtual address of the buffer allocated by user, used for userptr + * objects only. + */ + void *user_virtual; + int map_count; + drmMMListHead vma_list; + + /** BO cache list */ + drmMMListHead head; + + /** + * Boolean of whether this BO and its children have been included in + * the current drm_intel_bufmgr_check_aperture_space() total. + */ + bool included_in_check_aperture; + + /** + * Boolean of whether this buffer has been used as a relocation + * target and had its size accounted for, and thus can't have any + * further relocations added to it. + */ + bool used_as_reloc_target; + + /** + * Boolean of whether we have encountered an error whilst building the relocation tree. + */ + bool has_error; + + /** + * Boolean of whether this buffer can be re-used + */ + bool reusable; + + /** + * Boolean of whether the GPU is definitely not accessing the buffer. + * + * This is only valid when reusable, since non-reusable + * buffers are those that have been shared with other + * processes, so we don't know their state. + */ + bool idle; + + /** + * Boolean of whether this buffer was allocated with userptr + */ + bool is_userptr; + + /** + * Size in bytes of this buffer and its relocation descendents. + * + * Used to avoid costly tree walking in + * drm_intel_bufmgr_check_aperture in the common case. + */ + int reloc_tree_size; + + /** + * Number of potential fence registers required by this buffer and its + * relocations. + */ + int reloc_tree_fences; + + /** Flags that we may need to do the SW_FINISH ioctl on unmap. */ + bool mapped_cpu_write; +}; + +static unsigned int +drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count); + +static unsigned int +drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count); + +static int +drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, + uint32_t * swizzle_mode); + +static int +drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, + uint32_t tiling_mode, + uint32_t stride); + +static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, + time_t time); + +static void drm_intel_gem_bo_unreference(drm_intel_bo *bo); + +static void drm_intel_gem_bo_free(drm_intel_bo *bo); + +static inline drm_intel_bo_gem *to_bo_gem(drm_intel_bo *bo) +{ + return (drm_intel_bo_gem *)bo; +} + +static unsigned long +drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size, + uint32_t *tiling_mode) +{ + unsigned long min_size, max_size; + unsigned long i; + + if (*tiling_mode == I915_TILING_NONE) + return size; + + /* 965+ just need multiples of page size for tiling */ + if (bufmgr_gem->gen >= 4) + return ROUND_UP_TO(size, 4096); + + /* Older chips need powers of two, of at least 512k or 1M */ + if (bufmgr_gem->gen == 3) { + min_size = 1024*1024; + max_size = 128*1024*1024; + } else { + min_size = 512*1024; + max_size = 64*1024*1024; + } + + if (size > max_size) { + *tiling_mode = I915_TILING_NONE; + return size; + } + + /* Do we need to allocate every page for the fence? */ + if (bufmgr_gem->has_relaxed_fencing) + return ROUND_UP_TO(size, 4096); + + for (i = min_size; i < size; i <<= 1) + ; + + return i; +} + +/* + * Round a given pitch up to the minimum required for X tiling on a + * given chip. We use 512 as the minimum to allow for a later tiling + * change. + */ +static unsigned long +drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem, + unsigned long pitch, uint32_t *tiling_mode) +{ + unsigned long tile_width; + unsigned long i; + + /* If untiled, then just align it so that we can do rendering + * to it with the 3D engine. + */ + if (*tiling_mode == I915_TILING_NONE) + return ALIGN(pitch, 64); + + if (*tiling_mode == I915_TILING_X + || (IS_915(bufmgr_gem->pci_device) + && *tiling_mode == I915_TILING_Y)) + tile_width = 512; + else + tile_width = 128; + + /* 965 is flexible */ + if (bufmgr_gem->gen >= 4) + return ROUND_UP_TO(pitch, tile_width); + + /* The older hardware has a maximum pitch of 8192 with tiled + * surfaces, so fallback to untiled if it's too large. + */ + if (pitch > 8192) { + *tiling_mode = I915_TILING_NONE; + return ALIGN(pitch, 64); + } + + /* Pre-965 needs power of two tile width */ + for (i = tile_width; i < pitch; i <<= 1) + ; + + return i; +} + +static struct drm_intel_gem_bo_bucket * +drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem, + unsigned long size) +{ + int i; + + for (i = 0; i < bufmgr_gem->num_buckets; i++) { + struct drm_intel_gem_bo_bucket *bucket = + &bufmgr_gem->cache_bucket[i]; + if (bucket->size >= size) { + return bucket; + } + } + + return NULL; +} + +static void +drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) +{ + int i, j; + + for (i = 0; i < bufmgr_gem->exec_count; i++) { + drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) { + DBG("%2d: %d %s(%s)\n", i, bo_gem->gem_handle, + bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "", + bo_gem->name); + continue; + } + + for (j = 0; j < bo_gem->reloc_count; j++) { + drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo; + drm_intel_bo_gem *target_gem = + (drm_intel_bo_gem *) target_bo; + + DBG("%2d: %d %s(%s)@0x%08x %08x -> " + "%d (%s)@0x%08x %08x + 0x%08x\n", + i, + bo_gem->gem_handle, + bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "", + bo_gem->name, + upper_32_bits(bo_gem->relocs[j].offset), + lower_32_bits(bo_gem->relocs[j].offset), + target_gem->gem_handle, + target_gem->name, + upper_32_bits(target_bo->offset64), + lower_32_bits(target_bo->offset64), + bo_gem->relocs[j].delta); + } + + for (j = 0; j < bo_gem->softpin_target_count; j++) { + drm_intel_bo *target_bo = bo_gem->softpin_target[j]; + drm_intel_bo_gem *target_gem = + (drm_intel_bo_gem *) target_bo; + DBG("%2d: %d %s(%s) -> " + "%d *(%s)@0x%08x %08x\n", + i, + bo_gem->gem_handle, + bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "", + bo_gem->name, + target_gem->gem_handle, + target_gem->name, + upper_32_bits(target_bo->offset64), + lower_32_bits(target_bo->offset64)); + } + } +} + +static inline void +drm_intel_gem_bo_reference(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + atomic_inc(&bo_gem->refcount); +} + +/** + * Adds the given buffer to the list of buffers to be validated (moved into the + * appropriate memory type) with the next batch submission. + * + * If a buffer is validated multiple times in a batch submission, it ends up + * with the intersection of the memory type flags and the union of the + * access flags. + */ +static void +drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; + int index; + unsigned long flags; + + flags = 0; + if (need_fence) + flags |= EXEC_OBJECT_NEEDS_FENCE; + + if (bo_gem->validate_index != -1) { + bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= flags; + return; + } + + /* Extend the array of validation entries as necessary. */ + if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { + int new_size = bufmgr_gem->exec_size * 2; + + if (new_size == 0) + new_size = 5; + + bufmgr_gem->exec2_objects = + realloc(bufmgr_gem->exec2_objects, + sizeof(*bufmgr_gem->exec2_objects) * new_size); + bufmgr_gem->exec_bos = + realloc(bufmgr_gem->exec_bos, + sizeof(*bufmgr_gem->exec_bos) * new_size); + bufmgr_gem->exec_size = new_size; + } + + index = bufmgr_gem->exec_count; + bo_gem->validate_index = index; + /* Fill in array entry */ + bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle; + bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count; + bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs; + bufmgr_gem->exec2_objects[index].alignment = bo->align; + bufmgr_gem->exec2_objects[index].offset = bo->offset64; + bufmgr_gem->exec2_objects[index].flags = bo_gem->kflags | flags; + bufmgr_gem->exec2_objects[index].rsvd1 = 0; + bufmgr_gem->exec2_objects[index].rsvd2 = 0; + bufmgr_gem->exec_bos[index] = bo; + bufmgr_gem->exec_count++; +} + +#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ + sizeof(uint32_t)) + +static void +drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem, + drm_intel_bo_gem *bo_gem, + unsigned int alignment) +{ + unsigned int size; + + assert(!bo_gem->used_as_reloc_target); + + /* The older chipsets are far-less flexible in terms of tiling, + * and require tiled buffer to be size aligned in the aperture. + * This means that in the worst possible case we will need a hole + * twice as large as the object in order for it to fit into the + * aperture. Optimal packing is for wimps. + */ + size = bo_gem->bo.size; + if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) { + unsigned int min_size; + + if (bufmgr_gem->has_relaxed_fencing) { + if (bufmgr_gem->gen == 3) + min_size = 1024*1024; + else + min_size = 512*1024; + + while (min_size < size) + min_size *= 2; + } else + min_size = size; + + /* Account for worst-case alignment. */ + alignment = MAX2(alignment, min_size); + } + + bo_gem->reloc_tree_size = size + alignment; +} + +static int +drm_intel_setup_reloc_list(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + unsigned int max_relocs = bufmgr_gem->max_relocs; + + if (bo->size / 4 < max_relocs) + max_relocs = bo->size / 4; + + bo_gem->relocs = malloc(max_relocs * + sizeof(struct drm_i915_gem_relocation_entry)); + bo_gem->reloc_target_info = malloc(max_relocs * + sizeof(drm_intel_reloc_target)); + if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) { + bo_gem->has_error = true; + + free (bo_gem->relocs); + bo_gem->relocs = NULL; + + free (bo_gem->reloc_target_info); + bo_gem->reloc_target_info = NULL; + + return 1; + } + + return 0; +} + +static int +drm_intel_gem_bo_busy(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_busy busy; + int ret; + + if (bo_gem->reusable && bo_gem->idle) + return false; + + memclear(busy); + busy.handle = bo_gem->gem_handle; + + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); + if (ret == 0) { + bo_gem->idle = !busy.busy; + return busy.busy; + } else { + return false; + } +} + +static int +drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem, + drm_intel_bo_gem *bo_gem, int state) +{ + struct drm_i915_gem_madvise madv; + + memclear(madv); + madv.handle = bo_gem->gem_handle; + madv.madv = state; + madv.retained = 1; + drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv); + + return madv.retained; +} + +static int +drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv) +{ + return drm_intel_gem_bo_madvise_internal + ((drm_intel_bufmgr_gem *) bo->bufmgr, + (drm_intel_bo_gem *) bo, + madv); +} + +/* drop the oldest entries that have been purged by the kernel */ +static void +drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem, + struct drm_intel_gem_bo_bucket *bucket) +{ + while (!DRMLISTEMPTY(&bucket->head)) { + drm_intel_bo_gem *bo_gem; + + bo_gem = DRMLISTENTRY(drm_intel_bo_gem, + bucket->head.next, head); + if (drm_intel_gem_bo_madvise_internal + (bufmgr_gem, bo_gem, I915_MADV_DONTNEED)) + break; + + DRMLISTDEL(&bo_gem->head); + drm_intel_gem_bo_free(&bo_gem->bo); + } +} + +static drm_intel_bo * +drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, + const char *name, + unsigned long size, + unsigned long flags, + uint32_t tiling_mode, + unsigned long stride, + unsigned int alignment) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; + drm_intel_bo_gem *bo_gem; + unsigned int page_size = getpagesize(); + int ret; + struct drm_intel_gem_bo_bucket *bucket; + bool alloc_from_cache; + unsigned long bo_size; + bool for_render = false; + + if (flags & BO_ALLOC_FOR_RENDER) + for_render = true; + + /* Round the allocated size up to a power of two number of pages. */ + bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size); + + /* If we don't have caching at this size, don't actually round the + * allocation up. + */ + if (bucket == NULL) { + bo_size = size; + if (bo_size < page_size) + bo_size = page_size; + } else { + bo_size = bucket->size; + } + + pthread_mutex_lock(&bufmgr_gem->lock); + /* Get a buffer out of the cache if available */ +retry: + alloc_from_cache = false; + if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) { + if (for_render) { + /* Allocate new render-target BOs from the tail (MRU) + * of the list, as it will likely be hot in the GPU + * cache and in the aperture for us. + */ + bo_gem = DRMLISTENTRY(drm_intel_bo_gem, + bucket->head.prev, head); + DRMLISTDEL(&bo_gem->head); + alloc_from_cache = true; + bo_gem->bo.align = alignment; + } else { + assert(alignment == 0); + /* For non-render-target BOs (where we're probably + * going to map it first thing in order to fill it + * with data), check if the last BO in the cache is + * unbusy, and only reuse in that case. Otherwise, + * allocating a new buffer is probably faster than + * waiting for the GPU to finish. + */ + bo_gem = DRMLISTENTRY(drm_intel_bo_gem, + bucket->head.next, head); + if (!drm_intel_gem_bo_busy(&bo_gem->bo)) { + alloc_from_cache = true; + DRMLISTDEL(&bo_gem->head); + } + } + + if (alloc_from_cache) { + if (!drm_intel_gem_bo_madvise_internal + (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) { + drm_intel_gem_bo_free(&bo_gem->bo); + drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem, + bucket); + goto retry; + } + + if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, + tiling_mode, + stride)) { + drm_intel_gem_bo_free(&bo_gem->bo); + goto retry; + } + } + } + + if (!alloc_from_cache) { + struct drm_i915_gem_create create; + + bo_gem = calloc(1, sizeof(*bo_gem)); + if (!bo_gem) + goto err; + + /* drm_intel_gem_bo_free calls DRMLISTDEL() for an uninitialized + list (vma_list), so better set the list head here */ + DRMINITLISTHEAD(&bo_gem->vma_list); + + bo_gem->bo.size = bo_size; + + memclear(create); + create.size = bo_size; + + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_CREATE, + &create); + if (ret != 0) { + free(bo_gem); + goto err; + } + + bo_gem->gem_handle = create.handle; + HASH_ADD(handle_hh, bufmgr_gem->handle_table, + gem_handle, sizeof(bo_gem->gem_handle), + bo_gem); + + bo_gem->bo.handle = bo_gem->gem_handle; + bo_gem->bo.bufmgr = bufmgr; + bo_gem->bo.align = alignment; + + bo_gem->tiling_mode = I915_TILING_NONE; + bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + bo_gem->stride = 0; + + if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, + tiling_mode, + stride)) + goto err_free; + } + + bo_gem->name = name; + atomic_set(&bo_gem->refcount, 1); + bo_gem->validate_index = -1; + bo_gem->reloc_tree_fences = 0; + bo_gem->used_as_reloc_target = false; + bo_gem->has_error = false; + bo_gem->reusable = true; + + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment); + pthread_mutex_unlock(&bufmgr_gem->lock); + + DBG("bo_create: buf %d (%s) %ldb\n", + bo_gem->gem_handle, bo_gem->name, size); + + return &bo_gem->bo; + +err_free: + drm_intel_gem_bo_free(&bo_gem->bo); +err: + pthread_mutex_unlock(&bufmgr_gem->lock); + return NULL; +} + +static drm_intel_bo * +drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, + const char *name, + unsigned long size, + unsigned int alignment) +{ + return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, + BO_ALLOC_FOR_RENDER, + I915_TILING_NONE, 0, + alignment); +} + +static drm_intel_bo * +drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, + const char *name, + unsigned long size, + unsigned int alignment) +{ + return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0, + I915_TILING_NONE, 0, 0); +} + +static drm_intel_bo * +drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, + int x, int y, int cpp, uint32_t *tiling_mode, + unsigned long *pitch, unsigned long flags) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + unsigned long size, stride; + uint32_t tiling; + + do { + unsigned long aligned_y, height_alignment; + + tiling = *tiling_mode; + + /* If we're tiled, our allocations are in 8 or 32-row blocks, + * so failure to align our height means that we won't allocate + * enough pages. + * + * If we're untiled, we still have to align to 2 rows high + * because the data port accesses 2x2 blocks even if the + * bottom row isn't to be rendered, so failure to align means + * we could walk off the end of the GTT and fault. This is + * documented on 965, and may be the case on older chipsets + * too so we try to be careful. + */ + aligned_y = y; + height_alignment = 2; + + if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE) + height_alignment = 16; + else if (tiling == I915_TILING_X + || (IS_915(bufmgr_gem->pci_device) + && tiling == I915_TILING_Y)) + height_alignment = 8; + else if (tiling == I915_TILING_Y) + height_alignment = 32; + aligned_y = ALIGN(y, height_alignment); + + stride = x * cpp; + stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode); + size = stride * aligned_y; + size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode); + } while (*tiling_mode != tiling); + *pitch = stride; + + if (tiling == I915_TILING_NONE) + stride = 0; + + return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags, + tiling, stride, 0); +} + +static drm_intel_bo * +drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, + const char *name, + void *addr, + uint32_t tiling_mode, + uint32_t stride, + unsigned long size, + unsigned long flags) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; + drm_intel_bo_gem *bo_gem; + int ret; + struct drm_i915_gem_userptr userptr; + + /* Tiling with userptr surfaces is not supported + * on all hardware so refuse it for time being. + */ + if (tiling_mode != I915_TILING_NONE) + return NULL; + + bo_gem = calloc(1, sizeof(*bo_gem)); + if (!bo_gem) + return NULL; + + atomic_set(&bo_gem->refcount, 1); + DRMINITLISTHEAD(&bo_gem->vma_list); + + bo_gem->bo.size = size; + + memclear(userptr); + userptr.user_ptr = (__u64)((unsigned long)addr); + userptr.user_size = size; + userptr.flags = flags; + + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_USERPTR, + &userptr); + if (ret != 0) { + DBG("bo_create_userptr: " + "ioctl failed with user ptr %p size 0x%lx, " + "user flags 0x%lx\n", addr, size, flags); + free(bo_gem); + return NULL; + } + + pthread_mutex_lock(&bufmgr_gem->lock); + + bo_gem->gem_handle = userptr.handle; + bo_gem->bo.handle = bo_gem->gem_handle; + bo_gem->bo.bufmgr = bufmgr; + bo_gem->is_userptr = true; + bo_gem->bo.virtual = addr; + /* Save the address provided by user */ + bo_gem->user_virtual = addr; + bo_gem->tiling_mode = I915_TILING_NONE; + bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + bo_gem->stride = 0; + + HASH_ADD(handle_hh, bufmgr_gem->handle_table, + gem_handle, sizeof(bo_gem->gem_handle), + bo_gem); + + bo_gem->name = name; + bo_gem->validate_index = -1; + bo_gem->reloc_tree_fences = 0; + bo_gem->used_as_reloc_target = false; + bo_gem->has_error = false; + bo_gem->reusable = false; + + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); + pthread_mutex_unlock(&bufmgr_gem->lock); + + DBG("bo_create_userptr: " + "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n", + addr, bo_gem->gem_handle, bo_gem->name, + size, stride, tiling_mode); + + return &bo_gem->bo; +} + +static bool +has_userptr(drm_intel_bufmgr_gem *bufmgr_gem) +{ + int ret; + void *ptr; + long pgsz; + struct drm_i915_gem_userptr userptr; + + pgsz = sysconf(_SC_PAGESIZE); + assert(pgsz > 0); + + ret = posix_memalign(&ptr, pgsz, pgsz); + if (ret) { + DBG("Failed to get a page (%ld) for userptr detection!\n", + pgsz); + return false; + } + + memclear(userptr); + userptr.user_ptr = (__u64)(unsigned long)ptr; + userptr.user_size = pgsz; + +retry: + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr); + if (ret) { + if (errno == ENODEV && userptr.flags == 0) { + userptr.flags = I915_USERPTR_UNSYNCHRONIZED; + goto retry; + } + free(ptr); + return false; + } + + /* We don't release the userptr bo here as we want to keep the + * kernel mm tracking alive for our lifetime. The first time we + * create a userptr object the kernel has to install a mmu_notifer + * which is a heavyweight operation (e.g. it requires taking all + * mm_locks and stop_machine()). + */ + + bufmgr_gem->userptr_active.ptr = ptr; + bufmgr_gem->userptr_active.handle = userptr.handle; + + return true; +} + +static drm_intel_bo * +check_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, + const char *name, + void *addr, + uint32_t tiling_mode, + uint32_t stride, + unsigned long size, + unsigned long flags) +{ + if (has_userptr((drm_intel_bufmgr_gem *)bufmgr)) + bufmgr->bo_alloc_userptr = drm_intel_gem_bo_alloc_userptr; + else + bufmgr->bo_alloc_userptr = NULL; + + return drm_intel_bo_alloc_userptr(bufmgr, name, addr, + tiling_mode, stride, size, flags); +} + +static int get_tiling_mode(drm_intel_bufmgr_gem *bufmgr_gem, + uint32_t gem_handle, + uint32_t *tiling_mode, + uint32_t *swizzle_mode) +{ + struct drm_i915_gem_get_tiling get_tiling = { + .handle = gem_handle, + }; + int ret; + + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_GET_TILING, + &get_tiling); + if (ret != 0 && errno != EOPNOTSUPP) + return ret; + + *tiling_mode = get_tiling.tiling_mode; + *swizzle_mode = get_tiling.swizzle_mode; + + return 0; +} + +/** + * Returns a drm_intel_bo wrapping the given buffer object handle. + * + * This can be used when one application needs to pass a buffer object + * to another. + */ +drm_public drm_intel_bo * +drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, + const char *name, + unsigned int handle) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; + drm_intel_bo_gem *bo_gem; + int ret; + struct drm_gem_open open_arg; + + /* At the moment most applications only have a few named bo. + * For instance, in a DRI client only the render buffers passed + * between X and the client are named. And since X returns the + * alternating names for the front/back buffer a linear search + * provides a sufficiently fast match. + */ + pthread_mutex_lock(&bufmgr_gem->lock); + HASH_FIND(name_hh, bufmgr_gem->name_table, + &handle, sizeof(handle), bo_gem); + if (bo_gem) { + drm_intel_gem_bo_reference(&bo_gem->bo); + goto out; + } + + memclear(open_arg); + open_arg.name = handle; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_GEM_OPEN, + &open_arg); + if (ret != 0) { + DBG("Couldn't reference %s handle 0x%08x: %s\n", + name, handle, strerror(errno)); + bo_gem = NULL; + goto out; + } + /* Now see if someone has used a prime handle to get this + * object from the kernel before by looking through the list + * again for a matching gem_handle + */ + HASH_FIND(handle_hh, bufmgr_gem->handle_table, + &open_arg.handle, sizeof(open_arg.handle), bo_gem); + if (bo_gem) { + drm_intel_gem_bo_reference(&bo_gem->bo); + goto out; + } + + bo_gem = calloc(1, sizeof(*bo_gem)); + if (!bo_gem) + goto out; + + atomic_set(&bo_gem->refcount, 1); + DRMINITLISTHEAD(&bo_gem->vma_list); + + bo_gem->bo.size = open_arg.size; + bo_gem->bo.offset = 0; + bo_gem->bo.offset64 = 0; + bo_gem->bo.virtual = NULL; + bo_gem->bo.bufmgr = bufmgr; + bo_gem->name = name; + bo_gem->validate_index = -1; + bo_gem->gem_handle = open_arg.handle; + bo_gem->bo.handle = open_arg.handle; + bo_gem->global_name = handle; + bo_gem->reusable = false; + + HASH_ADD(handle_hh, bufmgr_gem->handle_table, + gem_handle, sizeof(bo_gem->gem_handle), bo_gem); + HASH_ADD(name_hh, bufmgr_gem->name_table, + global_name, sizeof(bo_gem->global_name), bo_gem); + + ret = get_tiling_mode(bufmgr_gem, bo_gem->gem_handle, + &bo_gem->tiling_mode, &bo_gem->swizzle_mode); + if (ret != 0) + goto err_unref; + + /* XXX stride is unknown */ + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); + DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name); + +out: + pthread_mutex_unlock(&bufmgr_gem->lock); + return &bo_gem->bo; + +err_unref: + drm_intel_gem_bo_free(&bo_gem->bo); + pthread_mutex_unlock(&bufmgr_gem->lock); + return NULL; +} + +static void +drm_intel_gem_bo_free(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + int ret; + + DRMLISTDEL(&bo_gem->vma_list); + if (bo_gem->mem_virtual) { + VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0)); + drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size); + bufmgr_gem->vma_count--; + } + if (bo_gem->wc_virtual) { + VG(VALGRIND_FREELIKE_BLOCK(bo_gem->wc_virtual, 0)); + drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size); + bufmgr_gem->vma_count--; + } + if (bo_gem->gtt_virtual) { + drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size); + bufmgr_gem->vma_count--; + } + + if (bo_gem->global_name) + HASH_DELETE(name_hh, bufmgr_gem->name_table, bo_gem); + HASH_DELETE(handle_hh, bufmgr_gem->handle_table, bo_gem); + + /* Close this object */ + ret = drmCloseBufferHandle(bufmgr_gem->fd, bo_gem->gem_handle); + if (ret != 0) { + DBG("drmCloseBufferHandle %d failed (%s): %s\n", + bo_gem->gem_handle, bo_gem->name, strerror(errno)); + } + free(bo); +} + +static void +drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo) +{ +#if HAVE_VALGRIND + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (bo_gem->mem_virtual) + VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size); + + if (bo_gem->wc_virtual) + VALGRIND_MAKE_MEM_NOACCESS(bo_gem->wc_virtual, bo->size); + + if (bo_gem->gtt_virtual) + VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size); +#endif +} + +/** Frees all cached buffers significantly older than @time. */ +static void +drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time) +{ + int i; + + if (bufmgr_gem->time == time) + return; + + for (i = 0; i < bufmgr_gem->num_buckets; i++) { + struct drm_intel_gem_bo_bucket *bucket = + &bufmgr_gem->cache_bucket[i]; + + while (!DRMLISTEMPTY(&bucket->head)) { + drm_intel_bo_gem *bo_gem; + + bo_gem = DRMLISTENTRY(drm_intel_bo_gem, + bucket->head.next, head); + if (time - bo_gem->free_time <= 1) + break; + + DRMLISTDEL(&bo_gem->head); + + drm_intel_gem_bo_free(&bo_gem->bo); + } + } + + bufmgr_gem->time = time; +} + +static void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem) +{ + int limit; + + DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__, + bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max); + + if (bufmgr_gem->vma_max < 0) + return; + + /* We may need to evict a few entries in order to create new mmaps */ + limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open; + if (limit < 0) + limit = 0; + + while (bufmgr_gem->vma_count > limit) { + drm_intel_bo_gem *bo_gem; + + bo_gem = DRMLISTENTRY(drm_intel_bo_gem, + bufmgr_gem->vma_cache.next, + vma_list); + assert(bo_gem->map_count == 0); + DRMLISTDELINIT(&bo_gem->vma_list); + + if (bo_gem->mem_virtual) { + drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size); + bo_gem->mem_virtual = NULL; + bufmgr_gem->vma_count--; + } + if (bo_gem->wc_virtual) { + drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size); + bo_gem->wc_virtual = NULL; + bufmgr_gem->vma_count--; + } + if (bo_gem->gtt_virtual) { + drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size); + bo_gem->gtt_virtual = NULL; + bufmgr_gem->vma_count--; + } + } +} + +static void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem, + drm_intel_bo_gem *bo_gem) +{ + bufmgr_gem->vma_open--; + DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache); + if (bo_gem->mem_virtual) + bufmgr_gem->vma_count++; + if (bo_gem->wc_virtual) + bufmgr_gem->vma_count++; + if (bo_gem->gtt_virtual) + bufmgr_gem->vma_count++; + drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); +} + +static void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem, + drm_intel_bo_gem *bo_gem) +{ + bufmgr_gem->vma_open++; + DRMLISTDEL(&bo_gem->vma_list); + if (bo_gem->mem_virtual) + bufmgr_gem->vma_count--; + if (bo_gem->wc_virtual) + bufmgr_gem->vma_count--; + if (bo_gem->gtt_virtual) + bufmgr_gem->vma_count--; + drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); +} + +static void +drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_intel_gem_bo_bucket *bucket; + int i; + + /* Unreference all the target buffers */ + for (i = 0; i < bo_gem->reloc_count; i++) { + if (bo_gem->reloc_target_info[i].bo != bo) { + drm_intel_gem_bo_unreference_locked_timed(bo_gem-> + reloc_target_info[i].bo, + time); + } + } + for (i = 0; i < bo_gem->softpin_target_count; i++) + drm_intel_gem_bo_unreference_locked_timed(bo_gem->softpin_target[i], + time); + bo_gem->kflags = 0; + bo_gem->reloc_count = 0; + bo_gem->used_as_reloc_target = false; + bo_gem->softpin_target_count = 0; + + DBG("bo_unreference final: %d (%s)\n", + bo_gem->gem_handle, bo_gem->name); + + /* release memory associated with this object */ + if (bo_gem->reloc_target_info) { + free(bo_gem->reloc_target_info); + bo_gem->reloc_target_info = NULL; + } + if (bo_gem->relocs) { + free(bo_gem->relocs); + bo_gem->relocs = NULL; + } + if (bo_gem->softpin_target) { + free(bo_gem->softpin_target); + bo_gem->softpin_target = NULL; + bo_gem->softpin_target_size = 0; + } + + /* Clear any left-over mappings */ + if (bo_gem->map_count) { + DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count); + bo_gem->map_count = 0; + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + } + + bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size); + /* Put the buffer into our internal cache for reuse if we can. */ + if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL && + drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem, + I915_MADV_DONTNEED)) { + bo_gem->free_time = time; + + bo_gem->name = NULL; + bo_gem->validate_index = -1; + + DRMLISTADDTAIL(&bo_gem->head, &bucket->head); + } else { + drm_intel_gem_bo_free(bo); + } +} + +static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, + time_t time) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + assert(atomic_read(&bo_gem->refcount) > 0); + if (atomic_dec_and_test(&bo_gem->refcount)) + drm_intel_gem_bo_unreference_final(bo, time); +} + +static void drm_intel_gem_bo_unreference(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + drm_intel_bufmgr_gem *bufmgr_gem; + struct timespec time; + + assert(atomic_read(&bo_gem->refcount) > 0); + + if (atomic_add_unless(&bo_gem->refcount, -1, 1)) + return; + + bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + + clock_gettime(CLOCK_MONOTONIC, &time); + + pthread_mutex_lock(&bufmgr_gem->lock); + + if (atomic_dec_and_test(&bo_gem->refcount)) { + drm_intel_gem_bo_unreference_final(bo, time.tv_sec); + drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec); + } + + pthread_mutex_unlock(&bufmgr_gem->lock); +} + +static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_set_domain set_domain; + int ret; + + if (bo_gem->is_userptr) { + /* Return the same user ptr */ + bo->virtual = bo_gem->user_virtual; + return 0; + } + + pthread_mutex_lock(&bufmgr_gem->lock); + + if (bo_gem->map_count++ == 0) + drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); + + if (!bo_gem->mem_virtual) { + struct drm_i915_gem_mmap mmap_arg; + + DBG("bo_map: %d (%s), map_count=%d\n", + bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); + + memclear(mmap_arg); + mmap_arg.handle = bo_gem->gem_handle; + mmap_arg.size = bo->size; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_MMAP, + &mmap_arg); + if (ret != 0) { + ret = -errno; + DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", + __FILE__, __LINE__, bo_gem->gem_handle, + bo_gem->name, strerror(errno)); + if (--bo_gem->map_count == 0) + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + pthread_mutex_unlock(&bufmgr_gem->lock); + return ret; + } + VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); + bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; + } + DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, + bo_gem->mem_virtual); + bo->virtual = bo_gem->mem_virtual; + + memclear(set_domain); + set_domain.handle = bo_gem->gem_handle; + set_domain.read_domains = I915_GEM_DOMAIN_CPU; + if (write_enable) + set_domain.write_domain = I915_GEM_DOMAIN_CPU; + else + set_domain.write_domain = 0; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_SET_DOMAIN, + &set_domain); + if (ret != 0) { + DBG("%s:%d: Error setting to CPU domain %d: %s\n", + __FILE__, __LINE__, bo_gem->gem_handle, + strerror(errno)); + } + + if (write_enable) + bo_gem->mapped_cpu_write = true; + + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size)); + pthread_mutex_unlock(&bufmgr_gem->lock); + + return 0; +} + +static int +map_gtt(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + int ret; + + if (bo_gem->is_userptr) + return -EINVAL; + + if (bo_gem->map_count++ == 0) + drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); + + /* Get a mapping of the buffer if we haven't before. */ + if (bo_gem->gtt_virtual == NULL) { + struct drm_i915_gem_mmap_gtt mmap_arg; + + DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n", + bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); + + memclear(mmap_arg); + mmap_arg.handle = bo_gem->gem_handle; + + /* Get the fake offset back... */ + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_MMAP_GTT, + &mmap_arg); + if (ret != 0) { + ret = -errno; + DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n", + __FILE__, __LINE__, + bo_gem->gem_handle, bo_gem->name, + strerror(errno)); + if (--bo_gem->map_count == 0) + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + return ret; + } + + /* and mmap it */ + bo_gem->gtt_virtual = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE, + MAP_SHARED, bufmgr_gem->fd, + mmap_arg.offset); + if (bo_gem->gtt_virtual == MAP_FAILED) { + bo_gem->gtt_virtual = NULL; + ret = -errno; + DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", + __FILE__, __LINE__, + bo_gem->gem_handle, bo_gem->name, + strerror(errno)); + if (--bo_gem->map_count == 0) + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + return ret; + } + } + + bo->virtual = bo_gem->gtt_virtual; + + DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, + bo_gem->gtt_virtual); + + return 0; +} + +drm_public int +drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_set_domain set_domain; + int ret; + + pthread_mutex_lock(&bufmgr_gem->lock); + + ret = map_gtt(bo); + if (ret) { + pthread_mutex_unlock(&bufmgr_gem->lock); + return ret; + } + + /* Now move it to the GTT domain so that the GPU and CPU + * caches are flushed and the GPU isn't actively using the + * buffer. + * + * The pagefault handler does this domain change for us when + * it has unbound the BO from the GTT, but it's up to us to + * tell it when we're about to use things if we had done + * rendering and it still happens to be bound to the GTT. + */ + memclear(set_domain); + set_domain.handle = bo_gem->gem_handle; + set_domain.read_domains = I915_GEM_DOMAIN_GTT; + set_domain.write_domain = I915_GEM_DOMAIN_GTT; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_SET_DOMAIN, + &set_domain); + if (ret != 0) { + DBG("%s:%d: Error setting domain %d: %s\n", + __FILE__, __LINE__, bo_gem->gem_handle, + strerror(errno)); + } + + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); + pthread_mutex_unlock(&bufmgr_gem->lock); + + return 0; +} + +/** + * Performs a mapping of the buffer object like the normal GTT + * mapping, but avoids waiting for the GPU to be done reading from or + * rendering to the buffer. + * + * This is used in the implementation of GL_ARB_map_buffer_range: The + * user asks to create a buffer, then does a mapping, fills some + * space, runs a drawing command, then asks to map it again without + * synchronizing because it guarantees that it won't write over the + * data that the GPU is busy using (or, more specifically, that if it + * does write over the data, it acknowledges that rendering is + * undefined). + */ + +drm_public int +drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; +#if HAVE_VALGRIND + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; +#endif + int ret; + + /* If the CPU cache isn't coherent with the GTT, then use a + * regular synchronized mapping. The problem is that we don't + * track where the buffer was last used on the CPU side in + * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so + * we would potentially corrupt the buffer even when the user + * does reasonable things. + */ + if (!bufmgr_gem->has_llc) + return drm_intel_gem_bo_map_gtt(bo); + + pthread_mutex_lock(&bufmgr_gem->lock); + + ret = map_gtt(bo); + if (ret == 0) { + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); + } + + pthread_mutex_unlock(&bufmgr_gem->lock); + + return ret; +} + +static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + int ret = 0; + + if (bo == NULL) + return 0; + + if (bo_gem->is_userptr) + return 0; + + bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + + pthread_mutex_lock(&bufmgr_gem->lock); + + if (bo_gem->map_count <= 0) { + DBG("attempted to unmap an unmapped bo\n"); + pthread_mutex_unlock(&bufmgr_gem->lock); + /* Preserve the old behaviour of just treating this as a + * no-op rather than reporting the error. + */ + return 0; + } + + if (bo_gem->mapped_cpu_write) { + struct drm_i915_gem_sw_finish sw_finish; + + /* Cause a flush to happen if the buffer's pinned for + * scanout, so the results show up in a timely manner. + * Unlike GTT set domains, this only does work if the + * buffer should be scanout-related. + */ + memclear(sw_finish); + sw_finish.handle = bo_gem->gem_handle; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_SW_FINISH, + &sw_finish); + ret = ret == -1 ? -errno : 0; + + bo_gem->mapped_cpu_write = false; + } + + /* We need to unmap after every innovation as we cannot track + * an open vma for every bo as that will exhaust the system + * limits and cause later failures. + */ + if (--bo_gem->map_count == 0) { + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + bo->virtual = NULL; + } + pthread_mutex_unlock(&bufmgr_gem->lock); + + return ret; +} + +drm_public int +drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo) +{ + return drm_intel_gem_bo_unmap(bo); +} + +static bool is_cache_coherent(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_caching arg = {}; + + arg.handle = bo_gem->gem_handle; + if (drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_CACHING, &arg)) + assert(false); + return arg.caching != I915_CACHING_NONE; +} + +static void set_domain(drm_intel_bo *bo, uint32_t read, uint32_t write) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_set_domain arg = {}; + + arg.handle = bo_gem->gem_handle; + arg.read_domains = read; + arg.write_domain = write; + if (drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg)) + assert(false); +} + +static int mmap_write(drm_intel_bo *bo, unsigned long offset, + unsigned long length, const void *buf) +{ + void *map = NULL; + + if (!length) + return 0; + + if (is_cache_coherent(bo)) { + map = drm_intel_gem_bo_map__cpu(bo); + if (map) + set_domain(bo, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU); + } + if (!map) { + map = drm_intel_gem_bo_map__wc(bo); + if (map) + set_domain(bo, I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC); + } + + assert(map); + memcpy((char *)map + offset, buf, length); + drm_intel_gem_bo_unmap(bo); + return 0; +} + +static int mmap_read(drm_intel_bo *bo, unsigned long offset, + unsigned long length, void *buf) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + void *map = NULL; + + if (!length) + return 0; + + if (bufmgr_gem->has_llc || is_cache_coherent(bo)) { + map = drm_intel_gem_bo_map__cpu(bo); + if (map) + set_domain(bo, I915_GEM_DOMAIN_CPU, 0); + } + if (!map) { + map = drm_intel_gem_bo_map__wc(bo); + if (map) + set_domain(bo, I915_GEM_DOMAIN_WC, 0); + } + + assert(map); + memcpy(buf, (char *)map + offset, length); + drm_intel_gem_bo_unmap(bo); + return 0; +} + +static int +drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset, + unsigned long size, const void *data) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_pwrite pwrite; + int ret; + + if (bo_gem->is_userptr) + return -EINVAL; + + memclear(pwrite); + pwrite.handle = bo_gem->gem_handle; + pwrite.offset = offset; + pwrite.size = size; + pwrite.data_ptr = (uint64_t) (uintptr_t) data; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_PWRITE, + &pwrite); + if (ret) + ret = -errno; + + if (ret != 0 && ret != -EOPNOTSUPP) { + DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n", + __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, + (int)size, strerror(errno)); + return ret; + } + + if (ret == -EOPNOTSUPP) + mmap_write(bo, offset, size, data); + + return 0; +} + +static int +drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; + struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id; + int ret; + + memclear(get_pipe_from_crtc_id); + get_pipe_from_crtc_id.crtc_id = crtc_id; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, + &get_pipe_from_crtc_id); + if (ret != 0) { + /* We return -1 here to signal that we don't + * know which pipe is associated with this crtc. + * This lets the caller know that this information + * isn't available; using the wrong pipe for + * vblank waiting can cause the chipset to lock up + */ + return -1; + } + + return get_pipe_from_crtc_id.pipe; +} + +static int +drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, + unsigned long size, void *data) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_pread pread; + int ret; + + if (bo_gem->is_userptr) + return -EINVAL; + + memclear(pread); + pread.handle = bo_gem->gem_handle; + pread.offset = offset; + pread.size = size; + pread.data_ptr = (uint64_t) (uintptr_t) data; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_PREAD, + &pread); + if (ret) + ret = -errno; + + if (ret != 0 && ret != -EOPNOTSUPP) { + DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n", + __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, + (int)size, strerror(errno)); + return ret; + } + + if (ret == -EOPNOTSUPP) + mmap_read(bo, offset, size, data); + + return 0; +} + +/** Waits for all GPU rendering with the object to have completed. */ +static void +drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo) +{ + drm_intel_gem_bo_start_gtt_access(bo, 1); +} + +/** + * Waits on a BO for the given amount of time. + * + * @bo: buffer object to wait for + * @timeout_ns: amount of time to wait in nanoseconds. + * If value is less than 0, an infinite wait will occur. + * + * Returns 0 if the wait was successful ie. the last batch referencing the + * object has completed within the allotted time. Otherwise some negative return + * value describes the error. Of particular interest is -ETIME when the wait has + * failed to yield the desired result. + * + * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows + * the operation to give up after a certain amount of time. Another subtle + * difference is the internal locking semantics are different (this variant does + * not hold the lock for the duration of the wait). This makes the wait subject + * to a larger userspace race window. + * + * The implementation shall wait until the object is no longer actively + * referenced within a batch buffer at the time of the call. The wait will + * not guarantee that the buffer is re-issued via another thread, or an flinked + * handle. Userspace must make sure this race does not occur if such precision + * is important. + * + * Note that some kernels have broken the inifite wait for negative values + * promise, upgrade to latest stable kernels if this is the case. + */ +drm_public int +drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_wait wait; + int ret; + + if (!bufmgr_gem->has_wait_timeout) { + DBG("%s:%d: Timed wait is not supported. Falling back to " + "infinite wait\n", __FILE__, __LINE__); + if (timeout_ns) { + drm_intel_gem_bo_wait_rendering(bo); + return 0; + } else { + return drm_intel_gem_bo_busy(bo) ? -ETIME : 0; + } + } + + memclear(wait); + wait.bo_handle = bo_gem->gem_handle; + wait.timeout_ns = timeout_ns; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); + if (ret == -1) + return -errno; + + return ret; +} + +/** + * Sets the object to the GTT read and possibly write domain, used by the X + * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt(). + * + * In combination with drm_intel_gem_bo_pin() and manual fence management, we + * can do tiled pixmaps this way. + */ +drm_public void +drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_set_domain set_domain; + int ret; + + memclear(set_domain); + set_domain.handle = bo_gem->gem_handle; + set_domain.read_domains = I915_GEM_DOMAIN_GTT; + set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_SET_DOMAIN, + &set_domain); + if (ret != 0) { + DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", + __FILE__, __LINE__, bo_gem->gem_handle, + set_domain.read_domains, set_domain.write_domain, + strerror(errno)); + } +} + +static void +drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; + int i, ret; + + free(bufmgr_gem->exec2_objects); + free(bufmgr_gem->exec_bos); + + pthread_mutex_destroy(&bufmgr_gem->lock); + + /* Free any cached buffer objects we were going to reuse */ + for (i = 0; i < bufmgr_gem->num_buckets; i++) { + struct drm_intel_gem_bo_bucket *bucket = + &bufmgr_gem->cache_bucket[i]; + drm_intel_bo_gem *bo_gem; + + while (!DRMLISTEMPTY(&bucket->head)) { + bo_gem = DRMLISTENTRY(drm_intel_bo_gem, + bucket->head.next, head); + DRMLISTDEL(&bo_gem->head); + + drm_intel_gem_bo_free(&bo_gem->bo); + } + } + + /* Release userptr bo kept hanging around for optimisation. */ + if (bufmgr_gem->userptr_active.ptr) { + ret = drmCloseBufferHandle(bufmgr_gem->fd, + bufmgr_gem->userptr_active.handle); + free(bufmgr_gem->userptr_active.ptr); + if (ret) + fprintf(stderr, + "Failed to release test userptr object! (%d) " + "i915 kernel driver may not be sane!\n", errno); + } + + free(bufmgr); +} + +/** + * Adds the target buffer to the validation list and adds the relocation + * to the reloc_buffer's relocation list. + * + * The relocation entry at the given offset must already contain the + * precomputed relocation value, because the kernel will optimize out + * the relocation entry write when the buffer hasn't moved from the + * last known offset in target_bo. + */ +static int +do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, + drm_intel_bo *target_bo, uint32_t target_offset, + uint32_t read_domains, uint32_t write_domain, + bool need_fence) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; + bool fenced_command; + + if (bo_gem->has_error) + return -ENOMEM; + + if (target_bo_gem->has_error) { + bo_gem->has_error = true; + return -ENOMEM; + } + + /* We never use HW fences for rendering on 965+ */ + if (bufmgr_gem->gen >= 4) + need_fence = false; + + fenced_command = need_fence; + if (target_bo_gem->tiling_mode == I915_TILING_NONE) + need_fence = false; + + /* Create a new relocation list if needed */ + if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo)) + return -ENOMEM; + + /* Check overflow */ + assert(bo_gem->reloc_count < bufmgr_gem->max_relocs); + + /* Check args */ + assert(offset <= bo->size - 4); + assert((write_domain & (write_domain - 1)) == 0); + + /* An object needing a fence is a tiled buffer, so it won't have + * relocs to other buffers. + */ + if (need_fence) { + assert(target_bo_gem->reloc_count == 0); + target_bo_gem->reloc_tree_fences = 1; + } + + /* Make sure that we're not adding a reloc to something whose size has + * already been accounted for. + */ + assert(!bo_gem->used_as_reloc_target); + if (target_bo_gem != bo_gem) { + target_bo_gem->used_as_reloc_target = true; + bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size; + bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences; + } + + bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo; + if (target_bo != bo) + drm_intel_gem_bo_reference(target_bo); + if (fenced_command) + bo_gem->reloc_target_info[bo_gem->reloc_count].flags = + DRM_INTEL_RELOC_FENCE; + else + bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0; + + bo_gem->relocs[bo_gem->reloc_count].offset = offset; + bo_gem->relocs[bo_gem->reloc_count].delta = target_offset; + bo_gem->relocs[bo_gem->reloc_count].target_handle = + target_bo_gem->gem_handle; + bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; + bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; + bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64; + bo_gem->reloc_count++; + + return 0; +} + +static void +drm_intel_gem_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (enable) + bo_gem->kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; + else + bo_gem->kflags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS; +} + +static int +drm_intel_gem_bo_add_softpin_target(drm_intel_bo *bo, drm_intel_bo *target_bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; + if (bo_gem->has_error) + return -ENOMEM; + + if (target_bo_gem->has_error) { + bo_gem->has_error = true; + return -ENOMEM; + } + + if (!(target_bo_gem->kflags & EXEC_OBJECT_PINNED)) + return -EINVAL; + if (target_bo_gem == bo_gem) + return -EINVAL; + + if (bo_gem->softpin_target_count == bo_gem->softpin_target_size) { + int new_size = bo_gem->softpin_target_size * 2; + if (new_size == 0) + new_size = bufmgr_gem->max_relocs; + + bo_gem->softpin_target = realloc(bo_gem->softpin_target, new_size * + sizeof(drm_intel_bo *)); + if (!bo_gem->softpin_target) + return -ENOMEM; + + bo_gem->softpin_target_size = new_size; + } + bo_gem->softpin_target[bo_gem->softpin_target_count] = target_bo; + drm_intel_gem_bo_reference(target_bo); + bo_gem->softpin_target_count++; + + return 0; +} + +static int +drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, + drm_intel_bo *target_bo, uint32_t target_offset, + uint32_t read_domains, uint32_t write_domain) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *)target_bo; + + if (target_bo_gem->kflags & EXEC_OBJECT_PINNED) + return drm_intel_gem_bo_add_softpin_target(bo, target_bo); + else + return do_bo_emit_reloc(bo, offset, target_bo, target_offset, + read_domains, write_domain, + !bufmgr_gem->fenced_relocs); +} + +static int +drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, + drm_intel_bo *target_bo, + uint32_t target_offset, + uint32_t read_domains, uint32_t write_domain) +{ + return do_bo_emit_reloc(bo, offset, target_bo, target_offset, + read_domains, write_domain, true); +} + +drm_public int +drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + return bo_gem->reloc_count; +} + +/** + * Removes existing relocation entries in the BO after "start". + * + * This allows a user to avoid a two-step process for state setup with + * counting up all the buffer objects and doing a + * drm_intel_bufmgr_check_aperture_space() before emitting any of the + * relocations for the state setup. Instead, save the state of the + * batchbuffer including drm_intel_gem_get_reloc_count(), emit all the + * state, and then check if it still fits in the aperture. + * + * Any further drm_intel_bufmgr_check_aperture_space() queries + * involving this buffer in the tree are undefined after this call. + * + * This also removes all softpinned targets being referenced by the BO. + */ +drm_public void +drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + int i; + struct timespec time; + + clock_gettime(CLOCK_MONOTONIC, &time); + + assert(bo_gem->reloc_count >= start); + + /* Unreference the cleared target buffers */ + pthread_mutex_lock(&bufmgr_gem->lock); + + for (i = start; i < bo_gem->reloc_count; i++) { + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo; + if (&target_bo_gem->bo != bo) { + bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences; + drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, + time.tv_sec); + } + } + bo_gem->reloc_count = start; + + for (i = 0; i < bo_gem->softpin_target_count; i++) { + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->softpin_target[i]; + drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, time.tv_sec); + } + bo_gem->softpin_target_count = 0; + + pthread_mutex_unlock(&bufmgr_gem->lock); + +} + +/** + * Walk the tree of relocations rooted at BO and accumulate the list of + * validations to be performed and update the relocation buffers with + * index values into the validation list. + */ +static void +drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; + int i; + + if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) + return; + + for (i = 0; i < bo_gem->reloc_count; i++) { + drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; + int need_fence; + + if (target_bo == bo) + continue; + + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + + /* Continue walking the tree depth-first. */ + drm_intel_gem_bo_process_reloc2(target_bo); + + need_fence = (bo_gem->reloc_target_info[i].flags & + DRM_INTEL_RELOC_FENCE); + + /* Add the target to the validate list */ + drm_intel_add_validate_buffer2(target_bo, need_fence); + } + + for (i = 0; i < bo_gem->softpin_target_count; i++) { + drm_intel_bo *target_bo = bo_gem->softpin_target[i]; + + if (target_bo == bo) + continue; + + drm_intel_gem_bo_mark_mmaps_incoherent(bo); + drm_intel_gem_bo_process_reloc2(target_bo); + drm_intel_add_validate_buffer2(target_bo, false); + } +} + +static void +drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) +{ + int i; + + for (i = 0; i < bufmgr_gem->exec_count; i++) { + drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; + + /* Update the buffer offset */ + if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) { + /* If we're seeing softpinned object here it means that the kernel + * has relocated our object... Indicating a programming error + */ + assert(!(bo_gem->kflags & EXEC_OBJECT_PINNED)); + DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n", + bo_gem->gem_handle, bo_gem->name, + upper_32_bits(bo->offset64), + lower_32_bits(bo->offset64), + upper_32_bits(bufmgr_gem->exec2_objects[i].offset), + lower_32_bits(bufmgr_gem->exec2_objects[i].offset)); + bo->offset64 = bufmgr_gem->exec2_objects[i].offset; + bo->offset = bufmgr_gem->exec2_objects[i].offset; + } + } +} + +drm_public void +drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, + int x1, int y1, int width, int height, + enum aub_dump_bmp_format format, + int pitch, int offset) +{ +} + +static int +do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx, + drm_clip_rect_t *cliprects, int num_cliprects, int DR4, + int in_fence, int *out_fence, + unsigned int flags) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; + struct drm_i915_gem_execbuffer2 execbuf; + int ret = 0; + int i; + + if (to_bo_gem(bo)->has_error) + return -ENOMEM; + + switch (flags & 0x7) { + default: + return -EINVAL; + case I915_EXEC_BLT: + if (!bufmgr_gem->has_blt) + return -EINVAL; + break; + case I915_EXEC_BSD: + if (!bufmgr_gem->has_bsd) + return -EINVAL; + break; + case I915_EXEC_VEBOX: + if (!bufmgr_gem->has_vebox) + return -EINVAL; + break; + case I915_EXEC_RENDER: + case I915_EXEC_DEFAULT: + break; + } + + pthread_mutex_lock(&bufmgr_gem->lock); + /* Update indices and set up the validate list. */ + drm_intel_gem_bo_process_reloc2(bo); + + /* Add the batch buffer to the validation list. There are no relocations + * pointing to it. + */ + drm_intel_add_validate_buffer2(bo, 0); + + memclear(execbuf); + execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects; + execbuf.buffer_count = bufmgr_gem->exec_count; + execbuf.batch_start_offset = 0; + execbuf.batch_len = used; + execbuf.cliprects_ptr = (uintptr_t)cliprects; + execbuf.num_cliprects = num_cliprects; + execbuf.DR1 = 0; + execbuf.DR4 = DR4; + execbuf.flags = flags; + if (ctx == NULL) + i915_execbuffer2_set_context_id(execbuf, 0); + else + i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id); + execbuf.rsvd2 = 0; + if (in_fence != -1) { + execbuf.rsvd2 = in_fence; + execbuf.flags |= I915_EXEC_FENCE_IN; + } + if (out_fence != NULL) { + *out_fence = -1; + execbuf.flags |= I915_EXEC_FENCE_OUT; + } + + if (bufmgr_gem->no_exec) + goto skip_execution; + + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, + &execbuf); + if (ret != 0) { + ret = -errno; + if (ret == -ENOSPC) { + DBG("Execbuffer fails to pin. " + "Estimate: %u. Actual: %u. Available: %u\n", + drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, + bufmgr_gem->exec_count), + drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, + bufmgr_gem->exec_count), + (unsigned int) bufmgr_gem->gtt_size); + } + } + drm_intel_update_buffer_offsets2(bufmgr_gem); + + if (ret == 0 && out_fence != NULL) + *out_fence = execbuf.rsvd2 >> 32; + +skip_execution: + if (bufmgr_gem->bufmgr.debug) + drm_intel_gem_dump_validation_list(bufmgr_gem); + + for (i = 0; i < bufmgr_gem->exec_count; i++) { + drm_intel_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]); + + bo_gem->idle = false; + + /* Disconnect the buffer from the validate list */ + bo_gem->validate_index = -1; + bufmgr_gem->exec_bos[i] = NULL; + } + bufmgr_gem->exec_count = 0; + pthread_mutex_unlock(&bufmgr_gem->lock); + + return ret; +} + +static int +drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used, + drm_clip_rect_t *cliprects, int num_cliprects, + int DR4) +{ + return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, + -1, NULL, I915_EXEC_RENDER); +} + +static int +drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, + drm_clip_rect_t *cliprects, int num_cliprects, int DR4, + unsigned int flags) +{ + return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, + -1, NULL, flags); +} + +drm_public int +drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx, + int used, unsigned int flags) +{ + return do_exec2(bo, used, ctx, NULL, 0, 0, -1, NULL, flags); +} + +drm_public int +drm_intel_gem_bo_fence_exec(drm_intel_bo *bo, + drm_intel_context *ctx, + int used, + int in_fence, + int *out_fence, + unsigned int flags) +{ + return do_exec2(bo, used, ctx, NULL, 0, 0, in_fence, out_fence, flags); +} + +static int +drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_pin pin; + int ret; + + memclear(pin); + pin.handle = bo_gem->gem_handle; + pin.alignment = alignment; + + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_PIN, + &pin); + if (ret != 0) + return -errno; + + bo->offset64 = pin.offset; + bo->offset = pin.offset; + return 0; +} + +static int +drm_intel_gem_bo_unpin(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_unpin unpin; + int ret; + + memclear(unpin); + unpin.handle = bo_gem->gem_handle; + + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin); + if (ret != 0) + return -errno; + + return 0; +} + +static int +drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, + uint32_t tiling_mode, + uint32_t stride) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + struct drm_i915_gem_set_tiling set_tiling; + int ret; + + if (bo_gem->global_name == 0 && + tiling_mode == bo_gem->tiling_mode && + stride == bo_gem->stride) + return 0; + + memset(&set_tiling, 0, sizeof(set_tiling)); + do { + /* set_tiling is slightly broken and overwrites the + * input on the error path, so we have to open code + * rmIoctl. + */ + set_tiling.handle = bo_gem->gem_handle; + set_tiling.tiling_mode = tiling_mode; + set_tiling.stride = stride; + + ret = ioctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_SET_TILING, + &set_tiling); + } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + if (ret == -1) + return -errno; + + bo_gem->tiling_mode = set_tiling.tiling_mode; + bo_gem->swizzle_mode = set_tiling.swizzle_mode; + bo_gem->stride = set_tiling.stride; + return 0; +} + +static int +drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, + uint32_t stride) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + int ret; + + /* Tiling with userptr surfaces is not supported + * on all hardware so refuse it for time being. + */ + if (bo_gem->is_userptr) + return -EINVAL; + + /* Linear buffers have no stride. By ensuring that we only ever use + * stride 0 with linear buffers, we simplify our code. + */ + if (*tiling_mode == I915_TILING_NONE) + stride = 0; + + ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride); + if (ret == 0) + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); + + *tiling_mode = bo_gem->tiling_mode; + return ret; +} + +static int +drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, + uint32_t * swizzle_mode) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + *tiling_mode = bo_gem->tiling_mode; + *swizzle_mode = bo_gem->swizzle_mode; + return 0; +} + +static int +drm_intel_gem_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + bo->offset64 = offset; + bo->offset = offset; + bo_gem->kflags |= EXEC_OBJECT_PINNED; + + return 0; +} + +drm_public drm_intel_bo * +drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; + int ret; + uint32_t handle; + drm_intel_bo_gem *bo_gem; + + pthread_mutex_lock(&bufmgr_gem->lock); + ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle); + if (ret) { + DBG("create_from_prime: failed to obtain handle from fd: %s\n", strerror(errno)); + pthread_mutex_unlock(&bufmgr_gem->lock); + return NULL; + } + + /* + * See if the kernel has already returned this buffer to us. Just as + * for named buffers, we must not create two bo's pointing at the same + * kernel object + */ + HASH_FIND(handle_hh, bufmgr_gem->handle_table, + &handle, sizeof(handle), bo_gem); + if (bo_gem) { + drm_intel_gem_bo_reference(&bo_gem->bo); + goto out; + } + + bo_gem = calloc(1, sizeof(*bo_gem)); + if (!bo_gem) + goto out; + + atomic_set(&bo_gem->refcount, 1); + DRMINITLISTHEAD(&bo_gem->vma_list); + + /* Determine size of bo. The fd-to-handle ioctl really should + * return the size, but it doesn't. If we have kernel 3.12 or + * later, we can lseek on the prime fd to get the size. Older + * kernels will just fail, in which case we fall back to the + * provided (estimated or guess size). */ + ret = lseek(prime_fd, 0, SEEK_END); + if (ret != -1) + bo_gem->bo.size = ret; + else + bo_gem->bo.size = size; + + bo_gem->bo.handle = handle; + bo_gem->bo.bufmgr = bufmgr; + + bo_gem->gem_handle = handle; + HASH_ADD(handle_hh, bufmgr_gem->handle_table, + gem_handle, sizeof(bo_gem->gem_handle), bo_gem); + + bo_gem->name = "prime"; + bo_gem->validate_index = -1; + bo_gem->reloc_tree_fences = 0; + bo_gem->used_as_reloc_target = false; + bo_gem->has_error = false; + bo_gem->reusable = false; + + ret = get_tiling_mode(bufmgr_gem, handle, + &bo_gem->tiling_mode, &bo_gem->swizzle_mode); + if (ret) + goto err; + + /* XXX stride is unknown */ + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); + +out: + pthread_mutex_unlock(&bufmgr_gem->lock); + return &bo_gem->bo; + +err: + drm_intel_gem_bo_free(&bo_gem->bo); + pthread_mutex_unlock(&bufmgr_gem->lock); + return NULL; +} + +drm_public int +drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle, + DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0) + return -errno; + + bo_gem->reusable = false; + + return 0; +} + +static int +drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (!bo_gem->global_name) { + struct drm_gem_flink flink; + + memclear(flink); + flink.handle = bo_gem->gem_handle; + if (drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink)) + return -errno; + + pthread_mutex_lock(&bufmgr_gem->lock); + if (!bo_gem->global_name) { + bo_gem->global_name = flink.name; + bo_gem->reusable = false; + + HASH_ADD(name_hh, bufmgr_gem->name_table, + global_name, sizeof(bo_gem->global_name), + bo_gem); + } + pthread_mutex_unlock(&bufmgr_gem->lock); + } + + *name = bo_gem->global_name; + return 0; +} + +/** + * Enables unlimited caching of buffer objects for reuse. + * + * This is potentially very memory expensive, as the cache at each bucket + * size is only bounded by how many buffers of that size we've managed to have + * in flight at once. + */ +drm_public void +drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; + + bufmgr_gem->bo_reuse = true; +} + +/** + * Disables implicit synchronisation before executing the bo + * + * This will cause rendering corruption unless you correctly manage explicit + * fences for all rendering involving this buffer - including use by others. + * Disabling the implicit serialisation is only required if that serialisation + * is too coarse (for example, you have split the buffer into many + * non-overlapping regions and are sharing the whole buffer between concurrent + * independent command streams). + * + * Note the kernel must advertise support via I915_PARAM_HAS_EXEC_ASYNC, + * which can be checked using drm_intel_bufmgr_can_disable_implicit_sync, + * or subsequent execbufs involving the bo will generate EINVAL. + */ +drm_public void +drm_intel_gem_bo_disable_implicit_sync(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + bo_gem->kflags |= EXEC_OBJECT_ASYNC; +} + +/** + * Enables implicit synchronisation before executing the bo + * + * This is the default behaviour of the kernel, to wait upon prior writes + * completing on the object before rendering with it, or to wait for prior + * reads to complete before writing into the object. + * drm_intel_gem_bo_disable_implicit_sync() can stop this behaviour, telling + * the kernel never to insert a stall before using the object. Then this + * function can be used to restore the implicit sync before subsequent + * rendering. + */ +drm_public void +drm_intel_gem_bo_enable_implicit_sync(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + bo_gem->kflags &= ~EXEC_OBJECT_ASYNC; +} + +/** + * Query whether the kernel supports disabling of its implicit synchronisation + * before execbuf. See drm_intel_gem_bo_disable_implicit_sync() + */ +drm_public int +drm_intel_bufmgr_gem_can_disable_implicit_sync(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; + + return bufmgr_gem->has_exec_async; +} + +/** + * Enable use of fenced reloc type. + * + * New code should enable this to avoid unnecessary fence register + * allocation. If this option is not enabled, all relocs will have fence + * register allocated. + */ +drm_public void +drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + bufmgr_gem->fenced_relocs = true; +} + +/** + * Return the additional aperture space required by the tree of buffer objects + * rooted at bo. + */ +static int +drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + int i; + int total = 0; + + if (bo == NULL || bo_gem->included_in_check_aperture) + return 0; + + total += bo->size; + bo_gem->included_in_check_aperture = true; + + for (i = 0; i < bo_gem->reloc_count; i++) + total += + drm_intel_gem_bo_get_aperture_space(bo_gem-> + reloc_target_info[i].bo); + + return total; +} + +/** + * Count the number of buffers in this list that need a fence reg + * + * If the count is greater than the number of available regs, we'll have + * to ask the caller to resubmit a batch with fewer tiled buffers. + * + * This function over-counts if the same buffer is used multiple times. + */ +static unsigned int +drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count) +{ + int i; + unsigned int total = 0; + + for (i = 0; i < count; i++) { + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; + + if (bo_gem == NULL) + continue; + + total += bo_gem->reloc_tree_fences; + } + return total; +} + +/** + * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready + * for the next drm_intel_bufmgr_check_aperture_space() call. + */ +static void +drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + int i; + + if (bo == NULL || !bo_gem->included_in_check_aperture) + return; + + bo_gem->included_in_check_aperture = false; + + for (i = 0; i < bo_gem->reloc_count; i++) + drm_intel_gem_bo_clear_aperture_space_flag(bo_gem-> + reloc_target_info[i].bo); +} + +/** + * Return a conservative estimate for the amount of aperture required + * for a collection of buffers. This may double-count some buffers. + */ +static unsigned int +drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count) +{ + int i; + unsigned int total = 0; + + for (i = 0; i < count; i++) { + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; + if (bo_gem != NULL) + total += bo_gem->reloc_tree_size; + } + return total; +} + +/** + * Return the amount of aperture needed for a collection of buffers. + * This avoids double counting any buffers, at the cost of looking + * at every buffer in the set. + */ +static unsigned int +drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count) +{ + int i; + unsigned int total = 0; + + for (i = 0; i < count; i++) { + total += drm_intel_gem_bo_get_aperture_space(bo_array[i]); + /* For the first buffer object in the array, we get an + * accurate count back for its reloc_tree size (since nothing + * had been flagged as being counted yet). We can save that + * value out as a more conservative reloc_tree_size that + * avoids double-counting target buffers. Since the first + * buffer happens to usually be the batch buffer in our + * callers, this can pull us back from doing the tree + * walk on every new batch emit. + */ + if (i == 0) { + drm_intel_bo_gem *bo_gem = + (drm_intel_bo_gem *) bo_array[i]; + bo_gem->reloc_tree_size = total; + } + } + + for (i = 0; i < count; i++) + drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]); + return total; +} + +/** + * Return -1 if the batchbuffer should be flushed before attempting to + * emit rendering referencing the buffers pointed to by bo_array. + * + * This is required because if we try to emit a batchbuffer with relocations + * to a tree of buffers that won't simultaneously fit in the aperture, + * the rendering will return an error at a point where the software is not + * prepared to recover from it. + * + * However, we also want to emit the batchbuffer significantly before we reach + * the limit, as a series of batchbuffers each of which references buffers + * covering almost all of the aperture means that at each emit we end up + * waiting to evict a buffer from the last rendering, and we get synchronous + * performance. By emitting smaller batchbuffers, we eat some CPU overhead to + * get better parallelism. + */ +static int +drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count) +{ + drm_intel_bufmgr_gem *bufmgr_gem = + (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr; + unsigned int total = 0; + unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4; + int total_fences; + + /* Check for fence reg constraints if necessary */ + if (bufmgr_gem->available_fences) { + total_fences = drm_intel_gem_total_fences(bo_array, count); + if (total_fences > bufmgr_gem->available_fences) + return -ENOSPC; + } + + total = drm_intel_gem_estimate_batch_space(bo_array, count); + + if (total > threshold) + total = drm_intel_gem_compute_batch_space(bo_array, count); + + if (total > threshold) { + DBG("check_space: overflowed available aperture, " + "%dkb vs %dkb\n", + total / 1024, (int)bufmgr_gem->gtt_size / 1024); + return -ENOSPC; + } else { + DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024, + (int)bufmgr_gem->gtt_size / 1024); + return 0; + } +} + +/* + * Disable buffer reuse for objects which are shared with the kernel + * as scanout buffers + */ +static int +drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + bo_gem->reusable = false; + return 0; +} + +static int +drm_intel_gem_bo_is_reusable(drm_intel_bo *bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + return bo_gem->reusable; +} + +static int +_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) +{ + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + int i; + + for (i = 0; i < bo_gem->reloc_count; i++) { + if (bo_gem->reloc_target_info[i].bo == target_bo) + return 1; + if (bo == bo_gem->reloc_target_info[i].bo) + continue; + if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo, + target_bo)) + return 1; + } + + for (i = 0; i< bo_gem->softpin_target_count; i++) { + if (bo_gem->softpin_target[i] == target_bo) + return 1; + if (_drm_intel_gem_bo_references(bo_gem->softpin_target[i], target_bo)) + return 1; + } + + return 0; +} + +/** Return true if target_bo is referenced by bo's relocation tree. */ +static int +drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) +{ + drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; + + if (bo == NULL || target_bo == NULL) + return 0; + if (target_bo_gem->used_as_reloc_target) + return _drm_intel_gem_bo_references(bo, target_bo); + return 0; +} + +static void +add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size) +{ + unsigned int i = bufmgr_gem->num_buckets; + + assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket)); + + DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head); + bufmgr_gem->cache_bucket[i].size = size; + bufmgr_gem->num_buckets++; +} + +static void +init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem) +{ + unsigned long size, cache_max_size = 64 * 1024 * 1024; + + /* OK, so power of two buckets was too wasteful of memory. + * Give 3 other sizes between each power of two, to hopefully + * cover things accurately enough. (The alternative is + * probably to just go for exact matching of sizes, and assume + * that for things like composited window resize the tiled + * width/height alignment and rounding of sizes to pages will + * get us useful cache hit rates anyway) + */ + add_bucket(bufmgr_gem, 4096); + add_bucket(bufmgr_gem, 4096 * 2); + add_bucket(bufmgr_gem, 4096 * 3); + + /* Initialize the linked lists for BO reuse cache. */ + for (size = 4 * 4096; size <= cache_max_size; size *= 2) { + add_bucket(bufmgr_gem, size); + + add_bucket(bufmgr_gem, size + size * 1 / 4); + add_bucket(bufmgr_gem, size + size * 2 / 4); + add_bucket(bufmgr_gem, size + size * 3 / 4); + } +} + +drm_public void +drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + + bufmgr_gem->vma_max = limit; + + drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); +} + +static int +parse_devid_override(const char *devid_override) +{ + static const struct { + const char *name; + int pci_id; + } name_map[] = { + { "brw", PCI_CHIP_I965_GM }, + { "g4x", PCI_CHIP_GM45_GM }, + { "ilk", PCI_CHIP_ILD_G }, + { "snb", PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS }, + { "ivb", PCI_CHIP_IVYBRIDGE_S_GT2 }, + { "hsw", PCI_CHIP_HASWELL_CRW_E_GT3 }, + { "byt", PCI_CHIP_VALLEYVIEW_3 }, + { "bdw", 0x1620 | BDW_ULX }, + { "skl", PCI_CHIP_SKYLAKE_DT_GT2 }, + { "kbl", PCI_CHIP_KABYLAKE_DT_GT2 }, + }; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(name_map); i++) { + if (!strcmp(name_map[i].name, devid_override)) + return name_map[i].pci_id; + } + + return strtod(devid_override, NULL); +} + +/** + * Get the PCI ID for the device. This can be overridden by setting the + * INTEL_DEVID_OVERRIDE environment variable to the desired ID. + */ +static int +get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem) +{ + char *devid_override; + int devid = 0; + int ret; + drm_i915_getparam_t gp; + + if (geteuid() == getuid()) { + devid_override = getenv("INTEL_DEVID_OVERRIDE"); + if (devid_override) { + bufmgr_gem->no_exec = true; + return parse_devid_override(devid_override); + } + } + + memclear(gp); + gp.param = I915_PARAM_CHIPSET_ID; + gp.value = &devid; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret) { + fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno); + fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value); + } + return devid; +} + +drm_public int +drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + + return bufmgr_gem->pci_device; +} + +/** + * Sets the AUB filename. + * + * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump() + * for it to have any effect. + */ +drm_public void +drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr, + const char *filename) +{ +} + +/** + * Sets up AUB dumping. + * + * This is a trace file format that can be used with the simulator. + * Packets are emitted in a format somewhat like GPU command packets. + * You can set up a GTT and upload your objects into the referenced + * space, then send off batchbuffers and get BMPs out the other end. + */ +drm_public void +drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable) +{ + fprintf(stderr, "libdrm aub dumping is deprecated.\n\n" + "Use intel_aubdump from intel-gpu-tools instead. Install intel-gpu-tools,\n" + "then run (for example)\n\n" + "\t$ intel_aubdump --output=trace.aub glxgears -geometry 500x500\n\n" + "See the intel_aubdump man page for more details.\n"); +} + +drm_public drm_intel_context * +drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + struct drm_i915_gem_context_create create; + drm_intel_context *context = NULL; + int ret; + + context = calloc(1, sizeof(*context)); + if (!context) + return NULL; + + memclear(create); + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); + if (ret != 0) { + DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", + strerror(errno)); + free(context); + return NULL; + } + + context->ctx_id = create.ctx_id; + context->bufmgr = bufmgr; + + return context; +} + +drm_public int +drm_intel_gem_context_get_id(drm_intel_context *ctx, uint32_t *ctx_id) +{ + if (ctx == NULL) + return -EINVAL; + + *ctx_id = ctx->ctx_id; + + return 0; +} + +drm_public void +drm_intel_gem_context_destroy(drm_intel_context *ctx) +{ + drm_intel_bufmgr_gem *bufmgr_gem; + struct drm_i915_gem_context_destroy destroy; + int ret; + + if (ctx == NULL) + return; + + memclear(destroy); + + bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; + destroy.ctx_id = ctx->ctx_id; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, + &destroy); + if (ret != 0) + fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n", + strerror(errno)); + + free(ctx); +} + +drm_public int +drm_intel_get_reset_stats(drm_intel_context *ctx, + uint32_t *reset_count, + uint32_t *active, + uint32_t *pending) +{ + drm_intel_bufmgr_gem *bufmgr_gem; + struct drm_i915_reset_stats stats; + int ret; + + if (ctx == NULL) + return -EINVAL; + + memclear(stats); + + bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; + stats.ctx_id = ctx->ctx_id; + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GET_RESET_STATS, + &stats); + if (ret == 0) { + if (reset_count != NULL) + *reset_count = stats.reset_count; + + if (active != NULL) + *active = stats.batch_active; + + if (pending != NULL) + *pending = stats.batch_pending; + } + + return ret; +} + +drm_public int +drm_intel_reg_read(drm_intel_bufmgr *bufmgr, + uint32_t offset, + uint64_t *result) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + struct drm_i915_reg_read reg_read; + int ret; + + memclear(reg_read); + reg_read.offset = offset; + + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, ®_read); + + *result = reg_read.val; + return ret; +} + +drm_public int +drm_intel_get_subslice_total(int fd, unsigned int *subslice_total) +{ + drm_i915_getparam_t gp; + int ret; + + memclear(gp); + gp.value = (int*)subslice_total; + gp.param = I915_PARAM_SUBSLICE_TOTAL; + ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret) + return -errno; + + return 0; +} + +drm_public int +drm_intel_get_eu_total(int fd, unsigned int *eu_total) +{ + drm_i915_getparam_t gp; + int ret; + + memclear(gp); + gp.value = (int*)eu_total; + gp.param = I915_PARAM_EU_TOTAL; + ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret) + return -errno; + + return 0; +} + +drm_public int +drm_intel_get_pooled_eu(int fd) +{ + drm_i915_getparam_t gp; + int ret = -1; + + memclear(gp); + gp.param = I915_PARAM_HAS_POOLED_EU; + gp.value = &ret; + if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp)) + return -errno; + + return ret; +} + +drm_public int +drm_intel_get_min_eu_in_pool(int fd) +{ + drm_i915_getparam_t gp; + int ret = -1; + + memclear(gp); + gp.param = I915_PARAM_MIN_EU_IN_POOL; + gp.value = &ret; + if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp)) + return -errno; + + return ret; +} + +/** + * Annotate the given bo for use in aub dumping. + * + * \param annotations is an array of drm_intel_aub_annotation objects + * describing the type of data in various sections of the bo. Each + * element of the array specifies the type and subtype of a section of + * the bo, and the past-the-end offset of that section. The elements + * of \c annotations must be sorted so that ending_offset is + * increasing. + * + * \param count is the number of elements in the \c annotations array. + * If \c count is zero, then \c annotations will not be dereferenced. + * + * Annotations are copied into a private data structure, so caller may + * re-use the memory pointed to by \c annotations after the call + * returns. + * + * Annotations are stored for the lifetime of the bo; to reset to the + * default state (no annotations), call this function with a \c count + * of zero. + */ +drm_public void drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo, + drm_intel_aub_annotation *annotations, + unsigned count) +{ +} + +static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER; +static drmMMListHead bufmgr_list = { &bufmgr_list, &bufmgr_list }; + +static drm_intel_bufmgr_gem * +drm_intel_bufmgr_gem_find(int fd) +{ + drm_intel_bufmgr_gem *bufmgr_gem; + + DRMLISTFOREACHENTRY(bufmgr_gem, &bufmgr_list, managers) { + if (bufmgr_gem->fd == fd) { + atomic_inc(&bufmgr_gem->refcount); + return bufmgr_gem; + } + } + + return NULL; +} + +static void +drm_intel_bufmgr_gem_unref(drm_intel_bufmgr *bufmgr) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; + + if (atomic_add_unless(&bufmgr_gem->refcount, -1, 1)) + return; + + pthread_mutex_lock(&bufmgr_list_mutex); + + if (atomic_dec_and_test(&bufmgr_gem->refcount)) { + DRMLISTDEL(&bufmgr_gem->managers); + drm_intel_bufmgr_gem_destroy(bufmgr); + } + + pthread_mutex_unlock(&bufmgr_list_mutex); +} + +drm_public void *drm_intel_gem_bo_map__gtt(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (bo_gem->gtt_virtual) + return bo_gem->gtt_virtual; + + if (bo_gem->is_userptr) + return NULL; + + pthread_mutex_lock(&bufmgr_gem->lock); + if (bo_gem->gtt_virtual == NULL) { + struct drm_i915_gem_mmap_gtt mmap_arg; + void *ptr; + + DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n", + bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); + + if (bo_gem->map_count++ == 0) + drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); + + memclear(mmap_arg); + mmap_arg.handle = bo_gem->gem_handle; + + /* Get the fake offset back... */ + ptr = MAP_FAILED; + if (drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_MMAP_GTT, + &mmap_arg) == 0) { + /* and mmap it */ + ptr = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE, + MAP_SHARED, bufmgr_gem->fd, + mmap_arg.offset); + } + if (ptr == MAP_FAILED) { + if (--bo_gem->map_count == 0) + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + ptr = NULL; + } + + bo_gem->gtt_virtual = ptr; + } + pthread_mutex_unlock(&bufmgr_gem->lock); + + return bo_gem->gtt_virtual; +} + +drm_public void *drm_intel_gem_bo_map__cpu(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (bo_gem->mem_virtual) + return bo_gem->mem_virtual; + + if (bo_gem->is_userptr) { + /* Return the same user ptr */ + return bo_gem->user_virtual; + } + + pthread_mutex_lock(&bufmgr_gem->lock); + if (!bo_gem->mem_virtual) { + struct drm_i915_gem_mmap mmap_arg; + + if (bo_gem->map_count++ == 0) + drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); + + DBG("bo_map: %d (%s), map_count=%d\n", + bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); + + memclear(mmap_arg); + mmap_arg.handle = bo_gem->gem_handle; + mmap_arg.size = bo->size; + if (drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_MMAP, + &mmap_arg)) { + DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", + __FILE__, __LINE__, bo_gem->gem_handle, + bo_gem->name, strerror(errno)); + if (--bo_gem->map_count == 0) + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + } else { + VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); + bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; + } + } + pthread_mutex_unlock(&bufmgr_gem->lock); + + return bo_gem->mem_virtual; +} + +drm_public void *drm_intel_gem_bo_map__wc(drm_intel_bo *bo) +{ + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; + + if (bo_gem->wc_virtual) + return bo_gem->wc_virtual; + + if (bo_gem->is_userptr) + return NULL; + + pthread_mutex_lock(&bufmgr_gem->lock); + if (!bo_gem->wc_virtual) { + struct drm_i915_gem_mmap mmap_arg; + + if (bo_gem->map_count++ == 0) + drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); + + DBG("bo_map: %d (%s), map_count=%d\n", + bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); + + memclear(mmap_arg); + mmap_arg.handle = bo_gem->gem_handle; + mmap_arg.size = bo->size; + mmap_arg.flags = I915_MMAP_WC; + if (drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_MMAP, + &mmap_arg)) { + DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", + __FILE__, __LINE__, bo_gem->gem_handle, + bo_gem->name, strerror(errno)); + if (--bo_gem->map_count == 0) + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); + } else { + VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); + bo_gem->wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; + } + } + pthread_mutex_unlock(&bufmgr_gem->lock); + + return bo_gem->wc_virtual; +} + +/** + * Initializes the GEM buffer manager, which uses the kernel to allocate, map, + * and manage map buffer objections. + * + * \param fd File descriptor of the opened DRM device. + */ +drm_public drm_intel_bufmgr * +drm_intel_bufmgr_gem_init(int fd, int batch_size) +{ + drm_intel_bufmgr_gem *bufmgr_gem; + struct drm_i915_gem_get_aperture aperture; + drm_i915_getparam_t gp; + int ret, tmp; + + pthread_mutex_lock(&bufmgr_list_mutex); + + bufmgr_gem = drm_intel_bufmgr_gem_find(fd); + if (bufmgr_gem) + goto exit; + + bufmgr_gem = calloc(1, sizeof(*bufmgr_gem)); + if (bufmgr_gem == NULL) + goto exit; + + bufmgr_gem->fd = fd; + atomic_set(&bufmgr_gem->refcount, 1); + + if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) { + free(bufmgr_gem); + bufmgr_gem = NULL; + goto exit; + } + + memclear(aperture); + ret = drmIoctl(bufmgr_gem->fd, + DRM_IOCTL_I915_GEM_GET_APERTURE, + &aperture); + + if (ret == 0) + bufmgr_gem->gtt_size = aperture.aper_available_size; + else { + fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n", + strerror(errno)); + bufmgr_gem->gtt_size = 128 * 1024 * 1024; + fprintf(stderr, "Assuming %dkB available aperture size.\n" + "May lead to reduced performance or incorrect " + "rendering.\n", + (int)bufmgr_gem->gtt_size / 1024); + } + + bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem); + + if (IS_GEN2(bufmgr_gem->pci_device)) + bufmgr_gem->gen = 2; + else if (IS_GEN3(bufmgr_gem->pci_device)) + bufmgr_gem->gen = 3; + else if (IS_GEN4(bufmgr_gem->pci_device)) + bufmgr_gem->gen = 4; + else if (IS_GEN5(bufmgr_gem->pci_device)) + bufmgr_gem->gen = 5; + else if (IS_GEN6(bufmgr_gem->pci_device)) + bufmgr_gem->gen = 6; + else if (IS_GEN7(bufmgr_gem->pci_device)) + bufmgr_gem->gen = 7; + else + /* Treat all further unmatched platforms the same as gen8 */ + bufmgr_gem->gen = 8; + + if (IS_GEN3(bufmgr_gem->pci_device) && + bufmgr_gem->gtt_size > 256*1024*1024) { + /* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't + * be used for tiled blits. To simplify the accounting, just + * subtract the unmappable part (fixed to 256MB on all known + * gen3 devices) if the kernel advertises it. */ + bufmgr_gem->gtt_size -= 256*1024*1024; + } + + memclear(gp); + gp.value = &tmp; + + gp.param = I915_PARAM_HAS_EXECBUF2; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret) { + fprintf(stderr, "i915 does not support EXECBUFER2\n"); + free(bufmgr_gem); + bufmgr_gem = NULL; + goto exit; + } + + gp.param = I915_PARAM_HAS_BSD; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + bufmgr_gem->has_bsd = ret == 0; + + gp.param = I915_PARAM_HAS_BLT; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + bufmgr_gem->has_blt = ret == 0; + + gp.param = I915_PARAM_HAS_RELAXED_FENCING; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + bufmgr_gem->has_relaxed_fencing = ret == 0; + + gp.param = I915_PARAM_HAS_EXEC_ASYNC; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + bufmgr_gem->has_exec_async = ret == 0; + + bufmgr_gem->bufmgr.bo_alloc_userptr = check_bo_alloc_userptr; + + gp.param = I915_PARAM_HAS_WAIT_TIMEOUT; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + bufmgr_gem->has_wait_timeout = ret == 0; + + gp.param = I915_PARAM_HAS_LLC; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret != 0) { + /* Kernel does not supports HAS_LLC query, fallback to GPU + * generation detection and assume that we have LLC on GEN6/7 + */ + bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) | + IS_GEN7(bufmgr_gem->pci_device)); + } else + bufmgr_gem->has_llc = *gp.value; + + gp.param = I915_PARAM_HAS_VEBOX; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0); + + gp.param = I915_PARAM_HAS_EXEC_SOFTPIN; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret == 0 && *gp.value > 0) + bufmgr_gem->bufmgr.bo_set_softpin_offset = drm_intel_gem_bo_set_softpin_offset; + + if (bufmgr_gem->gen < 4) { + gp.param = I915_PARAM_NUM_FENCES_AVAIL; + gp.value = &bufmgr_gem->available_fences; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret) { + fprintf(stderr, "get fences failed: %d [%d]\n", ret, + errno); + fprintf(stderr, "param: %d, val: %d\n", gp.param, + *gp.value); + bufmgr_gem->available_fences = 0; + } else { + /* XXX The kernel reports the total number of fences, + * including any that may be pinned. + * + * We presume that there will be at least one pinned + * fence for the scanout buffer, but there may be more + * than one scanout and the user may be manually + * pinning buffers. Let's move to execbuffer2 and + * thereby forget the insanity of using fences... + */ + bufmgr_gem->available_fences -= 2; + if (bufmgr_gem->available_fences < 0) + bufmgr_gem->available_fences = 0; + } + } + + if (bufmgr_gem->gen >= 8) { + gp.param = I915_PARAM_HAS_ALIASING_PPGTT; + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); + if (ret == 0 && *gp.value == 3) + bufmgr_gem->bufmgr.bo_use_48b_address_range = drm_intel_gem_bo_use_48b_address_range; + } + + /* Let's go with one relocation per every 2 dwords (but round down a bit + * since a power of two will mean an extra page allocation for the reloc + * buffer). + * + * Every 4 was too few for the blender benchmark. + */ + bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2; + + bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc; + bufmgr_gem->bufmgr.bo_alloc_for_render = + drm_intel_gem_bo_alloc_for_render; + bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled; + bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference; + bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference; + bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map; + bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap; + bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata; + bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata; + bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering; + bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc; + bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence; + bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin; + bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin; + bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling; + bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling; + bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink; + bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2; + bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2; + bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy; + bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise; + bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_unref; + bufmgr_gem->bufmgr.debug = 0; + bufmgr_gem->bufmgr.check_aperture_space = + drm_intel_gem_check_aperture_space; + bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse; + bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable; + bufmgr_gem->bufmgr.get_pipe_from_crtc_id = + drm_intel_gem_get_pipe_from_crtc_id; + bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references; + + init_cache_buckets(bufmgr_gem); + + DRMINITLISTHEAD(&bufmgr_gem->vma_cache); + bufmgr_gem->vma_max = -1; /* unlimited by default */ + + DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list); + +exit: + pthread_mutex_unlock(&bufmgr_list_mutex); + + return bufmgr_gem != NULL ? &bufmgr_gem->bufmgr : NULL; +} diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h new file mode 100644 index 0000000..baaf4bb --- /dev/null +++ b/intel/intel_bufmgr_priv.h @@ -0,0 +1,325 @@ +/* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +/** + * @file intel_bufmgr_priv.h + * + * Private definitions of Intel-specific bufmgr functions and structures. + */ + +#ifndef INTEL_BUFMGR_PRIV_H +#define INTEL_BUFMGR_PRIV_H + +/** + * Context for a buffer manager instance. + * + * Contains public methods followed by private storage for the buffer manager. + */ +struct _drm_intel_bufmgr { + /** + * Allocate a buffer object. + * + * Buffer objects are not necessarily initially mapped into CPU virtual + * address space or graphics device aperture. They must be mapped + * using bo_map() or drm_intel_gem_bo_map_gtt() to be used by the CPU. + */ + drm_intel_bo *(*bo_alloc) (drm_intel_bufmgr *bufmgr, const char *name, + unsigned long size, unsigned int alignment); + + /** + * Allocate a buffer object, hinting that it will be used as a + * render target. + * + * This is otherwise the same as bo_alloc. + */ + drm_intel_bo *(*bo_alloc_for_render) (drm_intel_bufmgr *bufmgr, + const char *name, + unsigned long size, + unsigned int alignment); + + /** + * Allocate a buffer object from an existing user accessible + * address malloc'd with the provided size. + * Alignment is used when mapping to the gtt. + * Flags may be I915_VMAP_READ_ONLY or I915_USERPTR_UNSYNCHRONIZED + */ + drm_intel_bo *(*bo_alloc_userptr)(drm_intel_bufmgr *bufmgr, + const char *name, void *addr, + uint32_t tiling_mode, uint32_t stride, + unsigned long size, + unsigned long flags); + + /** + * Allocate a tiled buffer object. + * + * Alignment for tiled objects is set automatically; the 'flags' + * argument provides a hint about how the object will be used initially. + * + * Valid tiling formats are: + * I915_TILING_NONE + * I915_TILING_X + * I915_TILING_Y + * + * Note the tiling format may be rejected; callers should check the + * 'tiling_mode' field on return, as well as the pitch value, which + * may have been rounded up to accommodate for tiling restrictions. + */ + drm_intel_bo *(*bo_alloc_tiled) (drm_intel_bufmgr *bufmgr, + const char *name, + int x, int y, int cpp, + uint32_t *tiling_mode, + unsigned long *pitch, + unsigned long flags); + + /** Takes a reference on a buffer object */ + void (*bo_reference) (drm_intel_bo *bo); + + /** + * Releases a reference on a buffer object, freeing the data if + * no references remain. + */ + void (*bo_unreference) (drm_intel_bo *bo); + + /** + * Maps the buffer into userspace. + * + * This function will block waiting for any existing execution on the + * buffer to complete, first. The resulting mapping is available at + * buf->virtual. + */ + int (*bo_map) (drm_intel_bo *bo, int write_enable); + + /** + * Reduces the refcount on the userspace mapping of the buffer + * object. + */ + int (*bo_unmap) (drm_intel_bo *bo); + + /** + * Write data into an object. + * + * This is an optional function, if missing, + * drm_intel_bo will map/memcpy/unmap. + */ + int (*bo_subdata) (drm_intel_bo *bo, unsigned long offset, + unsigned long size, const void *data); + + /** + * Read data from an object + * + * This is an optional function, if missing, + * drm_intel_bo will map/memcpy/unmap. + */ + int (*bo_get_subdata) (drm_intel_bo *bo, unsigned long offset, + unsigned long size, void *data); + + /** + * Waits for rendering to an object by the GPU to have completed. + * + * This is not required for any access to the BO by bo_map, + * bo_subdata, etc. It is merely a way for the driver to implement + * glFinish. + */ + void (*bo_wait_rendering) (drm_intel_bo *bo); + + /** + * Tears down the buffer manager instance. + */ + void (*destroy) (drm_intel_bufmgr *bufmgr); + + /** + * Indicate if the buffer can be placed anywhere in the full ppgtt + * address range (2^48). + * + * Any resource used with flat/heapless (0x00000000-0xfffff000) + * General State Heap (GSH) or Instructions State Heap (ISH) must + * be in a 32-bit range. 48-bit range will only be used when explicitly + * requested. + * + * \param bo Buffer to set the use_48b_address_range flag. + * \param enable The flag value. + */ + void (*bo_use_48b_address_range) (drm_intel_bo *bo, uint32_t enable); + + /** + * Add relocation entry in reloc_buf, which will be updated with the + * target buffer's real offset on on command submission. + * + * Relocations remain in place for the lifetime of the buffer object. + * + * \param bo Buffer to write the relocation into. + * \param offset Byte offset within reloc_bo of the pointer to + * target_bo. + * \param target_bo Buffer whose offset should be written into the + * relocation entry. + * \param target_offset Constant value to be added to target_bo's + * offset in relocation entry. + * \param read_domains GEM read domains which the buffer will be + * read into by the command that this relocation + * is part of. + * \param write_domains GEM read domains which the buffer will be + * dirtied in by the command that this + * relocation is part of. + */ + int (*bo_emit_reloc) (drm_intel_bo *bo, uint32_t offset, + drm_intel_bo *target_bo, uint32_t target_offset, + uint32_t read_domains, uint32_t write_domain); + int (*bo_emit_reloc_fence)(drm_intel_bo *bo, uint32_t offset, + drm_intel_bo *target_bo, + uint32_t target_offset, + uint32_t read_domains, + uint32_t write_domain); + + /** Executes the command buffer pointed to by bo. */ + int (*bo_exec) (drm_intel_bo *bo, int used, + drm_clip_rect_t *cliprects, int num_cliprects, + int DR4); + + /** Executes the command buffer pointed to by bo on the selected + * ring buffer + */ + int (*bo_mrb_exec) (drm_intel_bo *bo, int used, + drm_clip_rect_t *cliprects, int num_cliprects, + int DR4, unsigned flags); + + /** + * Pin a buffer to the aperture and fix the offset until unpinned + * + * \param buf Buffer to pin + * \param alignment Required alignment for aperture, in bytes + */ + int (*bo_pin) (drm_intel_bo *bo, uint32_t alignment); + + /** + * Unpin a buffer from the aperture, allowing it to be removed + * + * \param buf Buffer to unpin + */ + int (*bo_unpin) (drm_intel_bo *bo); + + /** + * Ask that the buffer be placed in tiling mode + * + * \param buf Buffer to set tiling mode for + * \param tiling_mode desired, and returned tiling mode + */ + int (*bo_set_tiling) (drm_intel_bo *bo, uint32_t * tiling_mode, + uint32_t stride); + + /** + * Get the current tiling (and resulting swizzling) mode for the bo. + * + * \param buf Buffer to get tiling mode for + * \param tiling_mode returned tiling mode + * \param swizzle_mode returned swizzling mode + */ + int (*bo_get_tiling) (drm_intel_bo *bo, uint32_t * tiling_mode, + uint32_t * swizzle_mode); + + /** + * Set the offset at which this buffer will be softpinned + * \param bo Buffer to set the softpin offset for + * \param offset Softpin offset + */ + int (*bo_set_softpin_offset) (drm_intel_bo *bo, uint64_t offset); + + /** + * Create a visible name for a buffer which can be used by other apps + * + * \param buf Buffer to create a name for + * \param name Returned name + */ + int (*bo_flink) (drm_intel_bo *bo, uint32_t * name); + + /** + * Returns 1 if mapping the buffer for write could cause the process + * to block, due to the object being active in the GPU. + */ + int (*bo_busy) (drm_intel_bo *bo); + + /** + * Specify the volatility of the buffer. + * \param bo Buffer to create a name for + * \param madv The purgeable status + * + * Use I915_MADV_DONTNEED to mark the buffer as purgeable, and it will be + * reclaimed under memory pressure. If you subsequently require the buffer, + * then you must pass I915_MADV_WILLNEED to mark the buffer as required. + * + * Returns 1 if the buffer was retained, or 0 if it was discarded whilst + * marked as I915_MADV_DONTNEED. + */ + int (*bo_madvise) (drm_intel_bo *bo, int madv); + + int (*check_aperture_space) (drm_intel_bo ** bo_array, int count); + + /** + * Disable buffer reuse for buffers which will be shared in some way, + * as with scanout buffers. When the buffer reference count goes to + * zero, it will be freed and not placed in the reuse list. + * + * \param bo Buffer to disable reuse for + */ + int (*bo_disable_reuse) (drm_intel_bo *bo); + + /** + * Query whether a buffer is reusable. + * + * \param bo Buffer to query + */ + int (*bo_is_reusable) (drm_intel_bo *bo); + + /** + * + * Return the pipe associated with a crtc_id so that vblank + * synchronization can use the correct data in the request. + * This is only supported for KMS and gem at this point, when + * unsupported, this function returns -1 and leaves the decision + * of what to do in that case to the caller + * + * \param bufmgr the associated buffer manager + * \param crtc_id the crtc identifier + */ + int (*get_pipe_from_crtc_id) (drm_intel_bufmgr *bufmgr, int crtc_id); + + /** Returns true if target_bo is in the relocation tree rooted at bo. */ + int (*bo_references) (drm_intel_bo *bo, drm_intel_bo *target_bo); + + /**< Enables verbose debugging printouts */ + int debug; +}; + +struct _drm_intel_context { + unsigned int ctx_id; + struct _drm_intel_bufmgr *bufmgr; +}; + +#define ALIGN(value, alignment) ((value + alignment - 1) & ~(alignment - 1)) +#define ROUND_UP_TO(x, y) (((x) + (y) - 1) / (y) * (y)) +#define ROUND_UP_TO_MB(x) ROUND_UP_TO((x), 1024*1024) + +#endif /* INTEL_BUFMGR_PRIV_H */ diff --git a/intel/intel_chipset.h b/intel/intel_chipset.h new file mode 100644 index 0000000..bbf40b9 --- /dev/null +++ b/intel/intel_chipset.h @@ -0,0 +1,336 @@ +/* + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef _INTEL_CHIPSET_H +#define _INTEL_CHIPSET_H + +#define PCI_CHIP_I810 0x7121 +#define PCI_CHIP_I810_DC100 0x7123 +#define PCI_CHIP_I810_E 0x7125 +#define PCI_CHIP_I815 0x1132 + +#define PCI_CHIP_I830_M 0x3577 +#define PCI_CHIP_845_G 0x2562 +#define PCI_CHIP_I855_GM 0x3582 +#define PCI_CHIP_I865_G 0x2572 + +#define PCI_CHIP_I915_G 0x2582 +#define PCI_CHIP_E7221_G 0x258A +#define PCI_CHIP_I915_GM 0x2592 +#define PCI_CHIP_I945_G 0x2772 +#define PCI_CHIP_I945_GM 0x27A2 +#define PCI_CHIP_I945_GME 0x27AE + +#define PCI_CHIP_Q35_G 0x29B2 +#define PCI_CHIP_G33_G 0x29C2 +#define PCI_CHIP_Q33_G 0x29D2 + +#define PCI_CHIP_IGD_GM 0xA011 +#define PCI_CHIP_IGD_G 0xA001 + +#define IS_IGDGM(devid) ((devid) == PCI_CHIP_IGD_GM) +#define IS_IGDG(devid) ((devid) == PCI_CHIP_IGD_G) +#define IS_IGD(devid) (IS_IGDG(devid) || IS_IGDGM(devid)) + +#define PCI_CHIP_I965_G 0x29A2 +#define PCI_CHIP_I965_Q 0x2992 +#define PCI_CHIP_I965_G_1 0x2982 +#define PCI_CHIP_I946_GZ 0x2972 +#define PCI_CHIP_I965_GM 0x2A02 +#define PCI_CHIP_I965_GME 0x2A12 + +#define PCI_CHIP_GM45_GM 0x2A42 + +#define PCI_CHIP_IGD_E_G 0x2E02 +#define PCI_CHIP_Q45_G 0x2E12 +#define PCI_CHIP_G45_G 0x2E22 +#define PCI_CHIP_G41_G 0x2E32 + +#define PCI_CHIP_ILD_G 0x0042 +#define PCI_CHIP_ILM_G 0x0046 + +#define PCI_CHIP_SANDYBRIDGE_GT1 0x0102 /* desktop */ +#define PCI_CHIP_SANDYBRIDGE_GT2 0x0112 +#define PCI_CHIP_SANDYBRIDGE_GT2_PLUS 0x0122 +#define PCI_CHIP_SANDYBRIDGE_M_GT1 0x0106 /* mobile */ +#define PCI_CHIP_SANDYBRIDGE_M_GT2 0x0116 +#define PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS 0x0126 +#define PCI_CHIP_SANDYBRIDGE_S 0x010A /* server */ + +#define PCI_CHIP_IVYBRIDGE_GT1 0x0152 /* desktop */ +#define PCI_CHIP_IVYBRIDGE_GT2 0x0162 +#define PCI_CHIP_IVYBRIDGE_M_GT1 0x0156 /* mobile */ +#define PCI_CHIP_IVYBRIDGE_M_GT2 0x0166 +#define PCI_CHIP_IVYBRIDGE_S 0x015a /* server */ +#define PCI_CHIP_IVYBRIDGE_S_GT2 0x016a /* server */ + +#define PCI_CHIP_HASWELL_GT1 0x0402 /* Desktop */ +#define PCI_CHIP_HASWELL_GT2 0x0412 +#define PCI_CHIP_HASWELL_GT3 0x0422 +#define PCI_CHIP_HASWELL_M_GT1 0x0406 /* Mobile */ +#define PCI_CHIP_HASWELL_M_GT2 0x0416 +#define PCI_CHIP_HASWELL_M_GT3 0x0426 +#define PCI_CHIP_HASWELL_S_GT1 0x040A /* Server */ +#define PCI_CHIP_HASWELL_S_GT2 0x041A +#define PCI_CHIP_HASWELL_S_GT3 0x042A +#define PCI_CHIP_HASWELL_B_GT1 0x040B /* Reserved */ +#define PCI_CHIP_HASWELL_B_GT2 0x041B +#define PCI_CHIP_HASWELL_B_GT3 0x042B +#define PCI_CHIP_HASWELL_E_GT1 0x040E /* Reserved */ +#define PCI_CHIP_HASWELL_E_GT2 0x041E +#define PCI_CHIP_HASWELL_E_GT3 0x042E +#define PCI_CHIP_HASWELL_SDV_GT1 0x0C02 /* Desktop */ +#define PCI_CHIP_HASWELL_SDV_GT2 0x0C12 +#define PCI_CHIP_HASWELL_SDV_GT3 0x0C22 +#define PCI_CHIP_HASWELL_SDV_M_GT1 0x0C06 /* Mobile */ +#define PCI_CHIP_HASWELL_SDV_M_GT2 0x0C16 +#define PCI_CHIP_HASWELL_SDV_M_GT3 0x0C26 +#define PCI_CHIP_HASWELL_SDV_S_GT1 0x0C0A /* Server */ +#define PCI_CHIP_HASWELL_SDV_S_GT2 0x0C1A +#define PCI_CHIP_HASWELL_SDV_S_GT3 0x0C2A +#define PCI_CHIP_HASWELL_SDV_B_GT1 0x0C0B /* Reserved */ +#define PCI_CHIP_HASWELL_SDV_B_GT2 0x0C1B +#define PCI_CHIP_HASWELL_SDV_B_GT3 0x0C2B +#define PCI_CHIP_HASWELL_SDV_E_GT1 0x0C0E /* Reserved */ +#define PCI_CHIP_HASWELL_SDV_E_GT2 0x0C1E +#define PCI_CHIP_HASWELL_SDV_E_GT3 0x0C2E +#define PCI_CHIP_HASWELL_ULT_GT1 0x0A02 /* Desktop */ +#define PCI_CHIP_HASWELL_ULT_GT2 0x0A12 +#define PCI_CHIP_HASWELL_ULT_GT3 0x0A22 +#define PCI_CHIP_HASWELL_ULT_M_GT1 0x0A06 /* Mobile */ +#define PCI_CHIP_HASWELL_ULT_M_GT2 0x0A16 +#define PCI_CHIP_HASWELL_ULT_M_GT3 0x0A26 +#define PCI_CHIP_HASWELL_ULT_S_GT1 0x0A0A /* Server */ +#define PCI_CHIP_HASWELL_ULT_S_GT2 0x0A1A +#define PCI_CHIP_HASWELL_ULT_S_GT3 0x0A2A +#define PCI_CHIP_HASWELL_ULT_B_GT1 0x0A0B /* Reserved */ +#define PCI_CHIP_HASWELL_ULT_B_GT2 0x0A1B +#define PCI_CHIP_HASWELL_ULT_B_GT3 0x0A2B +#define PCI_CHIP_HASWELL_ULT_E_GT1 0x0A0E /* Reserved */ +#define PCI_CHIP_HASWELL_ULT_E_GT2 0x0A1E +#define PCI_CHIP_HASWELL_ULT_E_GT3 0x0A2E +#define PCI_CHIP_HASWELL_CRW_GT1 0x0D02 /* Desktop */ +#define PCI_CHIP_HASWELL_CRW_GT2 0x0D12 +#define PCI_CHIP_HASWELL_CRW_GT3 0x0D22 +#define PCI_CHIP_HASWELL_CRW_M_GT1 0x0D06 /* Mobile */ +#define PCI_CHIP_HASWELL_CRW_M_GT2 0x0D16 +#define PCI_CHIP_HASWELL_CRW_M_GT3 0x0D26 +#define PCI_CHIP_HASWELL_CRW_S_GT1 0x0D0A /* Server */ +#define PCI_CHIP_HASWELL_CRW_S_GT2 0x0D1A +#define PCI_CHIP_HASWELL_CRW_S_GT3 0x0D2A +#define PCI_CHIP_HASWELL_CRW_B_GT1 0x0D0B /* Reserved */ +#define PCI_CHIP_HASWELL_CRW_B_GT2 0x0D1B +#define PCI_CHIP_HASWELL_CRW_B_GT3 0x0D2B +#define PCI_CHIP_HASWELL_CRW_E_GT1 0x0D0E /* Reserved */ +#define PCI_CHIP_HASWELL_CRW_E_GT2 0x0D1E +#define PCI_CHIP_HASWELL_CRW_E_GT3 0x0D2E +#define BDW_SPARE 0x2 +#define BDW_ULT 0x6 +#define BDW_SERVER 0xa +#define BDW_IRIS 0xb +#define BDW_WORKSTATION 0xd +#define BDW_ULX 0xe + +#define PCI_CHIP_VALLEYVIEW_PO 0x0f30 /* VLV PO board */ +#define PCI_CHIP_VALLEYVIEW_1 0x0f31 +#define PCI_CHIP_VALLEYVIEW_2 0x0f32 +#define PCI_CHIP_VALLEYVIEW_3 0x0f33 + +#define PCI_CHIP_CHERRYVIEW_0 0x22b0 +#define PCI_CHIP_CHERRYVIEW_1 0x22b1 +#define PCI_CHIP_CHERRYVIEW_2 0x22b2 +#define PCI_CHIP_CHERRYVIEW_3 0x22b3 + +#define PCI_CHIP_SKYLAKE_DT_GT2 0x1912 +#define PCI_CHIP_KABYLAKE_DT_GT2 0x5912 + +#define IS_MOBILE(devid) ((devid) == PCI_CHIP_I855_GM || \ + (devid) == PCI_CHIP_I915_GM || \ + (devid) == PCI_CHIP_I945_GM || \ + (devid) == PCI_CHIP_I945_GME || \ + (devid) == PCI_CHIP_I965_GM || \ + (devid) == PCI_CHIP_I965_GME || \ + (devid) == PCI_CHIP_GM45_GM || IS_IGD(devid) || \ + (devid) == PCI_CHIP_IVYBRIDGE_M_GT1 || \ + (devid) == PCI_CHIP_IVYBRIDGE_M_GT2) + +#define IS_G45(devid) ((devid) == PCI_CHIP_IGD_E_G || \ + (devid) == PCI_CHIP_Q45_G || \ + (devid) == PCI_CHIP_G45_G || \ + (devid) == PCI_CHIP_G41_G) +#define IS_GM45(devid) ((devid) == PCI_CHIP_GM45_GM) +#define IS_G4X(devid) (IS_G45(devid) || IS_GM45(devid)) + +#define IS_ILD(devid) ((devid) == PCI_CHIP_ILD_G) +#define IS_ILM(devid) ((devid) == PCI_CHIP_ILM_G) + +#define IS_915(devid) ((devid) == PCI_CHIP_I915_G || \ + (devid) == PCI_CHIP_E7221_G || \ + (devid) == PCI_CHIP_I915_GM) + +#define IS_945GM(devid) ((devid) == PCI_CHIP_I945_GM || \ + (devid) == PCI_CHIP_I945_GME) + +#define IS_945(devid) ((devid) == PCI_CHIP_I945_G || \ + (devid) == PCI_CHIP_I945_GM || \ + (devid) == PCI_CHIP_I945_GME || \ + IS_G33(devid)) + +#define IS_G33(devid) ((devid) == PCI_CHIP_G33_G || \ + (devid) == PCI_CHIP_Q33_G || \ + (devid) == PCI_CHIP_Q35_G || IS_IGD(devid)) + +#define IS_GEN2(devid) ((devid) == PCI_CHIP_I830_M || \ + (devid) == PCI_CHIP_845_G || \ + (devid) == PCI_CHIP_I855_GM || \ + (devid) == PCI_CHIP_I865_G) + +#define IS_GEN3(devid) (IS_945(devid) || IS_915(devid)) + +#define IS_GEN4(devid) ((devid) == PCI_CHIP_I965_G || \ + (devid) == PCI_CHIP_I965_Q || \ + (devid) == PCI_CHIP_I965_G_1 || \ + (devid) == PCI_CHIP_I965_GM || \ + (devid) == PCI_CHIP_I965_GME || \ + (devid) == PCI_CHIP_I946_GZ || \ + IS_G4X(devid)) + +#define IS_GEN5(devid) (IS_ILD(devid) || IS_ILM(devid)) + +#define IS_GEN6(devid) ((devid) == PCI_CHIP_SANDYBRIDGE_GT1 || \ + (devid) == PCI_CHIP_SANDYBRIDGE_GT2 || \ + (devid) == PCI_CHIP_SANDYBRIDGE_GT2_PLUS || \ + (devid) == PCI_CHIP_SANDYBRIDGE_M_GT1 || \ + (devid) == PCI_CHIP_SANDYBRIDGE_M_GT2 || \ + (devid) == PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS || \ + (devid) == PCI_CHIP_SANDYBRIDGE_S) + +#define IS_GEN7(devid) (IS_IVYBRIDGE(devid) || \ + IS_HASWELL(devid) || \ + IS_VALLEYVIEW(devid)) + +#define IS_IVYBRIDGE(devid) ((devid) == PCI_CHIP_IVYBRIDGE_GT1 || \ + (devid) == PCI_CHIP_IVYBRIDGE_GT2 || \ + (devid) == PCI_CHIP_IVYBRIDGE_M_GT1 || \ + (devid) == PCI_CHIP_IVYBRIDGE_M_GT2 || \ + (devid) == PCI_CHIP_IVYBRIDGE_S || \ + (devid) == PCI_CHIP_IVYBRIDGE_S_GT2) + +#define IS_VALLEYVIEW(devid) ((devid) == PCI_CHIP_VALLEYVIEW_PO || \ + (devid) == PCI_CHIP_VALLEYVIEW_1 || \ + (devid) == PCI_CHIP_VALLEYVIEW_2 || \ + (devid) == PCI_CHIP_VALLEYVIEW_3) + +#define IS_HSW_GT1(devid) ((devid) == PCI_CHIP_HASWELL_GT1 || \ + (devid) == PCI_CHIP_HASWELL_M_GT1 || \ + (devid) == PCI_CHIP_HASWELL_S_GT1 || \ + (devid) == PCI_CHIP_HASWELL_B_GT1 || \ + (devid) == PCI_CHIP_HASWELL_E_GT1 || \ + (devid) == PCI_CHIP_HASWELL_SDV_GT1 || \ + (devid) == PCI_CHIP_HASWELL_SDV_M_GT1 || \ + (devid) == PCI_CHIP_HASWELL_SDV_S_GT1 || \ + (devid) == PCI_CHIP_HASWELL_SDV_B_GT1 || \ + (devid) == PCI_CHIP_HASWELL_SDV_E_GT1 || \ + (devid) == PCI_CHIP_HASWELL_ULT_GT1 || \ + (devid) == PCI_CHIP_HASWELL_ULT_M_GT1 || \ + (devid) == PCI_CHIP_HASWELL_ULT_S_GT1 || \ + (devid) == PCI_CHIP_HASWELL_ULT_B_GT1 || \ + (devid) == PCI_CHIP_HASWELL_ULT_E_GT1 || \ + (devid) == PCI_CHIP_HASWELL_CRW_GT1 || \ + (devid) == PCI_CHIP_HASWELL_CRW_M_GT1 || \ + (devid) == PCI_CHIP_HASWELL_CRW_S_GT1 || \ + (devid) == PCI_CHIP_HASWELL_CRW_B_GT1 || \ + (devid) == PCI_CHIP_HASWELL_CRW_E_GT1) +#define IS_HSW_GT2(devid) ((devid) == PCI_CHIP_HASWELL_GT2 || \ + (devid) == PCI_CHIP_HASWELL_M_GT2 || \ + (devid) == PCI_CHIP_HASWELL_S_GT2 || \ + (devid) == PCI_CHIP_HASWELL_B_GT2 || \ + (devid) == PCI_CHIP_HASWELL_E_GT2 || \ + (devid) == PCI_CHIP_HASWELL_SDV_GT2 || \ + (devid) == PCI_CHIP_HASWELL_SDV_M_GT2 || \ + (devid) == PCI_CHIP_HASWELL_SDV_S_GT2 || \ + (devid) == PCI_CHIP_HASWELL_SDV_B_GT2 || \ + (devid) == PCI_CHIP_HASWELL_SDV_E_GT2 || \ + (devid) == PCI_CHIP_HASWELL_ULT_GT2 || \ + (devid) == PCI_CHIP_HASWELL_ULT_M_GT2 || \ + (devid) == PCI_CHIP_HASWELL_ULT_S_GT2 || \ + (devid) == PCI_CHIP_HASWELL_ULT_B_GT2 || \ + (devid) == PCI_CHIP_HASWELL_ULT_E_GT2 || \ + (devid) == PCI_CHIP_HASWELL_CRW_GT2 || \ + (devid) == PCI_CHIP_HASWELL_CRW_M_GT2 || \ + (devid) == PCI_CHIP_HASWELL_CRW_S_GT2 || \ + (devid) == PCI_CHIP_HASWELL_CRW_B_GT2 || \ + (devid) == PCI_CHIP_HASWELL_CRW_E_GT2) +#define IS_HSW_GT3(devid) ((devid) == PCI_CHIP_HASWELL_GT3 || \ + (devid) == PCI_CHIP_HASWELL_M_GT3 || \ + (devid) == PCI_CHIP_HASWELL_S_GT3 || \ + (devid) == PCI_CHIP_HASWELL_B_GT3 || \ + (devid) == PCI_CHIP_HASWELL_E_GT3 || \ + (devid) == PCI_CHIP_HASWELL_SDV_GT3 || \ + (devid) == PCI_CHIP_HASWELL_SDV_M_GT3 || \ + (devid) == PCI_CHIP_HASWELL_SDV_S_GT3 || \ + (devid) == PCI_CHIP_HASWELL_SDV_B_GT3 || \ + (devid) == PCI_CHIP_HASWELL_SDV_E_GT3 || \ + (devid) == PCI_CHIP_HASWELL_ULT_GT3 || \ + (devid) == PCI_CHIP_HASWELL_ULT_M_GT3 || \ + (devid) == PCI_CHIP_HASWELL_ULT_S_GT3 || \ + (devid) == PCI_CHIP_HASWELL_ULT_B_GT3 || \ + (devid) == PCI_CHIP_HASWELL_ULT_E_GT3 || \ + (devid) == PCI_CHIP_HASWELL_CRW_GT3 || \ + (devid) == PCI_CHIP_HASWELL_CRW_M_GT3 || \ + (devid) == PCI_CHIP_HASWELL_CRW_S_GT3 || \ + (devid) == PCI_CHIP_HASWELL_CRW_B_GT3 || \ + (devid) == PCI_CHIP_HASWELL_CRW_E_GT3) + +#define IS_HASWELL(devid) (IS_HSW_GT1(devid) || \ + IS_HSW_GT2(devid) || \ + IS_HSW_GT3(devid)) + +#define IS_BROADWELL(devid) (((devid & 0xff00) != 0x1600) ? 0 : \ + (((devid & 0x00f0) >> 4) > 3) ? 0 : \ + ((devid & 0x000f) == BDW_SPARE) ? 1 : \ + ((devid & 0x000f) == BDW_ULT) ? 1 : \ + ((devid & 0x000f) == BDW_IRIS) ? 1 : \ + ((devid & 0x000f) == BDW_SERVER) ? 1 : \ + ((devid & 0x000f) == BDW_WORKSTATION) ? 1 : \ + ((devid & 0x000f) == BDW_ULX) ? 1 : 0) + +#define IS_CHERRYVIEW(devid) ((devid) == PCI_CHIP_CHERRYVIEW_0 || \ + (devid) == PCI_CHIP_CHERRYVIEW_1 || \ + (devid) == PCI_CHIP_CHERRYVIEW_2 || \ + (devid) == PCI_CHIP_CHERRYVIEW_3) + +#define IS_GEN8(devid) (IS_BROADWELL(devid) || \ + IS_CHERRYVIEW(devid)) + +/* New platforms use kernel pci ids */ +#include <stdbool.h> +#include <libdrm_macros.h> + +#define IS_9XX(dev) (!IS_GEN2(dev)) + +#endif /* _INTEL_CHIPSET_H */ diff --git a/intel/intel_debug.h b/intel/intel_debug.h new file mode 100644 index 0000000..fa0737c --- /dev/null +++ b/intel/intel_debug.h @@ -0,0 +1,44 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Ben Widawsky <ben@bwidawsk.net> + * + */ + +#ifndef INTEL_DEBUG_H +#define INTEL_DEBUG_H + +#include <stdint.h> + +#define SHADER_DEBUG_SOCKET "/var/run/gen_debug" +#define DEBUG_HANDSHAKE_VERSION 0x3 +#define DEBUG_HANDSHAKE_ACK "okay" + +/* First byte must always be the 1 byte version */ +struct intel_debug_handshake { + uint32_t version; + int flink_handle; + uint32_t per_thread_scratch; +} __attribute__((packed)); + +#endif diff --git a/intel/intel_decode.c b/intel/intel_decode.c new file mode 100644 index 0000000..b0fc228 --- /dev/null +++ b/intel/intel_decode.c @@ -0,0 +1,3984 @@ +/* + * Copyright © 2009-2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <assert.h> +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdbool.h> +#include <stdarg.h> +#include <string.h> + +#include "libdrm_macros.h" +#include "xf86drm.h" +#include "intel_chipset.h" +#include "intel_bufmgr.h" + + +/* Struct for tracking drm_intel_decode state. */ +struct drm_intel_decode { + /** stdio file where the output should land. Defaults to stdout. */ + FILE *out; + + /** PCI device ID. */ + uint32_t devid; + + /** + * Shorthand device identifier: 3 is 915, 4 is 965, 5 is + * Ironlake, etc. + */ + int gen; + + /** GPU address of the start of the current packet. */ + uint32_t hw_offset; + /** CPU virtual address of the start of the current packet. */ + uint32_t *data; + /** DWORDs of remaining batchbuffer data starting from the packet. */ + uint32_t count; + + /** GPU address of the start of the batchbuffer data. */ + uint32_t base_hw_offset; + /** CPU Virtual address of the start of the batchbuffer data. */ + uint32_t *base_data; + /** Number of DWORDs of batchbuffer data. */ + uint32_t base_count; + + /** @{ + * GPU head and tail pointers, which will be noted in the dump, or ~0. + */ + uint32_t head, tail; + /** @} */ + + /** + * Whether to dump the dwords after MI_BATCHBUFFER_END. + * + * This sometimes provides clues in corrupted batchbuffers, + * and is used by the intel-gpu-tools. + */ + bool dump_past_end; + + bool overflowed; +}; + +static FILE *out; +static uint32_t saved_s2 = 0, saved_s4 = 0; +static char saved_s2_set = 0, saved_s4_set = 0; +static uint32_t head_offset = 0xffffffff; /* undefined */ +static uint32_t tail_offset = 0xffffffff; /* undefined */ + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(A) (sizeof(A)/sizeof(A[0])) +#endif + +#define BUFFER_FAIL(_count, _len, _name) do { \ + fprintf(out, "Buffer size too small in %s (%d < %d)\n", \ + (_name), (_count), (_len)); \ + return _count; \ +} while (0) + +static float int_as_float(uint32_t intval) +{ + union intfloat { + uint32_t i; + float f; + } uval; + + uval.i = intval; + return uval.f; +} + +static void DRM_PRINTFLIKE(3, 4) +instr_out(struct drm_intel_decode *ctx, unsigned int index, + const char *fmt, ...) +{ + va_list va; + const char *parseinfo; + uint32_t offset = ctx->hw_offset + index * 4; + + if (index > ctx->count) { + if (!ctx->overflowed) { + fprintf(out, "ERROR: Decode attempted to continue beyond end of batchbuffer\n"); + ctx->overflowed = true; + } + return; + } + + if (offset == head_offset) + parseinfo = "HEAD"; + else if (offset == tail_offset) + parseinfo = "TAIL"; + else + parseinfo = " "; + + fprintf(out, "0x%08x: %s 0x%08x: %s", offset, parseinfo, + ctx->data[index], index == 0 ? "" : " "); + va_start(va, fmt); + vfprintf(out, fmt, va); + va_end(va); +} + +static int +decode_MI_SET_CONTEXT(struct drm_intel_decode *ctx) +{ + uint32_t data = ctx->data[1]; + if (ctx->gen > 7) + return 1; + + instr_out(ctx, 0, "MI_SET_CONTEXT\n"); + instr_out(ctx, 1, "gtt offset = 0x%x%s%s\n", + data & ~0xfff, + data & (1<<1)? ", Force Restore": "", + data & (1<<0)? ", Restore Inhibit": ""); + + return 2; +} + +static int +decode_MI_WAIT_FOR_EVENT(struct drm_intel_decode *ctx) +{ + const char *cc_wait; + int cc_shift = 0; + uint32_t data = ctx->data[0]; + + if (ctx->gen <= 5) + cc_shift = 9; + else + cc_shift = 16; + + switch ((data >> cc_shift) & 0x1f) { + case 1: + cc_wait = ", cc wait 1"; + break; + case 2: + cc_wait = ", cc wait 2"; + break; + case 3: + cc_wait = ", cc wait 3"; + break; + case 4: + cc_wait = ", cc wait 4"; + break; + case 5: + cc_wait = ", cc wait 4"; + break; + default: + cc_wait = ""; + break; + } + + if (ctx->gen <= 5) { + instr_out(ctx, 0, "MI_WAIT_FOR_EVENT%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + data & (1<<18)? ", pipe B start vblank wait": "", + data & (1<<17)? ", pipe A start vblank wait": "", + data & (1<<16)? ", overlay flip pending wait": "", + data & (1<<14)? ", pipe B hblank wait": "", + data & (1<<13)? ", pipe A hblank wait": "", + cc_wait, + data & (1<<8)? ", plane C pending flip wait": "", + data & (1<<7)? ", pipe B vblank wait": "", + data & (1<<6)? ", plane B pending flip wait": "", + data & (1<<5)? ", pipe B scan line wait": "", + data & (1<<4)? ", fbc idle wait": "", + data & (1<<3)? ", pipe A vblank wait": "", + data & (1<<2)? ", plane A pending flip wait": "", + data & (1<<1)? ", plane A scan line wait": ""); + } else { + instr_out(ctx, 0, "MI_WAIT_FOR_EVENT%s%s%s%s%s%s%s%s%s%s%s%s\n", + data & (1<<20)? ", sprite C pending flip wait": "", /* ivb */ + cc_wait, + data & (1<<13)? ", pipe B hblank wait": "", + data & (1<<11)? ", pipe B vblank wait": "", + data & (1<<10)? ", sprite B pending flip wait": "", + data & (1<<9)? ", plane B pending flip wait": "", + data & (1<<8)? ", plane B scan line wait": "", + data & (1<<5)? ", pipe A hblank wait": "", + data & (1<<3)? ", pipe A vblank wait": "", + data & (1<<2)? ", sprite A pending flip wait": "", + data & (1<<1)? ", plane A pending flip wait": "", + data & (1<<0)? ", plane A scan line wait": ""); + } + + return 1; +} + +static int +decode_mi(struct drm_intel_decode *ctx) +{ + unsigned int opcode, len = -1; + const char *post_sync_op = ""; + uint32_t *data = ctx->data; + + struct { + uint32_t opcode; + int len_mask; + unsigned int min_len; + unsigned int max_len; + const char *name; + int (*func)(struct drm_intel_decode *ctx); + } opcodes_mi[] = { + { 0x08, 0, 1, 1, "MI_ARB_ON_OFF" }, + { 0x0a, 0, 1, 1, "MI_BATCH_BUFFER_END" }, + { 0x30, 0x3f, 3, 3, "MI_BATCH_BUFFER" }, + { 0x31, 0x3f, 2, 2, "MI_BATCH_BUFFER_START" }, + { 0x14, 0x3f, 3, 3, "MI_DISPLAY_BUFFER_INFO" }, + { 0x04, 0, 1, 1, "MI_FLUSH" }, + { 0x22, 0x1f, 3, 3, "MI_LOAD_REGISTER_IMM" }, + { 0x13, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_EXCL" }, + { 0x12, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_INCL" }, + { 0x00, 0, 1, 1, "MI_NOOP" }, + { 0x11, 0x3f, 2, 2, "MI_OVERLAY_FLIP" }, + { 0x07, 0, 1, 1, "MI_REPORT_HEAD" }, + { 0x18, 0x3f, 2, 2, "MI_SET_CONTEXT", decode_MI_SET_CONTEXT }, + { 0x20, 0x3f, 3, 4, "MI_STORE_DATA_IMM" }, + { 0x21, 0x3f, 3, 4, "MI_STORE_DATA_INDEX" }, + { 0x24, 0x3f, 3, 3, "MI_STORE_REGISTER_MEM" }, + { 0x02, 0, 1, 1, "MI_USER_INTERRUPT" }, + { 0x03, 0, 1, 1, "MI_WAIT_FOR_EVENT", decode_MI_WAIT_FOR_EVENT }, + { 0x16, 0x7f, 3, 3, "MI_SEMAPHORE_MBOX" }, + { 0x26, 0x1f, 3, 4, "MI_FLUSH_DW" }, + { 0x28, 0x3f, 3, 3, "MI_REPORT_PERF_COUNT" }, + { 0x29, 0xff, 3, 3, "MI_LOAD_REGISTER_MEM" }, + { 0x0b, 0, 1, 1, "MI_SUSPEND_FLUSH"}, + }, *opcode_mi = NULL; + + /* check instruction length */ + for (opcode = 0; opcode < sizeof(opcodes_mi) / sizeof(opcodes_mi[0]); + opcode++) { + if ((data[0] & 0x1f800000) >> 23 == opcodes_mi[opcode].opcode) { + len = 1; + if (opcodes_mi[opcode].max_len > 1) { + len = + (data[0] & opcodes_mi[opcode].len_mask) + 2; + if (len < opcodes_mi[opcode].min_len + || len > opcodes_mi[opcode].max_len) { + fprintf(out, + "Bad length (%d) in %s, [%d, %d]\n", + len, opcodes_mi[opcode].name, + opcodes_mi[opcode].min_len, + opcodes_mi[opcode].max_len); + } + } + opcode_mi = &opcodes_mi[opcode]; + break; + } + } + + if (opcode_mi && opcode_mi->func) + return opcode_mi->func(ctx); + + switch ((data[0] & 0x1f800000) >> 23) { + case 0x0a: + instr_out(ctx, 0, "MI_BATCH_BUFFER_END\n"); + return -1; + case 0x16: + instr_out(ctx, 0, "MI_SEMAPHORE_MBOX%s%s%s%s %u\n", + data[0] & (1 << 22) ? " global gtt," : "", + data[0] & (1 << 21) ? " update semaphore," : "", + data[0] & (1 << 20) ? " compare semaphore," : "", + data[0] & (1 << 18) ? " use compare reg" : "", + (data[0] & (0x3 << 16)) >> 16); + instr_out(ctx, 1, "value\n"); + instr_out(ctx, 2, "address\n"); + return len; + case 0x21: + instr_out(ctx, 0, "MI_STORE_DATA_INDEX%s\n", + data[0] & (1 << 21) ? " use per-process HWS," : ""); + instr_out(ctx, 1, "index\n"); + instr_out(ctx, 2, "dword\n"); + if (len == 4) + instr_out(ctx, 3, "upper dword\n"); + return len; + case 0x00: + if (data[0] & (1 << 22)) + instr_out(ctx, 0, + "MI_NOOP write NOPID reg, val=0x%x\n", + data[0] & ((1 << 22) - 1)); + else + instr_out(ctx, 0, "MI_NOOP\n"); + return len; + case 0x26: + switch (data[0] & (0x3 << 14)) { + case (0 << 14): + post_sync_op = "no write"; + break; + case (1 << 14): + post_sync_op = "write data"; + break; + case (2 << 14): + post_sync_op = "reserved"; + break; + case (3 << 14): + post_sync_op = "write TIMESTAMP"; + break; + } + instr_out(ctx, 0, + "MI_FLUSH_DW%s%s%s%s post_sync_op='%s' %s%s\n", + data[0] & (1 << 22) ? + " enable protected mem (BCS-only)," : "", + data[0] & (1 << 21) ? " store in hws," : "", + data[0] & (1 << 18) ? " invalidate tlb," : "", + data[0] & (1 << 17) ? " flush gfdt," : "", + post_sync_op, + data[0] & (1 << 8) ? " enable notify interrupt," : "", + data[0] & (1 << 7) ? + " invalidate video state (BCS-only)," : ""); + if (data[0] & (1 << 21)) + instr_out(ctx, 1, "hws index\n"); + else + instr_out(ctx, 1, "address\n"); + instr_out(ctx, 2, "dword\n"); + if (len == 4) + instr_out(ctx, 3, "upper dword\n"); + return len; + } + + for (opcode = 0; opcode < sizeof(opcodes_mi) / sizeof(opcodes_mi[0]); + opcode++) { + if ((data[0] & 0x1f800000) >> 23 == opcodes_mi[opcode].opcode) { + unsigned int i; + + instr_out(ctx, 0, "%s\n", + opcodes_mi[opcode].name); + for (i = 1; i < len; i++) { + instr_out(ctx, i, "dword %d\n", i); + } + + return len; + } + } + + instr_out(ctx, 0, "MI UNKNOWN\n"); + return 1; +} + +static void +decode_2d_br00(struct drm_intel_decode *ctx, const char *cmd) +{ + instr_out(ctx, 0, + "%s (rgb %sabled, alpha %sabled, src tile %d, dst tile %d)\n", + cmd, + (ctx->data[0] & (1 << 20)) ? "en" : "dis", + (ctx->data[0] & (1 << 21)) ? "en" : "dis", + (ctx->data[0] >> 15) & 1, + (ctx->data[0] >> 11) & 1); +} + +static void +decode_2d_br01(struct drm_intel_decode *ctx) +{ + const char *format; + switch ((ctx->data[1] >> 24) & 0x3) { + case 0: + format = "8"; + break; + case 1: + format = "565"; + break; + case 2: + format = "1555"; + break; + case 3: + format = "8888"; + break; + } + + instr_out(ctx, 1, + "format %s, pitch %d, rop 0x%02x, " + "clipping %sabled, %s%s \n", + format, + (short)(ctx->data[1] & 0xffff), + (ctx->data[1] >> 16) & 0xff, + ctx->data[1] & (1 << 30) ? "en" : "dis", + ctx->data[1] & (1 << 31) ? "solid pattern enabled, " : "", + ctx->data[1] & (1 << 31) ? + "mono pattern transparency enabled, " : ""); + +} + +static int +decode_2d(struct drm_intel_decode *ctx) +{ + unsigned int opcode, len; + uint32_t *data = ctx->data; + + struct { + uint32_t opcode; + unsigned int min_len; + unsigned int max_len; + const char *name; + } opcodes_2d[] = { + { 0x40, 5, 5, "COLOR_BLT" }, + { 0x43, 6, 6, "SRC_COPY_BLT" }, + { 0x01, 8, 8, "XY_SETUP_BLT" }, + { 0x11, 9, 9, "XY_SETUP_MONO_PATTERN_SL_BLT" }, + { 0x03, 3, 3, "XY_SETUP_CLIP_BLT" }, + { 0x24, 2, 2, "XY_PIXEL_BLT" }, + { 0x25, 3, 3, "XY_SCANLINES_BLT" }, + { 0x26, 4, 4, "Y_TEXT_BLT" }, + { 0x31, 5, 134, "XY_TEXT_IMMEDIATE_BLT" }, + { 0x50, 6, 6, "XY_COLOR_BLT" }, + { 0x51, 6, 6, "XY_PAT_BLT" }, + { 0x76, 8, 8, "XY_PAT_CHROMA_BLT" }, + { 0x72, 7, 135, "XY_PAT_BLT_IMMEDIATE" }, + { 0x77, 9, 137, "XY_PAT_CHROMA_BLT_IMMEDIATE" }, + { 0x52, 9, 9, "XY_MONO_PAT_BLT" }, + { 0x59, 7, 7, "XY_MONO_PAT_FIXED_BLT" }, + { 0x53, 8, 8, "XY_SRC_COPY_BLT" }, + { 0x54, 8, 8, "XY_MONO_SRC_COPY_BLT" }, + { 0x71, 9, 137, "XY_MONO_SRC_COPY_IMMEDIATE_BLT" }, + { 0x55, 9, 9, "XY_FULL_BLT" }, + { 0x55, 9, 137, "XY_FULL_IMMEDIATE_PATTERN_BLT" }, + { 0x56, 9, 9, "XY_FULL_MONO_SRC_BLT" }, + { 0x75, 10, 138, "XY_FULL_MONO_SRC_IMMEDIATE_PATTERN_BLT" }, + { 0x57, 12, 12, "XY_FULL_MONO_PATTERN_BLT" }, + { 0x58, 12, 12, "XY_FULL_MONO_PATTERN_MONO_SRC_BLT"}, + }; + + switch ((data[0] & 0x1fc00000) >> 22) { + case 0x25: + instr_out(ctx, 0, + "XY_SCANLINES_BLT (pattern seed (%d, %d), dst tile %d)\n", + (data[0] >> 12) & 0x8, + (data[0] >> 8) & 0x8, (data[0] >> 11) & 1); + + len = (data[0] & 0x000000ff) + 2; + if (len != 3) + fprintf(out, "Bad count in XY_SCANLINES_BLT\n"); + + instr_out(ctx, 1, "dest (%d,%d)\n", + data[1] & 0xffff, data[1] >> 16); + instr_out(ctx, 2, "dest (%d,%d)\n", + data[2] & 0xffff, data[2] >> 16); + return len; + case 0x01: + decode_2d_br00(ctx, "XY_SETUP_BLT"); + + len = (data[0] & 0x000000ff) + 2; + if (len != 8) + fprintf(out, "Bad count in XY_SETUP_BLT\n"); + + decode_2d_br01(ctx); + instr_out(ctx, 2, "cliprect (%d,%d)\n", + data[2] & 0xffff, data[2] >> 16); + instr_out(ctx, 3, "cliprect (%d,%d)\n", + data[3] & 0xffff, data[3] >> 16); + instr_out(ctx, 4, "setup dst offset 0x%08x\n", + data[4]); + instr_out(ctx, 5, "setup background color\n"); + instr_out(ctx, 6, "setup foreground color\n"); + instr_out(ctx, 7, "color pattern offset\n"); + return len; + case 0x03: + decode_2d_br00(ctx, "XY_SETUP_CLIP_BLT"); + + len = (data[0] & 0x000000ff) + 2; + if (len != 3) + fprintf(out, "Bad count in XY_SETUP_CLIP_BLT\n"); + + instr_out(ctx, 1, "cliprect (%d,%d)\n", + data[1] & 0xffff, data[2] >> 16); + instr_out(ctx, 2, "cliprect (%d,%d)\n", + data[2] & 0xffff, data[3] >> 16); + return len; + case 0x11: + decode_2d_br00(ctx, "XY_SETUP_MONO_PATTERN_SL_BLT"); + + len = (data[0] & 0x000000ff) + 2; + if (len != 9) + fprintf(out, + "Bad count in XY_SETUP_MONO_PATTERN_SL_BLT\n"); + + decode_2d_br01(ctx); + instr_out(ctx, 2, "cliprect (%d,%d)\n", + data[2] & 0xffff, data[2] >> 16); + instr_out(ctx, 3, "cliprect (%d,%d)\n", + data[3] & 0xffff, data[3] >> 16); + instr_out(ctx, 4, "setup dst offset 0x%08x\n", + data[4]); + instr_out(ctx, 5, "setup background color\n"); + instr_out(ctx, 6, "setup foreground color\n"); + instr_out(ctx, 7, "mono pattern dw0\n"); + instr_out(ctx, 8, "mono pattern dw1\n"); + return len; + case 0x50: + decode_2d_br00(ctx, "XY_COLOR_BLT"); + + len = (data[0] & 0x000000ff) + 2; + if (len != 6) + fprintf(out, "Bad count in XY_COLOR_BLT\n"); + + decode_2d_br01(ctx); + instr_out(ctx, 2, "(%d,%d)\n", + data[2] & 0xffff, data[2] >> 16); + instr_out(ctx, 3, "(%d,%d)\n", + data[3] & 0xffff, data[3] >> 16); + instr_out(ctx, 4, "offset 0x%08x\n", data[4]); + instr_out(ctx, 5, "color\n"); + return len; + case 0x53: + decode_2d_br00(ctx, "XY_SRC_COPY_BLT"); + + len = (data[0] & 0x000000ff) + 2; + if (len != 8) + fprintf(out, "Bad count in XY_SRC_COPY_BLT\n"); + + decode_2d_br01(ctx); + instr_out(ctx, 2, "dst (%d,%d)\n", + data[2] & 0xffff, data[2] >> 16); + instr_out(ctx, 3, "dst (%d,%d)\n", + data[3] & 0xffff, data[3] >> 16); + instr_out(ctx, 4, "dst offset 0x%08x\n", data[4]); + instr_out(ctx, 5, "src (%d,%d)\n", + data[5] & 0xffff, data[5] >> 16); + instr_out(ctx, 6, "src pitch %d\n", + (short)(data[6] & 0xffff)); + instr_out(ctx, 7, "src offset 0x%08x\n", data[7]); + return len; + } + + for (opcode = 0; opcode < sizeof(opcodes_2d) / sizeof(opcodes_2d[0]); + opcode++) { + if ((data[0] & 0x1fc00000) >> 22 == opcodes_2d[opcode].opcode) { + unsigned int i; + + len = 1; + instr_out(ctx, 0, "%s\n", + opcodes_2d[opcode].name); + if (opcodes_2d[opcode].max_len > 1) { + len = (data[0] & 0x000000ff) + 2; + if (len < opcodes_2d[opcode].min_len || + len > opcodes_2d[opcode].max_len) { + fprintf(out, "Bad count in %s\n", + opcodes_2d[opcode].name); + } + } + + for (i = 1; i < len; i++) { + instr_out(ctx, i, "dword %d\n", i); + } + + return len; + } + } + + instr_out(ctx, 0, "2D UNKNOWN\n"); + return 1; +} + +static int +decode_3d_1c(struct drm_intel_decode *ctx) +{ + uint32_t *data = ctx->data; + uint32_t opcode; + + opcode = (data[0] & 0x00f80000) >> 19; + + switch (opcode) { + case 0x11: + instr_out(ctx, 0, + "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE\n"); + return 1; + case 0x10: + instr_out(ctx, 0, "3DSTATE_SCISSOR_ENABLE %s\n", + data[0] & 1 ? "enabled" : "disabled"); + return 1; + case 0x01: + instr_out(ctx, 0, "3DSTATE_MAP_COORD_SET_I830\n"); + return 1; + case 0x0a: + instr_out(ctx, 0, "3DSTATE_MAP_CUBE_I830\n"); + return 1; + case 0x05: + instr_out(ctx, 0, "3DSTATE_MAP_TEX_STREAM_I830\n"); + return 1; + } + + instr_out(ctx, 0, "3D UNKNOWN: 3d_1c opcode = 0x%x\n", + opcode); + return 1; +} + +/** Sets the string dstname to describe the destination of the PS instruction */ +static void +i915_get_instruction_dst(uint32_t *data, int i, char *dstname, int do_mask) +{ + uint32_t a0 = data[i]; + int dst_nr = (a0 >> 14) & 0xf; + char dstmask[8]; + const char *sat; + + if (do_mask) { + if (((a0 >> 10) & 0xf) == 0xf) { + dstmask[0] = 0; + } else { + int dstmask_index = 0; + + dstmask[dstmask_index++] = '.'; + if (a0 & (1 << 10)) + dstmask[dstmask_index++] = 'x'; + if (a0 & (1 << 11)) + dstmask[dstmask_index++] = 'y'; + if (a0 & (1 << 12)) + dstmask[dstmask_index++] = 'z'; + if (a0 & (1 << 13)) + dstmask[dstmask_index++] = 'w'; + dstmask[dstmask_index++] = 0; + } + + if (a0 & (1 << 22)) + sat = ".sat"; + else + sat = ""; + } else { + dstmask[0] = 0; + sat = ""; + } + + switch ((a0 >> 19) & 0x7) { + case 0: + if (dst_nr > 15) + fprintf(out, "bad destination reg R%d\n", dst_nr); + sprintf(dstname, "R%d%s%s", dst_nr, dstmask, sat); + break; + case 4: + if (dst_nr > 0) + fprintf(out, "bad destination reg oC%d\n", dst_nr); + sprintf(dstname, "oC%s%s", dstmask, sat); + break; + case 5: + if (dst_nr > 0) + fprintf(out, "bad destination reg oD%d\n", dst_nr); + sprintf(dstname, "oD%s%s", dstmask, sat); + break; + case 6: + if (dst_nr > 3) + fprintf(out, "bad destination reg U%d\n", dst_nr); + sprintf(dstname, "U%d%s%s", dst_nr, dstmask, sat); + break; + default: + sprintf(dstname, "RESERVED"); + break; + } +} + +static const char * +i915_get_channel_swizzle(uint32_t select) +{ + switch (select & 0x7) { + case 0: + return (select & 8) ? "-x" : "x"; + case 1: + return (select & 8) ? "-y" : "y"; + case 2: + return (select & 8) ? "-z" : "z"; + case 3: + return (select & 8) ? "-w" : "w"; + case 4: + return (select & 8) ? "-0" : "0"; + case 5: + return (select & 8) ? "-1" : "1"; + default: + return (select & 8) ? "-bad" : "bad"; + } +} + +static void +i915_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name) +{ + switch (src_type) { + case 0: + sprintf(name, "R%d", src_nr); + if (src_nr > 15) + fprintf(out, "bad src reg %s\n", name); + break; + case 1: + if (src_nr < 8) + sprintf(name, "T%d", src_nr); + else if (src_nr == 8) + sprintf(name, "DIFFUSE"); + else if (src_nr == 9) + sprintf(name, "SPECULAR"); + else if (src_nr == 10) + sprintf(name, "FOG"); + else { + fprintf(out, "bad src reg T%d\n", src_nr); + sprintf(name, "RESERVED"); + } + break; + case 2: + sprintf(name, "C%d", src_nr); + if (src_nr > 31) + fprintf(out, "bad src reg %s\n", name); + break; + case 4: + sprintf(name, "oC"); + if (src_nr > 0) + fprintf(out, "bad src reg oC%d\n", src_nr); + break; + case 5: + sprintf(name, "oD"); + if (src_nr > 0) + fprintf(out, "bad src reg oD%d\n", src_nr); + break; + case 6: + sprintf(name, "U%d", src_nr); + if (src_nr > 3) + fprintf(out, "bad src reg %s\n", name); + break; + default: + fprintf(out, "bad src reg type %d\n", src_type); + sprintf(name, "RESERVED"); + break; + } +} + +static void i915_get_instruction_src0(uint32_t *data, int i, char *srcname) +{ + uint32_t a0 = data[i]; + uint32_t a1 = data[i + 1]; + int src_nr = (a0 >> 2) & 0x1f; + const char *swizzle_x = i915_get_channel_swizzle((a1 >> 28) & 0xf); + const char *swizzle_y = i915_get_channel_swizzle((a1 >> 24) & 0xf); + const char *swizzle_z = i915_get_channel_swizzle((a1 >> 20) & 0xf); + const char *swizzle_w = i915_get_channel_swizzle((a1 >> 16) & 0xf); + char swizzle[100]; + + i915_get_instruction_src_name((a0 >> 7) & 0x7, src_nr, srcname); + sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, + swizzle_w); + if (strcmp(swizzle, ".xyzw") != 0) + strcat(srcname, swizzle); +} + +static void i915_get_instruction_src1(uint32_t *data, int i, char *srcname) +{ + uint32_t a1 = data[i + 1]; + uint32_t a2 = data[i + 2]; + int src_nr = (a1 >> 8) & 0x1f; + const char *swizzle_x = i915_get_channel_swizzle((a1 >> 4) & 0xf); + const char *swizzle_y = i915_get_channel_swizzle((a1 >> 0) & 0xf); + const char *swizzle_z = i915_get_channel_swizzle((a2 >> 28) & 0xf); + const char *swizzle_w = i915_get_channel_swizzle((a2 >> 24) & 0xf); + char swizzle[100]; + + i915_get_instruction_src_name((a1 >> 13) & 0x7, src_nr, srcname); + sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, + swizzle_w); + if (strcmp(swizzle, ".xyzw") != 0) + strcat(srcname, swizzle); +} + +static void i915_get_instruction_src2(uint32_t *data, int i, char *srcname) +{ + uint32_t a2 = data[i + 2]; + int src_nr = (a2 >> 16) & 0x1f; + const char *swizzle_x = i915_get_channel_swizzle((a2 >> 12) & 0xf); + const char *swizzle_y = i915_get_channel_swizzle((a2 >> 8) & 0xf); + const char *swizzle_z = i915_get_channel_swizzle((a2 >> 4) & 0xf); + const char *swizzle_w = i915_get_channel_swizzle((a2 >> 0) & 0xf); + char swizzle[100]; + + i915_get_instruction_src_name((a2 >> 21) & 0x7, src_nr, srcname); + sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, + swizzle_w); + if (strcmp(swizzle, ".xyzw") != 0) + strcat(srcname, swizzle); +} + +static void +i915_get_instruction_addr(uint32_t src_type, uint32_t src_nr, char *name) +{ + switch (src_type) { + case 0: + sprintf(name, "R%d", src_nr); + if (src_nr > 15) + fprintf(out, "bad src reg %s\n", name); + break; + case 1: + if (src_nr < 8) + sprintf(name, "T%d", src_nr); + else if (src_nr == 8) + sprintf(name, "DIFFUSE"); + else if (src_nr == 9) + sprintf(name, "SPECULAR"); + else if (src_nr == 10) + sprintf(name, "FOG"); + else { + fprintf(out, "bad src reg T%d\n", src_nr); + sprintf(name, "RESERVED"); + } + break; + case 4: + sprintf(name, "oC"); + if (src_nr > 0) + fprintf(out, "bad src reg oC%d\n", src_nr); + break; + case 5: + sprintf(name, "oD"); + if (src_nr > 0) + fprintf(out, "bad src reg oD%d\n", src_nr); + break; + default: + fprintf(out, "bad src reg type %d\n", src_type); + sprintf(name, "RESERVED"); + break; + } +} + +static void +i915_decode_alu1(struct drm_intel_decode *ctx, + int i, char *instr_prefix, const char *op_name) +{ + char dst[100], src0[100]; + + i915_get_instruction_dst(ctx->data, i, dst, 1); + i915_get_instruction_src0(ctx->data, i, src0); + + instr_out(ctx, i++, "%s: %s %s, %s\n", instr_prefix, + op_name, dst, src0); + instr_out(ctx, i++, "%s\n", instr_prefix); + instr_out(ctx, i++, "%s\n", instr_prefix); +} + +static void +i915_decode_alu2(struct drm_intel_decode *ctx, + int i, char *instr_prefix, const char *op_name) +{ + char dst[100], src0[100], src1[100]; + + i915_get_instruction_dst(ctx->data, i, dst, 1); + i915_get_instruction_src0(ctx->data, i, src0); + i915_get_instruction_src1(ctx->data, i, src1); + + instr_out(ctx, i++, "%s: %s %s, %s, %s\n", instr_prefix, + op_name, dst, src0, src1); + instr_out(ctx, i++, "%s\n", instr_prefix); + instr_out(ctx, i++, "%s\n", instr_prefix); +} + +static void +i915_decode_alu3(struct drm_intel_decode *ctx, + int i, char *instr_prefix, const char *op_name) +{ + char dst[100], src0[100], src1[100], src2[100]; + + i915_get_instruction_dst(ctx->data, i, dst, 1); + i915_get_instruction_src0(ctx->data, i, src0); + i915_get_instruction_src1(ctx->data, i, src1); + i915_get_instruction_src2(ctx->data, i, src2); + + instr_out(ctx, i++, "%s: %s %s, %s, %s, %s\n", instr_prefix, + op_name, dst, src0, src1, src2); + instr_out(ctx, i++, "%s\n", instr_prefix); + instr_out(ctx, i++, "%s\n", instr_prefix); +} + +static void +i915_decode_tex(struct drm_intel_decode *ctx, int i, + const char *instr_prefix, const char *tex_name) +{ + uint32_t t0 = ctx->data[i]; + uint32_t t1 = ctx->data[i + 1]; + char dst_name[100]; + char addr_name[100]; + int sampler_nr; + + i915_get_instruction_dst(ctx->data, i, dst_name, 0); + i915_get_instruction_addr((t1 >> 24) & 0x7, + (t1 >> 17) & 0xf, addr_name); + sampler_nr = t0 & 0xf; + + instr_out(ctx, i++, "%s: %s %s, S%d, %s\n", instr_prefix, + tex_name, dst_name, sampler_nr, addr_name); + instr_out(ctx, i++, "%s\n", instr_prefix); + instr_out(ctx, i++, "%s\n", instr_prefix); +} + +static void +i915_decode_dcl(struct drm_intel_decode *ctx, int i, char *instr_prefix) +{ + uint32_t d0 = ctx->data[i]; + const char *sampletype; + int dcl_nr = (d0 >> 14) & 0xf; + const char *dcl_x = d0 & (1 << 10) ? "x" : ""; + const char *dcl_y = d0 & (1 << 11) ? "y" : ""; + const char *dcl_z = d0 & (1 << 12) ? "z" : ""; + const char *dcl_w = d0 & (1 << 13) ? "w" : ""; + char dcl_mask[10]; + + switch ((d0 >> 19) & 0x3) { + case 1: + sprintf(dcl_mask, ".%s%s%s%s", dcl_x, dcl_y, dcl_z, dcl_w); + if (strcmp(dcl_mask, ".") == 0) + fprintf(out, "bad (empty) dcl mask\n"); + + if (dcl_nr > 10) + fprintf(out, "bad T%d dcl register number\n", dcl_nr); + if (dcl_nr < 8) { + if (strcmp(dcl_mask, ".x") != 0 && + strcmp(dcl_mask, ".xy") != 0 && + strcmp(dcl_mask, ".xz") != 0 && + strcmp(dcl_mask, ".w") != 0 && + strcmp(dcl_mask, ".xyzw") != 0) { + fprintf(out, "bad T%d.%s dcl mask\n", dcl_nr, + dcl_mask); + } + instr_out(ctx, i++, "%s: DCL T%d%s\n", + instr_prefix, dcl_nr, dcl_mask); + } else { + if (strcmp(dcl_mask, ".xz") == 0) + fprintf(out, "errataed bad dcl mask %s\n", + dcl_mask); + else if (strcmp(dcl_mask, ".xw") == 0) + fprintf(out, "errataed bad dcl mask %s\n", + dcl_mask); + else if (strcmp(dcl_mask, ".xzw") == 0) + fprintf(out, "errataed bad dcl mask %s\n", + dcl_mask); + + if (dcl_nr == 8) { + instr_out(ctx, i++, + "%s: DCL DIFFUSE%s\n", instr_prefix, + dcl_mask); + } else if (dcl_nr == 9) { + instr_out(ctx, i++, + "%s: DCL SPECULAR%s\n", instr_prefix, + dcl_mask); + } else if (dcl_nr == 10) { + instr_out(ctx, i++, + "%s: DCL FOG%s\n", instr_prefix, + dcl_mask); + } + } + instr_out(ctx, i++, "%s\n", instr_prefix); + instr_out(ctx, i++, "%s\n", instr_prefix); + break; + case 3: + switch ((d0 >> 22) & 0x3) { + case 0: + sampletype = "2D"; + break; + case 1: + sampletype = "CUBE"; + break; + case 2: + sampletype = "3D"; + break; + default: + sampletype = "RESERVED"; + break; + } + if (dcl_nr > 15) + fprintf(out, "bad S%d dcl register number\n", dcl_nr); + instr_out(ctx, i++, "%s: DCL S%d %s\n", + instr_prefix, dcl_nr, sampletype); + instr_out(ctx, i++, "%s\n", instr_prefix); + instr_out(ctx, i++, "%s\n", instr_prefix); + break; + default: + instr_out(ctx, i++, "%s: DCL RESERVED%d\n", + instr_prefix, dcl_nr); + instr_out(ctx, i++, "%s\n", instr_prefix); + instr_out(ctx, i++, "%s\n", instr_prefix); + } +} + +static void +i915_decode_instruction(struct drm_intel_decode *ctx, + int i, char *instr_prefix) +{ + switch ((ctx->data[i] >> 24) & 0x1f) { + case 0x0: + instr_out(ctx, i++, "%s: NOP\n", instr_prefix); + instr_out(ctx, i++, "%s\n", instr_prefix); + instr_out(ctx, i++, "%s\n", instr_prefix); + break; + case 0x01: + i915_decode_alu2(ctx, i, instr_prefix, "ADD"); + break; + case 0x02: + i915_decode_alu1(ctx, i, instr_prefix, "MOV"); + break; + case 0x03: + i915_decode_alu2(ctx, i, instr_prefix, "MUL"); + break; + case 0x04: + i915_decode_alu3(ctx, i, instr_prefix, "MAD"); + break; + case 0x05: + i915_decode_alu3(ctx, i, instr_prefix, "DP2ADD"); + break; + case 0x06: + i915_decode_alu2(ctx, i, instr_prefix, "DP3"); + break; + case 0x07: + i915_decode_alu2(ctx, i, instr_prefix, "DP4"); + break; + case 0x08: + i915_decode_alu1(ctx, i, instr_prefix, "FRC"); + break; + case 0x09: + i915_decode_alu1(ctx, i, instr_prefix, "RCP"); + break; + case 0x0a: + i915_decode_alu1(ctx, i, instr_prefix, "RSQ"); + break; + case 0x0b: + i915_decode_alu1(ctx, i, instr_prefix, "EXP"); + break; + case 0x0c: + i915_decode_alu1(ctx, i, instr_prefix, "LOG"); + break; + case 0x0d: + i915_decode_alu2(ctx, i, instr_prefix, "CMP"); + break; + case 0x0e: + i915_decode_alu2(ctx, i, instr_prefix, "MIN"); + break; + case 0x0f: + i915_decode_alu2(ctx, i, instr_prefix, "MAX"); + break; + case 0x10: + i915_decode_alu1(ctx, i, instr_prefix, "FLR"); + break; + case 0x11: + i915_decode_alu1(ctx, i, instr_prefix, "MOD"); + break; + case 0x12: + i915_decode_alu1(ctx, i, instr_prefix, "TRC"); + break; + case 0x13: + i915_decode_alu2(ctx, i, instr_prefix, "SGE"); + break; + case 0x14: + i915_decode_alu2(ctx, i, instr_prefix, "SLT"); + break; + case 0x15: + i915_decode_tex(ctx, i, instr_prefix, "TEXLD"); + break; + case 0x16: + i915_decode_tex(ctx, i, instr_prefix, "TEXLDP"); + break; + case 0x17: + i915_decode_tex(ctx, i, instr_prefix, "TEXLDB"); + break; + case 0x19: + i915_decode_dcl(ctx, i, instr_prefix); + break; + default: + instr_out(ctx, i++, "%s: unknown\n", instr_prefix); + instr_out(ctx, i++, "%s\n", instr_prefix); + instr_out(ctx, i++, "%s\n", instr_prefix); + break; + } +} + +static const char * +decode_compare_func(uint32_t op) +{ + switch (op & 0x7) { + case 0: + return "always"; + case 1: + return "never"; + case 2: + return "less"; + case 3: + return "equal"; + case 4: + return "lequal"; + case 5: + return "greater"; + case 6: + return "notequal"; + case 7: + return "gequal"; + } + return ""; +} + +static const char * +decode_stencil_op(uint32_t op) +{ + switch (op & 0x7) { + case 0: + return "keep"; + case 1: + return "zero"; + case 2: + return "replace"; + case 3: + return "incr_sat"; + case 4: + return "decr_sat"; + case 5: + return "greater"; + case 6: + return "incr"; + case 7: + return "decr"; + } + return ""; +} + +#if 0 +static const char * +decode_logic_op(uint32_t op) +{ + switch (op & 0xf) { + case 0: + return "clear"; + case 1: + return "nor"; + case 2: + return "and_inv"; + case 3: + return "copy_inv"; + case 4: + return "and_rvrse"; + case 5: + return "inv"; + case 6: + return "xor"; + case 7: + return "nand"; + case 8: + return "and"; + case 9: + return "equiv"; + case 10: + return "noop"; + case 11: + return "or_inv"; + case 12: + return "copy"; + case 13: + return "or_rvrse"; + case 14: + return "or"; + case 15: + return "set"; + } + return ""; +} +#endif + +static const char * +decode_blend_fact(uint32_t op) +{ + switch (op & 0xf) { + case 1: + return "zero"; + case 2: + return "one"; + case 3: + return "src_colr"; + case 4: + return "inv_src_colr"; + case 5: + return "src_alpha"; + case 6: + return "inv_src_alpha"; + case 7: + return "dst_alpha"; + case 8: + return "inv_dst_alpha"; + case 9: + return "dst_colr"; + case 10: + return "inv_dst_colr"; + case 11: + return "src_alpha_sat"; + case 12: + return "cnst_colr"; + case 13: + return "inv_cnst_colr"; + case 14: + return "cnst_alpha"; + case 15: + return "inv_const_alpha"; + } + return ""; +} + +static const char * +decode_tex_coord_mode(uint32_t mode) +{ + switch (mode & 0x7) { + case 0: + return "wrap"; + case 1: + return "mirror"; + case 2: + return "clamp_edge"; + case 3: + return "cube"; + case 4: + return "clamp_border"; + case 5: + return "mirror_once"; + } + return ""; +} + +static const char * +decode_sample_filter(uint32_t mode) +{ + switch (mode & 0x7) { + case 0: + return "nearest"; + case 1: + return "linear"; + case 2: + return "anisotropic"; + case 3: + return "4x4_1"; + case 4: + return "4x4_2"; + case 5: + return "4x4_flat"; + case 6: + return "6x5_mono"; + } + return ""; +} + +static int +decode_3d_1d(struct drm_intel_decode *ctx) +{ + unsigned int len, i, c, idx, word, map, sampler, instr; + const char *format, *zformat, *type; + uint32_t opcode; + uint32_t *data = ctx->data; + uint32_t devid = ctx->devid; + + struct { + uint32_t opcode; + int i830_only; + unsigned int min_len; + unsigned int max_len; + const char *name; + } opcodes_3d_1d[] = { + { 0x86, 0, 4, 4, "3DSTATE_CHROMA_KEY" }, + { 0x88, 0, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" }, + { 0x99, 0, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" }, + { 0x9a, 0, 2, 2, "3DSTATE_DEFAULT_SPECULAR" }, + { 0x98, 0, 2, 2, "3DSTATE_DEFAULT_Z" }, + { 0x97, 0, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" }, + { 0x9d, 0, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" }, + { 0x9e, 0, 4, 4, "3DSTATE_MONO_FILTER" }, + { 0x89, 0, 4, 4, "3DSTATE_FOG_MODE" }, + { 0x8f, 0, 2, 16, "3DSTATE_MAP_PALLETE_LOAD_32" }, + { 0x83, 0, 2, 2, "3DSTATE_SPAN_STIPPLE" }, + { 0x8c, 1, 2, 2, "3DSTATE_MAP_COORD_TRANSFORM_I830" }, + { 0x8b, 1, 2, 2, "3DSTATE_MAP_VERTEX_TRANSFORM_I830" }, + { 0x8d, 1, 3, 3, "3DSTATE_W_STATE_I830" }, + { 0x01, 1, 2, 2, "3DSTATE_COLOR_FACTOR_I830" }, + { 0x02, 1, 2, 2, "3DSTATE_MAP_COORD_SETBIND_I830"}, + }, *opcode_3d_1d; + + opcode = (data[0] & 0x00ff0000) >> 16; + + switch (opcode) { + case 0x07: + /* This instruction is unusual. A 0 length means just + * 1 DWORD instead of 2. The 0 length is specified in + * one place to be unsupported, but stated to be + * required in another, and 0 length LOAD_INDIRECTs + * appear to cause no harm at least. + */ + instr_out(ctx, 0, "3DSTATE_LOAD_INDIRECT\n"); + len = (data[0] & 0x000000ff) + 1; + i = 1; + if (data[0] & (0x01 << 8)) { + instr_out(ctx, i++, "SIS.0\n"); + instr_out(ctx, i++, "SIS.1\n"); + } + if (data[0] & (0x02 << 8)) { + instr_out(ctx, i++, "DIS.0\n"); + } + if (data[0] & (0x04 << 8)) { + instr_out(ctx, i++, "SSB.0\n"); + instr_out(ctx, i++, "SSB.1\n"); + } + if (data[0] & (0x08 << 8)) { + instr_out(ctx, i++, "MSB.0\n"); + instr_out(ctx, i++, "MSB.1\n"); + } + if (data[0] & (0x10 << 8)) { + instr_out(ctx, i++, "PSP.0\n"); + instr_out(ctx, i++, "PSP.1\n"); + } + if (data[0] & (0x20 << 8)) { + instr_out(ctx, i++, "PSC.0\n"); + instr_out(ctx, i++, "PSC.1\n"); + } + if (len != i) { + fprintf(out, "Bad count in 3DSTATE_LOAD_INDIRECT\n"); + return len; + } + return len; + case 0x04: + instr_out(ctx, 0, + "3DSTATE_LOAD_STATE_IMMEDIATE_1\n"); + len = (data[0] & 0x0000000f) + 2; + i = 1; + for (word = 0; word <= 8; word++) { + if (data[0] & (1 << (4 + word))) { + /* save vertex state for decode */ + if (!IS_GEN2(devid)) { + int tex_num; + + if (word == 2) { + saved_s2_set = 1; + saved_s2 = data[i]; + } + if (word == 4) { + saved_s4_set = 1; + saved_s4 = data[i]; + } + + switch (word) { + case 0: + instr_out(ctx, i, + "S0: vbo offset: 0x%08x%s\n", + data[i] & (~1), + data[i] & 1 ? + ", auto cache invalidate disabled" + : ""); + break; + case 1: + instr_out(ctx, i, + "S1: vertex width: %i, vertex pitch: %i\n", + (data[i] >> 24) & + 0x3f, + (data[i] >> 16) & + 0x3f); + break; + case 2: + instr_out(ctx, i, + "S2: texcoord formats: "); + for (tex_num = 0; + tex_num < 8; tex_num++) { + switch ((data[i] >> + tex_num * + 4) & 0xf) { + case 0: + fprintf(out, + "%i=2D ", + tex_num); + break; + case 1: + fprintf(out, + "%i=3D ", + tex_num); + break; + case 2: + fprintf(out, + "%i=4D ", + tex_num); + break; + case 3: + fprintf(out, + "%i=1D ", + tex_num); + break; + case 4: + fprintf(out, + "%i=2D_16 ", + tex_num); + break; + case 5: + fprintf(out, + "%i=4D_16 ", + tex_num); + break; + case 0xf: + fprintf(out, + "%i=NP ", + tex_num); + break; + } + } + fprintf(out, "\n"); + + break; + case 3: + instr_out(ctx, i, + "S3: not documented\n"); + break; + case 4: + { + const char *cullmode = ""; + const char *vfmt_xyzw = ""; + switch ((data[i] >> 13) + & 0x3) { + case 0: + cullmode = + "both"; + break; + case 1: + cullmode = + "none"; + break; + case 2: + cullmode = "cw"; + break; + case 3: + cullmode = + "ccw"; + break; + } + switch (data[i] & + (7 << 6 | 1 << + 2)) { + case 1 << 6: + vfmt_xyzw = + "XYZ,"; + break; + case 2 << 6: + vfmt_xyzw = + "XYZW,"; + break; + case 3 << 6: + vfmt_xyzw = + "XY,"; + break; + case 4 << 6: + vfmt_xyzw = + "XYW,"; + break; + case 1 << 6 | 1 << 2: + vfmt_xyzw = + "XYZF,"; + break; + case 2 << 6 | 1 << 2: + vfmt_xyzw = + "XYZWF,"; + break; + case 3 << 6 | 1 << 2: + vfmt_xyzw = + "XYF,"; + break; + case 4 << 6 | 1 << 2: + vfmt_xyzw = + "XYWF,"; + break; + } + instr_out(ctx, i, + "S4: point_width=%i, line_width=%.1f," + "%s%s%s%s%s cullmode=%s, vfmt=%s%s%s%s%s%s " + "%s%s%s%s%s\n", + (data[i] >> + 23) & 0x1ff, + ((data[i] >> + 19) & 0xf) / + 2.0, + data[i] & (0xf + << + 15) + ? + " flatshade=" + : "", + data[i] & (1 + << + 18) + ? "Alpha," : + "", + data[i] & (1 + << + 17) + ? "Fog," : "", + data[i] & (1 + << + 16) + ? "Specular," + : "", + data[i] & (1 + << + 15) + ? "Color," : + "", cullmode, + data[i] & (1 + << + 12) + ? + "PointWidth," + : "", + data[i] & (1 + << + 11) + ? "SpecFog," : + "", + data[i] & (1 + << + 10) + ? "Color," : + "", + data[i] & (1 + << + 9) + ? "DepthOfs," + : "", + vfmt_xyzw, + data[i] & (1 + << + 9) + ? "FogParam," + : "", + data[i] & (1 + << + 5) + ? + "force default diffuse, " + : "", + data[i] & (1 + << + 4) + ? + "force default specular, " + : "", + data[i] & (1 + << + 3) + ? + "local depth ofs enable, " + : "", + data[i] & (1 + << + 1) + ? + "point sprite enable, " + : "", + data[i] & (1 + << + 0) + ? + "line AA enable, " + : ""); + break; + } + case 5: + { + instr_out(ctx, i, + "S5:%s%s%s%s%s" + "%s%s%s%s stencil_ref=0x%x, stencil_test=%s, " + "stencil_fail=%s, stencil_pass_z_fail=%s, " + "stencil_pass_z_pass=%s, %s%s%s%s\n", + data[i] & (0xf + << + 28) + ? + " write_disable=" + : "", + data[i] & (1 + << + 31) + ? "Alpha," : + "", + data[i] & (1 + << + 30) + ? "Red," : "", + data[i] & (1 + << + 29) + ? "Green," : + "", + data[i] & (1 + << + 28) + ? "Blue," : + "", + data[i] & (1 + << + 27) + ? + " force default point size," + : "", + data[i] & (1 + << + 26) + ? + " last pixel enable," + : "", + data[i] & (1 + << + 25) + ? + " global depth ofs enable," + : "", + data[i] & (1 + << + 24) + ? + " fog enable," + : "", + (data[i] >> + 16) & 0xff, + decode_compare_func + (data[i] >> + 13), + decode_stencil_op + (data[i] >> + 10), + decode_stencil_op + (data[i] >> + 7), + decode_stencil_op + (data[i] >> + 4), + data[i] & (1 + << + 3) + ? + "stencil write enable, " + : "", + data[i] & (1 + << + 2) + ? + "stencil test enable, " + : "", + data[i] & (1 + << + 1) + ? + "color dither enable, " + : "", + data[i] & (1 + << + 0) + ? + "logicop enable, " + : ""); + } + break; + case 6: + instr_out(ctx, i, + "S6: %salpha_test=%s, alpha_ref=0x%x, " + "depth_test=%s, %ssrc_blnd_fct=%s, dst_blnd_fct=%s, " + "%s%stristrip_provoking_vertex=%i\n", + data[i] & (1 << 31) ? + "alpha test enable, " + : "", + decode_compare_func + (data[i] >> 28), + data[i] & (0xff << + 20), + decode_compare_func + (data[i] >> 16), + data[i] & (1 << 15) ? + "cbuf blend enable, " + : "", + decode_blend_fact(data + [i] + >> + 8), + decode_blend_fact(data + [i] + >> + 4), + data[i] & (1 << 3) ? + "depth write enable, " + : "", + data[i] & (1 << 2) ? + "cbuf write enable, " + : "", + data[i] & (0x3)); + break; + case 7: + instr_out(ctx, i, + "S7: depth offset constant: 0x%08x\n", + data[i]); + break; + } + } else { + instr_out(ctx, i, + "S%d: 0x%08x\n", word, data[i]); + } + i++; + } + } + if (len != i) { + fprintf(out, + "Bad count in 3DSTATE_LOAD_STATE_IMMEDIATE_1\n"); + } + return len; + case 0x03: + instr_out(ctx, 0, + "3DSTATE_LOAD_STATE_IMMEDIATE_2\n"); + len = (data[0] & 0x0000000f) + 2; + i = 1; + for (word = 6; word <= 14; word++) { + if (data[0] & (1 << word)) { + if (word == 6) + instr_out(ctx, i++, + "TBCF\n"); + else if (word >= 7 && word <= 10) { + instr_out(ctx, i++, + "TB%dC\n", word - 7); + instr_out(ctx, i++, + "TB%dA\n", word - 7); + } else if (word >= 11 && word <= 14) { + instr_out(ctx, i, + "TM%dS0: offset=0x%08x, %s\n", + word - 11, + data[i] & 0xfffffffe, + data[i] & 1 ? "use fence" : + ""); + i++; + instr_out(ctx, i, + "TM%dS1: height=%i, width=%i, %s\n", + word - 11, data[i] >> 21, + (data[i] >> 10) & 0x3ff, + data[i] & 2 ? (data[i] & 1 ? + "y-tiled" : + "x-tiled") : + ""); + i++; + instr_out(ctx, i, + "TM%dS2: pitch=%i, \n", + word - 11, + ((data[i] >> 21) + 1) * 4); + i++; + instr_out(ctx, i++, + "TM%dS3\n", word - 11); + instr_out(ctx, i++, + "TM%dS4: dflt color\n", + word - 11); + } + } + } + if (len != i) { + fprintf(out, + "Bad count in 3DSTATE_LOAD_STATE_IMMEDIATE_2\n"); + } + return len; + case 0x00: + instr_out(ctx, 0, "3DSTATE_MAP_STATE\n"); + len = (data[0] & 0x0000003f) + 2; + instr_out(ctx, 1, "mask\n"); + + i = 2; + for (map = 0; map <= 15; map++) { + if (data[1] & (1 << map)) { + int width, height, pitch, dword; + const char *tiling; + + dword = data[i]; + instr_out(ctx, i++, + "map %d MS2 %s%s%s\n", map, + dword & (1 << 31) ? + "untrusted surface, " : "", + dword & (1 << 1) ? + "vertical line stride enable, " : "", + dword & (1 << 0) ? + "vertical ofs enable, " : ""); + + dword = data[i]; + width = ((dword >> 10) & ((1 << 11) - 1)) + 1; + height = ((dword >> 21) & ((1 << 11) - 1)) + 1; + + tiling = "none"; + if (dword & (1 << 2)) + tiling = "fenced"; + else if (dword & (1 << 1)) + tiling = dword & (1 << 0) ? "Y" : "X"; + type = " BAD"; + format = "BAD"; + switch ((dword >> 7) & 0x7) { + case 1: + type = "8b"; + switch ((dword >> 3) & 0xf) { + case 0: + format = "I"; + break; + case 1: + format = "L"; + break; + case 4: + format = "A"; + break; + case 5: + format = " mono"; + break; + } + break; + case 2: + type = "16b"; + switch ((dword >> 3) & 0xf) { + case 0: + format = " rgb565"; + break; + case 1: + format = " argb1555"; + break; + case 2: + format = " argb4444"; + break; + case 5: + format = " ay88"; + break; + case 6: + format = " bump655"; + break; + case 7: + format = "I"; + break; + case 8: + format = "L"; + break; + case 9: + format = "A"; + break; + } + break; + case 3: + type = "32b"; + switch ((dword >> 3) & 0xf) { + case 0: + format = " argb8888"; + break; + case 1: + format = " abgr8888"; + break; + case 2: + format = " xrgb8888"; + break; + case 3: + format = " xbgr8888"; + break; + case 4: + format = " qwvu8888"; + break; + case 5: + format = " axvu8888"; + break; + case 6: + format = " lxvu8888"; + break; + case 7: + format = " xlvu8888"; + break; + case 8: + format = " argb2101010"; + break; + case 9: + format = " abgr2101010"; + break; + case 10: + format = " awvu2101010"; + break; + case 11: + format = " gr1616"; + break; + case 12: + format = " vu1616"; + break; + case 13: + format = " xI824"; + break; + case 14: + format = " xA824"; + break; + case 15: + format = " xL824"; + break; + } + break; + case 5: + type = "422"; + switch ((dword >> 3) & 0xf) { + case 0: + format = " yuv_swapy"; + break; + case 1: + format = " yuv"; + break; + case 2: + format = " yuv_swapuv"; + break; + case 3: + format = " yuv_swapuvy"; + break; + } + break; + case 6: + type = "compressed"; + switch ((dword >> 3) & 0x7) { + case 0: + format = " dxt1"; + break; + case 1: + format = " dxt2_3"; + break; + case 2: + format = " dxt4_5"; + break; + case 3: + format = " fxt1"; + break; + case 4: + format = " dxt1_rb"; + break; + } + break; + case 7: + type = "4b indexed"; + switch ((dword >> 3) & 0xf) { + case 7: + format = " argb8888"; + break; + } + break; + } + dword = data[i]; + instr_out(ctx, i++, + "map %d MS3 [width=%d, height=%d, format=%s%s, tiling=%s%s]\n", + map, width, height, type, format, + tiling, + dword & (1 << 9) ? " palette select" : + ""); + + dword = data[i]; + pitch = + 4 * (((dword >> 21) & ((1 << 11) - 1)) + 1); + instr_out(ctx, i++, + "map %d MS4 [pitch=%d, max_lod=%i, vol_depth=%i, cube_face_ena=%x, %s]\n", + map, pitch, (dword >> 9) & 0x3f, + dword & 0xff, (dword >> 15) & 0x3f, + dword & (1 << 8) ? "miplayout legacy" + : "miplayout right"); + } + } + if (len != i) { + fprintf(out, "Bad count in 3DSTATE_MAP_STATE\n"); + return len; + } + return len; + case 0x06: + instr_out(ctx, 0, + "3DSTATE_PIXEL_SHADER_CONSTANTS\n"); + len = (data[0] & 0x000000ff) + 2; + + i = 2; + for (c = 0; c <= 31; c++) { + if (data[1] & (1 << c)) { + instr_out(ctx, i, "C%d.X = %f\n", c, + int_as_float(data[i])); + i++; + instr_out(ctx, i, "C%d.Y = %f\n", + c, int_as_float(data[i])); + i++; + instr_out(ctx, i, "C%d.Z = %f\n", + c, int_as_float(data[i])); + i++; + instr_out(ctx, i, "C%d.W = %f\n", + c, int_as_float(data[i])); + i++; + } + } + if (len != i) { + fprintf(out, + "Bad count in 3DSTATE_PIXEL_SHADER_CONSTANTS\n"); + } + return len; + case 0x05: + instr_out(ctx, 0, "3DSTATE_PIXEL_SHADER_PROGRAM\n"); + len = (data[0] & 0x000000ff) + 2; + if ((len - 1) % 3 != 0 || len > 370) { + fprintf(out, + "Bad count in 3DSTATE_PIXEL_SHADER_PROGRAM\n"); + } + i = 1; + for (instr = 0; instr < (len - 1) / 3; instr++) { + char instr_prefix[10]; + + sprintf(instr_prefix, "PS%03d", instr); + i915_decode_instruction(ctx, i, + instr_prefix); + i += 3; + } + return len; + case 0x01: + if (IS_GEN2(devid)) + break; + instr_out(ctx, 0, "3DSTATE_SAMPLER_STATE\n"); + instr_out(ctx, 1, "mask\n"); + len = (data[0] & 0x0000003f) + 2; + i = 2; + for (sampler = 0; sampler <= 15; sampler++) { + if (data[1] & (1 << sampler)) { + uint32_t dword; + const char *mip_filter = ""; + + dword = data[i]; + switch ((dword >> 20) & 0x3) { + case 0: + mip_filter = "none"; + break; + case 1: + mip_filter = "nearest"; + break; + case 3: + mip_filter = "linear"; + break; + } + instr_out(ctx, i++, + "sampler %d SS2:%s%s%s " + "base_mip_level=%i, mip_filter=%s, mag_filter=%s, min_filter=%s " + "lod_bias=%.2f,%s max_aniso=%i, shadow_func=%s\n", + sampler, + dword & (1 << 31) ? " reverse gamma," + : "", + dword & (1 << 30) ? " packed2planar," + : "", + dword & (1 << 29) ? + " colorspace conversion," : "", + (dword >> 22) & 0x1f, mip_filter, + decode_sample_filter(dword >> 17), + decode_sample_filter(dword >> 14), + ((dword >> 5) & 0x1ff) / (0x10 * 1.0), + dword & (1 << 4) ? " shadow," : "", + dword & (1 << 3) ? 4 : 2, + decode_compare_func(dword)); + dword = data[i]; + instr_out(ctx, i++, + "sampler %d SS3: min_lod=%.2f,%s " + "tcmode_x=%s, tcmode_y=%s, tcmode_z=%s,%s texmap_idx=%i,%s\n", + sampler, + ((dword >> 24) & 0xff) / (0x10 * 1.0), + dword & (1 << 17) ? + " kill pixel enable," : "", + decode_tex_coord_mode(dword >> 12), + decode_tex_coord_mode(dword >> 9), + decode_tex_coord_mode(dword >> 6), + dword & (1 << 5) ? + " normalized coords," : "", + (dword >> 1) & 0xf, + dword & (1 << 0) ? " deinterlacer," : + ""); + dword = data[i]; + instr_out(ctx, i++, + "sampler %d SS4: border color\n", + sampler); + } + } + if (len != i) { + fprintf(out, "Bad count in 3DSTATE_SAMPLER_STATE\n"); + } + return len; + case 0x85: + len = (data[0] & 0x0000000f) + 2; + + if (len != 2) + fprintf(out, + "Bad count in 3DSTATE_DEST_BUFFER_VARIABLES\n"); + + instr_out(ctx, 0, + "3DSTATE_DEST_BUFFER_VARIABLES\n"); + + switch ((data[1] >> 8) & 0xf) { + case 0x0: + format = "g8"; + break; + case 0x1: + format = "x1r5g5b5"; + break; + case 0x2: + format = "r5g6b5"; + break; + case 0x3: + format = "a8r8g8b8"; + break; + case 0x4: + format = "ycrcb_swapy"; + break; + case 0x5: + format = "ycrcb_normal"; + break; + case 0x6: + format = "ycrcb_swapuv"; + break; + case 0x7: + format = "ycrcb_swapuvy"; + break; + case 0x8: + format = "a4r4g4b4"; + break; + case 0x9: + format = "a1r5g5b5"; + break; + case 0xa: + format = "a2r10g10b10"; + break; + default: + format = "BAD"; + break; + } + switch ((data[1] >> 2) & 0x3) { + case 0x0: + zformat = "u16"; + break; + case 0x1: + zformat = "f16"; + break; + case 0x2: + zformat = "u24x8"; + break; + default: + zformat = "BAD"; + break; + } + instr_out(ctx, 1, + "%s format, %s depth format, early Z %sabled\n", + format, zformat, + (data[1] & (1 << 31)) ? "en" : "dis"); + return len; + + case 0x8e: + { + const char *name, *tiling; + + len = (data[0] & 0x0000000f) + 2; + if (len != 3) + fprintf(out, + "Bad count in 3DSTATE_BUFFER_INFO\n"); + + switch ((data[1] >> 24) & 0x7) { + case 0x3: + name = "color"; + break; + case 0x7: + name = "depth"; + break; + default: + name = "unknown"; + break; + } + + tiling = "none"; + if (data[1] & (1 << 23)) + tiling = "fenced"; + else if (data[1] & (1 << 22)) + tiling = data[1] & (1 << 21) ? "Y" : "X"; + + instr_out(ctx, 0, "3DSTATE_BUFFER_INFO\n"); + instr_out(ctx, 1, + "%s, tiling = %s, pitch=%d\n", name, tiling, + data[1] & 0xffff); + + instr_out(ctx, 2, "address\n"); + return len; + } + case 0x81: + len = (data[0] & 0x0000000f) + 2; + + if (len != 3) + fprintf(out, + "Bad count in 3DSTATE_SCISSOR_RECTANGLE\n"); + + instr_out(ctx, 0, "3DSTATE_SCISSOR_RECTANGLE\n"); + instr_out(ctx, 1, "(%d,%d)\n", + data[1] & 0xffff, data[1] >> 16); + instr_out(ctx, 2, "(%d,%d)\n", + data[2] & 0xffff, data[2] >> 16); + + return len; + case 0x80: + len = (data[0] & 0x0000000f) + 2; + + if (len != 5) + fprintf(out, + "Bad count in 3DSTATE_DRAWING_RECTANGLE\n"); + + instr_out(ctx, 0, "3DSTATE_DRAWING_RECTANGLE\n"); + instr_out(ctx, 1, "%s\n", + data[1] & (1 << 30) ? "depth ofs disabled " : ""); + instr_out(ctx, 2, "(%d,%d)\n", + data[2] & 0xffff, data[2] >> 16); + instr_out(ctx, 3, "(%d,%d)\n", + data[3] & 0xffff, data[3] >> 16); + instr_out(ctx, 4, "(%d,%d)\n", + data[4] & 0xffff, data[4] >> 16); + + return len; + case 0x9c: + len = (data[0] & 0x0000000f) + 2; + + if (len != 7) + fprintf(out, "Bad count in 3DSTATE_CLEAR_PARAMETERS\n"); + + instr_out(ctx, 0, "3DSTATE_CLEAR_PARAMETERS\n"); + instr_out(ctx, 1, "prim_type=%s, clear=%s%s%s\n", + data[1] & (1 << 16) ? "CLEAR_RECT" : "ZONE_INIT", + data[1] & (1 << 2) ? "color," : "", + data[1] & (1 << 1) ? "depth," : "", + data[1] & (1 << 0) ? "stencil," : ""); + instr_out(ctx, 2, "clear color\n"); + instr_out(ctx, 3, "clear depth/stencil\n"); + instr_out(ctx, 4, "color value (rgba8888)\n"); + instr_out(ctx, 5, "depth value %f\n", + int_as_float(data[5])); + instr_out(ctx, 6, "clear stencil\n"); + return len; + } + + for (idx = 0; idx < ARRAY_SIZE(opcodes_3d_1d); idx++) { + opcode_3d_1d = &opcodes_3d_1d[idx]; + if (opcode_3d_1d->i830_only && !IS_GEN2(devid)) + continue; + + if (((data[0] & 0x00ff0000) >> 16) == opcode_3d_1d->opcode) { + len = 1; + + instr_out(ctx, 0, "%s\n", + opcode_3d_1d->name); + if (opcode_3d_1d->max_len > 1) { + len = (data[0] & 0x0000ffff) + 2; + if (len < opcode_3d_1d->min_len || + len > opcode_3d_1d->max_len) { + fprintf(out, "Bad count in %s\n", + opcode_3d_1d->name); + } + } + + for (i = 1; i < len; i++) { + instr_out(ctx, i, "dword %d\n", i); + } + + return len; + } + } + + instr_out(ctx, 0, "3D UNKNOWN: 3d_1d opcode = 0x%x\n", + opcode); + return 1; +} + +static int +decode_3d_primitive(struct drm_intel_decode *ctx) +{ + uint32_t *data = ctx->data; + uint32_t count = ctx->count; + char immediate = (data[0] & (1 << 23)) == 0; + unsigned int len, i, j, ret; + const char *primtype; + int original_s2 = saved_s2; + int original_s4 = saved_s4; + + switch ((data[0] >> 18) & 0xf) { + case 0x0: + primtype = "TRILIST"; + break; + case 0x1: + primtype = "TRISTRIP"; + break; + case 0x2: + primtype = "TRISTRIP_REVERSE"; + break; + case 0x3: + primtype = "TRIFAN"; + break; + case 0x4: + primtype = "POLYGON"; + break; + case 0x5: + primtype = "LINELIST"; + break; + case 0x6: + primtype = "LINESTRIP"; + break; + case 0x7: + primtype = "RECTLIST"; + break; + case 0x8: + primtype = "POINTLIST"; + break; + case 0x9: + primtype = "DIB"; + break; + case 0xa: + primtype = "CLEAR_RECT"; + saved_s4 = 3 << 6; + saved_s2 = ~0; + break; + default: + primtype = "unknown"; + break; + } + + /* XXX: 3DPRIM_DIB not supported */ + if (immediate) { + len = (data[0] & 0x0003ffff) + 2; + instr_out(ctx, 0, "3DPRIMITIVE inline %s\n", + primtype); + if (count < len) + BUFFER_FAIL(count, len, "3DPRIMITIVE inline"); + if (!saved_s2_set || !saved_s4_set) { + fprintf(out, "unknown vertex format\n"); + for (i = 1; i < len; i++) { + instr_out(ctx, i, + " vertex data (%f float)\n", + int_as_float(data[i])); + } + } else { + unsigned int vertex = 0; + for (i = 1; i < len;) { + unsigned int tc; + +#define VERTEX_OUT(fmt, ...) do { \ + if (i < len) \ + instr_out(ctx, i, " V%d."fmt"\n", vertex, __VA_ARGS__); \ + else \ + fprintf(out, " missing data in V%d\n", vertex); \ + i++; \ +} while (0) + + VERTEX_OUT("X = %f", int_as_float(data[i])); + VERTEX_OUT("Y = %f", int_as_float(data[i])); + switch (saved_s4 >> 6 & 0x7) { + case 0x1: + VERTEX_OUT("Z = %f", + int_as_float(data[i])); + break; + case 0x2: + VERTEX_OUT("Z = %f", + int_as_float(data[i])); + VERTEX_OUT("W = %f", + int_as_float(data[i])); + break; + case 0x3: + break; + case 0x4: + VERTEX_OUT("W = %f", + int_as_float(data[i])); + break; + default: + fprintf(out, "bad S4 position mask\n"); + } + + if (saved_s4 & (1 << 10)) { + VERTEX_OUT + ("color = (A=0x%02x, R=0x%02x, G=0x%02x, " + "B=0x%02x)", data[i] >> 24, + (data[i] >> 16) & 0xff, + (data[i] >> 8) & 0xff, + data[i] & 0xff); + } + if (saved_s4 & (1 << 11)) { + VERTEX_OUT + ("spec = (A=0x%02x, R=0x%02x, G=0x%02x, " + "B=0x%02x)", data[i] >> 24, + (data[i] >> 16) & 0xff, + (data[i] >> 8) & 0xff, + data[i] & 0xff); + } + if (saved_s4 & (1 << 12)) + VERTEX_OUT("width = 0x%08x)", data[i]); + + for (tc = 0; tc <= 7; tc++) { + switch ((saved_s2 >> (tc * 4)) & 0xf) { + case 0x0: + VERTEX_OUT("T%d.X = %f", tc, + int_as_float(data + [i])); + VERTEX_OUT("T%d.Y = %f", tc, + int_as_float(data + [i])); + break; + case 0x1: + VERTEX_OUT("T%d.X = %f", tc, + int_as_float(data + [i])); + VERTEX_OUT("T%d.Y = %f", tc, + int_as_float(data + [i])); + VERTEX_OUT("T%d.Z = %f", tc, + int_as_float(data + [i])); + break; + case 0x2: + VERTEX_OUT("T%d.X = %f", tc, + int_as_float(data + [i])); + VERTEX_OUT("T%d.Y = %f", tc, + int_as_float(data + [i])); + VERTEX_OUT("T%d.Z = %f", tc, + int_as_float(data + [i])); + VERTEX_OUT("T%d.W = %f", tc, + int_as_float(data + [i])); + break; + case 0x3: + VERTEX_OUT("T%d.X = %f", tc, + int_as_float(data + [i])); + break; + case 0x4: + VERTEX_OUT + ("T%d.XY = 0x%08x half-float", + tc, data[i]); + break; + case 0x5: + VERTEX_OUT + ("T%d.XY = 0x%08x half-float", + tc, data[i]); + VERTEX_OUT + ("T%d.ZW = 0x%08x half-float", + tc, data[i]); + break; + case 0xf: + break; + default: + fprintf(out, + "bad S2.T%d format\n", + tc); + } + } + vertex++; + } + } + + ret = len; + } else { + /* indirect vertices */ + len = data[0] & 0x0000ffff; /* index count */ + if (data[0] & (1 << 17)) { + /* random vertex access */ + if (count < (len + 1) / 2 + 1) { + BUFFER_FAIL(count, (len + 1) / 2 + 1, + "3DPRIMITIVE random indirect"); + } + instr_out(ctx, 0, + "3DPRIMITIVE random indirect %s (%d)\n", + primtype, len); + if (len == 0) { + /* vertex indices continue until 0xffff is + * found + */ + for (i = 1; i < count; i++) { + if ((data[i] & 0xffff) == 0xffff) { + instr_out(ctx, i, + " indices: (terminator)\n"); + ret = i; + goto out; + } else if ((data[i] >> 16) == 0xffff) { + instr_out(ctx, i, + " indices: 0x%04x, (terminator)\n", + data[i] & 0xffff); + ret = i; + goto out; + } else { + instr_out(ctx, i, + " indices: 0x%04x, 0x%04x\n", + data[i] & 0xffff, + data[i] >> 16); + } + } + fprintf(out, + "3DPRIMITIVE: no terminator found in index buffer\n"); + ret = count; + goto out; + } else { + /* fixed size vertex index buffer */ + for (j = 1, i = 0; i < len; i += 2, j++) { + if (i * 2 == len - 1) { + instr_out(ctx, j, + " indices: 0x%04x\n", + data[j] & 0xffff); + } else { + instr_out(ctx, j, + " indices: 0x%04x, 0x%04x\n", + data[j] & 0xffff, + data[j] >> 16); + } + } + } + ret = (len + 1) / 2 + 1; + goto out; + } else { + /* sequential vertex access */ + instr_out(ctx, 0, + "3DPRIMITIVE sequential indirect %s, %d starting from " + "%d\n", primtype, len, data[1] & 0xffff); + instr_out(ctx, 1, " start\n"); + ret = 2; + goto out; + } + } + +out: + saved_s2 = original_s2; + saved_s4 = original_s4; + return ret; +} + +static int +decode_3d(struct drm_intel_decode *ctx) +{ + uint32_t opcode; + unsigned int idx; + uint32_t *data = ctx->data; + + struct { + uint32_t opcode; + unsigned int min_len; + unsigned int max_len; + const char *name; + } opcodes_3d[] = { + { 0x06, 1, 1, "3DSTATE_ANTI_ALIASING" }, + { 0x08, 1, 1, "3DSTATE_BACKFACE_STENCIL_OPS" }, + { 0x09, 1, 1, "3DSTATE_BACKFACE_STENCIL_MASKS" }, + { 0x16, 1, 1, "3DSTATE_COORD_SET_BINDINGS" }, + { 0x15, 1, 1, "3DSTATE_FOG_COLOR" }, + { 0x0b, 1, 1, "3DSTATE_INDEPENDENT_ALPHA_BLEND" }, + { 0x0d, 1, 1, "3DSTATE_MODES_4" }, + { 0x0c, 1, 1, "3DSTATE_MODES_5" }, + { 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES"}, + }, *opcode_3d; + + opcode = (data[0] & 0x1f000000) >> 24; + + switch (opcode) { + case 0x1f: + return decode_3d_primitive(ctx); + case 0x1d: + return decode_3d_1d(ctx); + case 0x1c: + return decode_3d_1c(ctx); + } + + for (idx = 0; idx < ARRAY_SIZE(opcodes_3d); idx++) { + opcode_3d = &opcodes_3d[idx]; + if (opcode == opcode_3d->opcode) { + unsigned int len = 1, i; + + instr_out(ctx, 0, "%s\n", opcode_3d->name); + if (opcode_3d->max_len > 1) { + len = (data[0] & 0xff) + 2; + if (len < opcode_3d->min_len || + len > opcode_3d->max_len) { + fprintf(out, "Bad count in %s\n", + opcode_3d->name); + } + } + + for (i = 1; i < len; i++) { + instr_out(ctx, i, "dword %d\n", i); + } + return len; + } + } + + instr_out(ctx, 0, "3D UNKNOWN: 3d opcode = 0x%x\n", opcode); + return 1; +} + +static const char *get_965_surfacetype(unsigned int surfacetype) +{ + switch (surfacetype) { + case 0: + return "1D"; + case 1: + return "2D"; + case 2: + return "3D"; + case 3: + return "CUBE"; + case 4: + return "BUFFER"; + case 7: + return "NULL"; + default: + return "unknown"; + } +} + +static const char *get_965_depthformat(unsigned int depthformat) +{ + switch (depthformat) { + case 0: + return "s8_z24float"; + case 1: + return "z32float"; + case 2: + return "z24s8"; + case 5: + return "z16"; + default: + return "unknown"; + } +} + +static const char *get_965_element_component(uint32_t data, int component) +{ + uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7; + + switch (component_control) { + case 0: + return "nostore"; + case 1: + switch (component) { + case 0: + return "X"; + case 1: + return "Y"; + case 2: + return "Z"; + case 3: + return "W"; + default: + return "fail"; + } + case 2: + return "0.0"; + case 3: + return "1.0"; + case 4: + return "0x1"; + case 5: + return "VID"; + default: + return "fail"; + } +} + +static const char *get_965_prim_type(uint32_t primtype) +{ + switch (primtype) { + case 0x01: + return "point list"; + case 0x02: + return "line list"; + case 0x03: + return "line strip"; + case 0x04: + return "tri list"; + case 0x05: + return "tri strip"; + case 0x06: + return "tri fan"; + case 0x07: + return "quad list"; + case 0x08: + return "quad strip"; + case 0x09: + return "line list adj"; + case 0x0a: + return "line strip adj"; + case 0x0b: + return "tri list adj"; + case 0x0c: + return "tri strip adj"; + case 0x0d: + return "tri strip reverse"; + case 0x0e: + return "polygon"; + case 0x0f: + return "rect list"; + case 0x10: + return "line loop"; + case 0x11: + return "point list bf"; + case 0x12: + return "line strip cont"; + case 0x13: + return "line strip bf"; + case 0x14: + return "line strip cont bf"; + case 0x15: + return "tri fan no stipple"; + default: + return "fail"; + } +} + +static int +i965_decode_urb_fence(struct drm_intel_decode *ctx, int len) +{ + uint32_t vs_fence, clip_fence, gs_fence, sf_fence, vfe_fence, cs_fence; + uint32_t *data = ctx->data; + + if (len != 3) + fprintf(out, "Bad count in URB_FENCE\n"); + + vs_fence = data[1] & 0x3ff; + gs_fence = (data[1] >> 10) & 0x3ff; + clip_fence = (data[1] >> 20) & 0x3ff; + sf_fence = data[2] & 0x3ff; + vfe_fence = (data[2] >> 10) & 0x3ff; + cs_fence = (data[2] >> 20) & 0x7ff; + + instr_out(ctx, 0, "URB_FENCE: %s%s%s%s%s%s\n", + (data[0] >> 13) & 1 ? "cs " : "", + (data[0] >> 12) & 1 ? "vfe " : "", + (data[0] >> 11) & 1 ? "sf " : "", + (data[0] >> 10) & 1 ? "clip " : "", + (data[0] >> 9) & 1 ? "gs " : "", + (data[0] >> 8) & 1 ? "vs " : ""); + instr_out(ctx, 1, + "vs fence: %d, clip_fence: %d, gs_fence: %d\n", + vs_fence, clip_fence, gs_fence); + instr_out(ctx, 2, + "sf fence: %d, vfe_fence: %d, cs_fence: %d\n", + sf_fence, vfe_fence, cs_fence); + if (gs_fence < vs_fence) + fprintf(out, "gs fence < vs fence!\n"); + if (clip_fence < gs_fence) + fprintf(out, "clip fence < gs fence!\n"); + if (sf_fence < clip_fence) + fprintf(out, "sf fence < clip fence!\n"); + if (cs_fence < sf_fence) + fprintf(out, "cs fence < sf fence!\n"); + + return len; +} + +static void +state_base_out(struct drm_intel_decode *ctx, unsigned int index, + const char *name) +{ + if (ctx->data[index] & 1) { + instr_out(ctx, index, + "%s state base address 0x%08x\n", name, + ctx->data[index] & ~1); + } else { + instr_out(ctx, index, "%s state base not updated\n", + name); + } +} + +static void +state_max_out(struct drm_intel_decode *ctx, unsigned int index, + const char *name) +{ + if (ctx->data[index] & 1) { + if (ctx->data[index] == 1) { + instr_out(ctx, index, + "%s state upper bound disabled\n", name); + } else { + instr_out(ctx, index, + "%s state upper bound 0x%08x\n", name, + ctx->data[index] & ~1); + } + } else { + instr_out(ctx, index, + "%s state upper bound not updated\n", name); + } +} + +static int +gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC(struct drm_intel_decode *ctx) +{ + instr_out(ctx, 0, "3DSTATE_VIEWPORT_STATE_POINTERS_CC\n"); + instr_out(ctx, 1, "pointer to CC viewport\n"); + + return 2; +} + +static int +gen7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP(struct drm_intel_decode *ctx) +{ + instr_out(ctx, 0, "3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP\n"); + instr_out(ctx, 1, "pointer to SF_CLIP viewport\n"); + + return 2; +} + +static int +gen7_3DSTATE_BLEND_STATE_POINTERS(struct drm_intel_decode *ctx) +{ + instr_out(ctx, 0, "3DSTATE_BLEND_STATE_POINTERS\n"); + instr_out(ctx, 1, "pointer to BLEND_STATE at 0x%08x (%s)\n", + ctx->data[1] & ~1, + (ctx->data[1] & 1) ? "changed" : "unchanged"); + + return 2; +} + +static int +gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS(struct drm_intel_decode *ctx) +{ + instr_out(ctx, 0, "3DSTATE_DEPTH_STENCIL_STATE_POINTERS\n"); + instr_out(ctx, 1, + "pointer to DEPTH_STENCIL_STATE at 0x%08x (%s)\n", + ctx->data[1] & ~1, + (ctx->data[1] & 1) ? "changed" : "unchanged"); + + return 2; +} + +static int +gen7_3DSTATE_HIER_DEPTH_BUFFER(struct drm_intel_decode *ctx) +{ + instr_out(ctx, 0, "3DSTATE_HIER_DEPTH_BUFFER\n"); + instr_out(ctx, 1, "pitch %db\n", + (ctx->data[1] & 0x1ffff) + 1); + instr_out(ctx, 2, "pointer to HiZ buffer\n"); + + return 3; +} + +static int +gen6_3DSTATE_CC_STATE_POINTERS(struct drm_intel_decode *ctx) +{ + instr_out(ctx, 0, "3DSTATE_CC_STATE_POINTERS\n"); + instr_out(ctx, 1, "blend change %d\n", ctx->data[1] & 1); + instr_out(ctx, 2, "depth stencil change %d\n", + ctx->data[2] & 1); + instr_out(ctx, 3, "cc change %d\n", ctx->data[3] & 1); + + return 4; +} + +static int +gen7_3DSTATE_CC_STATE_POINTERS(struct drm_intel_decode *ctx) +{ + instr_out(ctx, 0, "3DSTATE_CC_STATE_POINTERS\n"); + instr_out(ctx, 1, "pointer to COLOR_CALC_STATE at 0x%08x " + "(%s)\n", + ctx->data[1] & ~1, + (ctx->data[1] & 1) ? "changed" : "unchanged"); + + return 2; +} + +static int +gen7_3DSTATE_URB_unit(struct drm_intel_decode *ctx, const char *unit) +{ + int start_kb = ((ctx->data[1] >> 25) & 0x3f) * 8; + /* the field is # of 512-bit rows - 1, we print bytes */ + int entry_size = (((ctx->data[1] >> 16) & 0x1ff) + 1); + int nr_entries = ctx->data[1] & 0xffff; + + instr_out(ctx, 0, "3DSTATE_URB_%s\n", unit); + instr_out(ctx, 1, + "%dKB start, size=%d 64B rows, nr_entries=%d, total size %dB\n", + start_kb, entry_size, nr_entries, nr_entries * 64 * entry_size); + + return 2; +} + +static int +gen7_3DSTATE_URB_VS(struct drm_intel_decode *ctx) +{ + return gen7_3DSTATE_URB_unit(ctx, "VS"); +} + +static int +gen7_3DSTATE_URB_HS(struct drm_intel_decode *ctx) +{ + return gen7_3DSTATE_URB_unit(ctx, "HS"); +} + +static int +gen7_3DSTATE_URB_DS(struct drm_intel_decode *ctx) +{ + return gen7_3DSTATE_URB_unit(ctx, "DS"); +} + +static int +gen7_3DSTATE_URB_GS(struct drm_intel_decode *ctx) +{ + return gen7_3DSTATE_URB_unit(ctx, "GS"); +} + +static int +gen7_3DSTATE_CONSTANT(struct drm_intel_decode *ctx, const char *unit) +{ + int rlen[4]; + + rlen[0] = (ctx->data[1] >> 0) & 0xffff; + rlen[1] = (ctx->data[1] >> 16) & 0xffff; + rlen[2] = (ctx->data[2] >> 0) & 0xffff; + rlen[3] = (ctx->data[2] >> 16) & 0xffff; + + instr_out(ctx, 0, "3DSTATE_CONSTANT_%s\n", unit); + instr_out(ctx, 1, "len 0 = %d, len 1 = %d\n", rlen[0], rlen[1]); + instr_out(ctx, 2, "len 2 = %d, len 3 = %d\n", rlen[2], rlen[3]); + instr_out(ctx, 3, "pointer to constbuf 0\n"); + instr_out(ctx, 4, "pointer to constbuf 1\n"); + instr_out(ctx, 5, "pointer to constbuf 2\n"); + instr_out(ctx, 6, "pointer to constbuf 3\n"); + + return 7; +} + +static int +gen7_3DSTATE_CONSTANT_VS(struct drm_intel_decode *ctx) +{ + return gen7_3DSTATE_CONSTANT(ctx, "VS"); +} + +static int +gen7_3DSTATE_CONSTANT_GS(struct drm_intel_decode *ctx) +{ + return gen7_3DSTATE_CONSTANT(ctx, "GS"); +} + +static int +gen7_3DSTATE_CONSTANT_PS(struct drm_intel_decode *ctx) +{ + return gen7_3DSTATE_CONSTANT(ctx, "PS"); +} + +static int +gen7_3DSTATE_CONSTANT_DS(struct drm_intel_decode *ctx) +{ + return gen7_3DSTATE_CONSTANT(ctx, "DS"); +} + +static int +gen7_3DSTATE_CONSTANT_HS(struct drm_intel_decode *ctx) +{ + return gen7_3DSTATE_CONSTANT(ctx, "HS"); +} + + +static int +gen6_3DSTATE_WM(struct drm_intel_decode *ctx) +{ + instr_out(ctx, 0, "3DSTATE_WM\n"); + instr_out(ctx, 1, "kernel start pointer 0\n"); + instr_out(ctx, 2, + "SPF=%d, VME=%d, Sampler Count %d, " + "Binding table count %d\n", + (ctx->data[2] >> 31) & 1, + (ctx->data[2] >> 30) & 1, + (ctx->data[2] >> 27) & 7, + (ctx->data[2] >> 18) & 0xff); + instr_out(ctx, 3, "scratch offset\n"); + instr_out(ctx, 4, + "Depth Clear %d, Depth Resolve %d, HiZ Resolve %d, " + "Dispatch GRF start[0] %d, start[1] %d, start[2] %d\n", + (ctx->data[4] & (1 << 30)) != 0, + (ctx->data[4] & (1 << 28)) != 0, + (ctx->data[4] & (1 << 27)) != 0, + (ctx->data[4] >> 16) & 0x7f, + (ctx->data[4] >> 8) & 0x7f, + (ctx->data[4] & 0x7f)); + instr_out(ctx, 5, + "MaxThreads %d, PS KillPixel %d, PS computed Z %d, " + "PS use sourceZ %d, Thread Dispatch %d, PS use sourceW %d, " + "Dispatch32 %d, Dispatch16 %d, Dispatch8 %d\n", + ((ctx->data[5] >> 25) & 0x7f) + 1, + (ctx->data[5] & (1 << 22)) != 0, + (ctx->data[5] & (1 << 21)) != 0, + (ctx->data[5] & (1 << 20)) != 0, + (ctx->data[5] & (1 << 19)) != 0, + (ctx->data[5] & (1 << 8)) != 0, + (ctx->data[5] & (1 << 2)) != 0, + (ctx->data[5] & (1 << 1)) != 0, + (ctx->data[5] & (1 << 0)) != 0); + instr_out(ctx, 6, + "Num SF output %d, Pos XY offset %d, ZW interp mode %d , " + "Barycentric interp mode 0x%x, Point raster rule %d, " + "Multisample mode %d, " + "Multisample Dispatch mode %d\n", + (ctx->data[6] >> 20) & 0x3f, + (ctx->data[6] >> 18) & 3, + (ctx->data[6] >> 16) & 3, + (ctx->data[6] >> 10) & 0x3f, + (ctx->data[6] & (1 << 9)) != 0, + (ctx->data[6] >> 1) & 3, + (ctx->data[6] & 1)); + instr_out(ctx, 7, "kernel start pointer 1\n"); + instr_out(ctx, 8, "kernel start pointer 2\n"); + + return 9; +} + +static int +gen7_3DSTATE_WM(struct drm_intel_decode *ctx) +{ + const char *computed_depth = ""; + const char *early_depth = ""; + const char *zw_interp = ""; + + switch ((ctx->data[1] >> 23) & 0x3) { + case 0: + computed_depth = ""; + break; + case 1: + computed_depth = "computed depth"; + break; + case 2: + computed_depth = "computed depth >="; + break; + case 3: + computed_depth = "computed depth <="; + break; + } + + switch ((ctx->data[1] >> 21) & 0x3) { + case 0: + early_depth = ""; + break; + case 1: + early_depth = ", EDSC_PSEXEC"; + break; + case 2: + early_depth = ", EDSC_PREPS"; + break; + case 3: + early_depth = ", BAD EDSC"; + break; + } + + switch ((ctx->data[1] >> 17) & 0x3) { + case 0: + early_depth = ""; + break; + case 1: + early_depth = ", BAD ZW interp"; + break; + case 2: + early_depth = ", ZW centroid"; + break; + case 3: + early_depth = ", ZW sample"; + break; + } + + instr_out(ctx, 0, "3DSTATE_WM\n"); + instr_out(ctx, 1, "(%s%s%s%s%s%s)%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + (ctx->data[1] & (1 << 11)) ? "PP " : "", + (ctx->data[1] & (1 << 12)) ? "PC " : "", + (ctx->data[1] & (1 << 13)) ? "PS " : "", + (ctx->data[1] & (1 << 14)) ? "NPP " : "", + (ctx->data[1] & (1 << 15)) ? "NPC " : "", + (ctx->data[1] & (1 << 16)) ? "NPS " : "", + (ctx->data[1] & (1 << 30)) ? ", depth clear" : "", + (ctx->data[1] & (1 << 29)) ? "" : ", disabled", + (ctx->data[1] & (1 << 28)) ? ", depth resolve" : "", + (ctx->data[1] & (1 << 27)) ? ", hiz resolve" : "", + (ctx->data[1] & (1 << 25)) ? ", kill" : "", + computed_depth, + early_depth, + zw_interp, + (ctx->data[1] & (1 << 20)) ? ", source depth" : "", + (ctx->data[1] & (1 << 19)) ? ", source W" : "", + (ctx->data[1] & (1 << 10)) ? ", coverage" : "", + (ctx->data[1] & (1 << 4)) ? ", poly stipple" : "", + (ctx->data[1] & (1 << 3)) ? ", line stipple" : "", + (ctx->data[1] & (1 << 2)) ? ", point UL" : ", point UR" + ); + instr_out(ctx, 2, "MS\n"); + + return 3; +} + +static int +gen4_3DPRIMITIVE(struct drm_intel_decode *ctx) +{ + instr_out(ctx, 0, + "3DPRIMITIVE: %s %s\n", + get_965_prim_type((ctx->data[0] >> 10) & 0x1f), + (ctx->data[0] & (1 << 15)) ? "random" : "sequential"); + instr_out(ctx, 1, "vertex count\n"); + instr_out(ctx, 2, "start vertex\n"); + instr_out(ctx, 3, "instance count\n"); + instr_out(ctx, 4, "start instance\n"); + instr_out(ctx, 5, "index bias\n"); + + return 6; +} + +static int +gen7_3DPRIMITIVE(struct drm_intel_decode *ctx) +{ + bool indirect = !!(ctx->data[0] & (1 << 10)); + + instr_out(ctx, 0, + "3DPRIMITIVE: %s%s\n", + indirect ? " indirect" : "", + (ctx->data[0] & (1 << 8)) ? " predicated" : ""); + instr_out(ctx, 1, "%s %s\n", + get_965_prim_type(ctx->data[1] & 0x3f), + (ctx->data[1] & (1 << 8)) ? "random" : "sequential"); + instr_out(ctx, 2, indirect ? "ignored" : "vertex count\n"); + instr_out(ctx, 3, indirect ? "ignored" : "start vertex\n"); + instr_out(ctx, 4, indirect ? "ignored" : "instance count\n"); + instr_out(ctx, 5, indirect ? "ignored" : "start instance\n"); + instr_out(ctx, 6, indirect ? "ignored" : "index bias\n"); + + return 7; +} + +static int +decode_3d_965(struct drm_intel_decode *ctx) +{ + uint32_t opcode; + unsigned int len; + unsigned int i, j, sba_len; + const char *desc1 = NULL; + uint32_t *data = ctx->data; + uint32_t devid = ctx->devid; + + struct { + uint32_t opcode; + uint32_t len_mask; + int unsigned min_len; + int unsigned max_len; + const char *name; + int gen; + int (*func)(struct drm_intel_decode *ctx); + } opcodes_3d[] = { + { 0x6000, 0x00ff, 3, 3, "URB_FENCE" }, + { 0x6001, 0xffff, 2, 2, "CS_URB_STATE" }, + { 0x6002, 0x00ff, 2, 2, "CONSTANT_BUFFER" }, + { 0x6101, 0xffff, 6, 10, "STATE_BASE_ADDRESS" }, + { 0x6102, 0xffff, 2, 2, "STATE_SIP" }, + { 0x6104, 0xffff, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x680b, 0xffff, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x6904, 0xffff, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x7800, 0xffff, 7, 7, "3DSTATE_PIPELINED_POINTERS" }, + { 0x7801, 0x00ff, 4, 6, "3DSTATE_BINDING_TABLE_POINTERS" }, + { 0x7802, 0x00ff, 4, 4, "3DSTATE_SAMPLER_STATE_POINTERS" }, + { 0x7805, 0x00ff, 7, 7, "3DSTATE_DEPTH_BUFFER", 7 }, + { 0x7805, 0x00ff, 3, 3, "3DSTATE_URB" }, + { 0x7804, 0x00ff, 3, 3, "3DSTATE_CLEAR_PARAMS" }, + { 0x7806, 0x00ff, 3, 3, "3DSTATE_STENCIL_BUFFER" }, + { 0x790f, 0x00ff, 3, 3, "3DSTATE_HIER_DEPTH_BUFFER", 6 }, + { 0x7807, 0x00ff, 3, 3, "3DSTATE_HIER_DEPTH_BUFFER", 7, gen7_3DSTATE_HIER_DEPTH_BUFFER }, + { 0x7808, 0x00ff, 5, 257, "3DSTATE_VERTEX_BUFFERS" }, + { 0x7809, 0x00ff, 3, 256, "3DSTATE_VERTEX_ELEMENTS" }, + { 0x780a, 0x00ff, 3, 3, "3DSTATE_INDEX_BUFFER" }, + { 0x780b, 0xffff, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x780d, 0x00ff, 4, 4, "3DSTATE_VIEWPORT_STATE_POINTERS" }, + { 0x780e, 0xffff, 4, 4, NULL, 6, gen6_3DSTATE_CC_STATE_POINTERS }, + { 0x780e, 0x00ff, 2, 2, NULL, 7, gen7_3DSTATE_CC_STATE_POINTERS }, + { 0x780f, 0x00ff, 2, 2, "3DSTATE_SCISSOR_POINTERS" }, + { 0x7810, 0x00ff, 6, 6, "3DSTATE_VS" }, + { 0x7811, 0x00ff, 7, 7, "3DSTATE_GS" }, + { 0x7812, 0x00ff, 4, 4, "3DSTATE_CLIP" }, + { 0x7813, 0x00ff, 20, 20, "3DSTATE_SF", 6 }, + { 0x7813, 0x00ff, 7, 7, "3DSTATE_SF", 7 }, + { 0x7814, 0x00ff, 3, 3, "3DSTATE_WM", 7, gen7_3DSTATE_WM }, + { 0x7814, 0x00ff, 9, 9, "3DSTATE_WM", 6, gen6_3DSTATE_WM }, + { 0x7815, 0x00ff, 5, 5, "3DSTATE_CONSTANT_VS_STATE", 6 }, + { 0x7815, 0x00ff, 7, 7, "3DSTATE_CONSTANT_VS", 7, gen7_3DSTATE_CONSTANT_VS }, + { 0x7816, 0x00ff, 5, 5, "3DSTATE_CONSTANT_GS_STATE", 6 }, + { 0x7816, 0x00ff, 7, 7, "3DSTATE_CONSTANT_GS", 7, gen7_3DSTATE_CONSTANT_GS }, + { 0x7817, 0x00ff, 5, 5, "3DSTATE_CONSTANT_PS_STATE", 6 }, + { 0x7817, 0x00ff, 7, 7, "3DSTATE_CONSTANT_PS", 7, gen7_3DSTATE_CONSTANT_PS }, + { 0x7818, 0xffff, 2, 2, "3DSTATE_SAMPLE_MASK" }, + { 0x7819, 0x00ff, 7, 7, "3DSTATE_CONSTANT_HS", 7, gen7_3DSTATE_CONSTANT_HS }, + { 0x781a, 0x00ff, 7, 7, "3DSTATE_CONSTANT_DS", 7, gen7_3DSTATE_CONSTANT_DS }, + { 0x781b, 0x00ff, 7, 7, "3DSTATE_HS" }, + { 0x781c, 0x00ff, 4, 4, "3DSTATE_TE" }, + { 0x781d, 0x00ff, 6, 6, "3DSTATE_DS" }, + { 0x781e, 0x00ff, 3, 3, "3DSTATE_STREAMOUT" }, + { 0x781f, 0x00ff, 14, 14, "3DSTATE_SBE" }, + { 0x7820, 0x00ff, 8, 8, "3DSTATE_PS" }, + { 0x7821, 0x00ff, 2, 2, NULL, 7, gen7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP }, + { 0x7823, 0x00ff, 2, 2, NULL, 7, gen7_3DSTATE_VIEWPORT_STATE_POINTERS_CC }, + { 0x7824, 0x00ff, 2, 2, NULL, 7, gen7_3DSTATE_BLEND_STATE_POINTERS }, + { 0x7825, 0x00ff, 2, 2, NULL, 7, gen7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS }, + { 0x7826, 0x00ff, 2, 2, "3DSTATE_BINDING_TABLE_POINTERS_VS" }, + { 0x7827, 0x00ff, 2, 2, "3DSTATE_BINDING_TABLE_POINTERS_HS" }, + { 0x7828, 0x00ff, 2, 2, "3DSTATE_BINDING_TABLE_POINTERS_DS" }, + { 0x7829, 0x00ff, 2, 2, "3DSTATE_BINDING_TABLE_POINTERS_GS" }, + { 0x782a, 0x00ff, 2, 2, "3DSTATE_BINDING_TABLE_POINTERS_PS" }, + { 0x782b, 0x00ff, 2, 2, "3DSTATE_SAMPLER_STATE_POINTERS_VS" }, + { 0x782c, 0x00ff, 2, 2, "3DSTATE_SAMPLER_STATE_POINTERS_HS" }, + { 0x782d, 0x00ff, 2, 2, "3DSTATE_SAMPLER_STATE_POINTERS_DS" }, + { 0x782e, 0x00ff, 2, 2, "3DSTATE_SAMPLER_STATE_POINTERS_GS" }, + { 0x782f, 0x00ff, 2, 2, "3DSTATE_SAMPLER_STATE_POINTERS_PS" }, + { 0x7830, 0x00ff, 2, 2, NULL, 7, gen7_3DSTATE_URB_VS }, + { 0x7831, 0x00ff, 2, 2, NULL, 7, gen7_3DSTATE_URB_HS }, + { 0x7832, 0x00ff, 2, 2, NULL, 7, gen7_3DSTATE_URB_DS }, + { 0x7833, 0x00ff, 2, 2, NULL, 7, gen7_3DSTATE_URB_GS }, + { 0x7900, 0xffff, 4, 4, "3DSTATE_DRAWING_RECTANGLE" }, + { 0x7901, 0xffff, 5, 5, "3DSTATE_CONSTANT_COLOR" }, + { 0x7905, 0xffff, 5, 7, "3DSTATE_DEPTH_BUFFER" }, + { 0x7906, 0xffff, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" }, + { 0x7907, 0xffff, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" }, + { 0x7908, 0xffff, 3, 3, "3DSTATE_LINE_STIPPLE" }, + { 0x7909, 0xffff, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" }, + { 0x7909, 0xffff, 2, 2, "3DSTATE_CLEAR_PARAMS" }, + { 0x790a, 0xffff, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" }, + { 0x790b, 0xffff, 4, 4, "3DSTATE_GS_SVB_INDEX" }, + { 0x790d, 0xffff, 3, 3, "3DSTATE_MULTISAMPLE", 6 }, + { 0x790d, 0xffff, 4, 4, "3DSTATE_MULTISAMPLE", 7 }, + { 0x7910, 0x00ff, 2, 2, "3DSTATE_CLEAR_PARAMS" }, + { 0x7912, 0x00ff, 2, 2, "3DSTATE_PUSH_CONSTANT_ALLOC_VS" }, + { 0x7913, 0x00ff, 2, 2, "3DSTATE_PUSH_CONSTANT_ALLOC_HS" }, + { 0x7914, 0x00ff, 2, 2, "3DSTATE_PUSH_CONSTANT_ALLOC_DS" }, + { 0x7915, 0x00ff, 2, 2, "3DSTATE_PUSH_CONSTANT_ALLOC_GS" }, + { 0x7916, 0x00ff, 2, 2, "3DSTATE_PUSH_CONSTANT_ALLOC_PS" }, + { 0x7917, 0x00ff, 2, 2+128*2, "3DSTATE_SO_DECL_LIST" }, + { 0x7918, 0x00ff, 4, 4, "3DSTATE_SO_BUFFER" }, + { 0x7a00, 0x00ff, 4, 6, "PIPE_CONTROL" }, + { 0x7b00, 0x00ff, 7, 7, NULL, 7, gen7_3DPRIMITIVE }, + { 0x7b00, 0x00ff, 6, 6, NULL, 0, gen4_3DPRIMITIVE }, + }, *opcode_3d = NULL; + + opcode = (data[0] & 0xffff0000) >> 16; + + for (i = 0; i < ARRAY_SIZE(opcodes_3d); i++) { + if (opcode != opcodes_3d[i].opcode) + continue; + + /* If it's marked as not our gen, skip. */ + if (opcodes_3d[i].gen && opcodes_3d[i].gen != ctx->gen) + continue; + + opcode_3d = &opcodes_3d[i]; + break; + } + + if (opcode_3d) { + if (opcode_3d->max_len == 1) + len = 1; + else + len = (data[0] & opcode_3d->len_mask) + 2; + + if (len < opcode_3d->min_len || + len > opcode_3d->max_len) { + fprintf(out, "Bad length %d in %s, expected %d-%d\n", + len, opcode_3d->name, + opcode_3d->min_len, opcode_3d->max_len); + } + } else { + len = (data[0] & 0x0000ffff) + 2; + } + + switch (opcode) { + case 0x6000: + return i965_decode_urb_fence(ctx, len); + case 0x6001: + instr_out(ctx, 0, "CS_URB_STATE\n"); + instr_out(ctx, 1, + "entry_size: %d [%d bytes], n_entries: %d\n", + (data[1] >> 4) & 0x1f, + (((data[1] >> 4) & 0x1f) + 1) * 64, data[1] & 0x7); + return len; + case 0x6002: + instr_out(ctx, 0, "CONSTANT_BUFFER: %s\n", + (data[0] >> 8) & 1 ? "valid" : "invalid"); + instr_out(ctx, 1, + "offset: 0x%08x, length: %d bytes\n", data[1] & ~0x3f, + ((data[1] & 0x3f) + 1) * 64); + return len; + case 0x6101: + i = 0; + instr_out(ctx, 0, "STATE_BASE_ADDRESS\n"); + i++; + + if (IS_GEN6(devid) || IS_GEN7(devid)) + sba_len = 10; + else if (IS_GEN5(devid)) + sba_len = 8; + else + sba_len = 6; + if (len != sba_len) + fprintf(out, "Bad count in STATE_BASE_ADDRESS\n"); + + state_base_out(ctx, i++, "general"); + state_base_out(ctx, i++, "surface"); + if (IS_GEN6(devid) || IS_GEN7(devid)) + state_base_out(ctx, i++, "dynamic"); + state_base_out(ctx, i++, "indirect"); + if (IS_GEN5(devid) || IS_GEN6(devid) || IS_GEN7(devid)) + state_base_out(ctx, i++, "instruction"); + + state_max_out(ctx, i++, "general"); + if (IS_GEN6(devid) || IS_GEN7(devid)) + state_max_out(ctx, i++, "dynamic"); + state_max_out(ctx, i++, "indirect"); + if (IS_GEN5(devid) || IS_GEN6(devid) || IS_GEN7(devid)) + state_max_out(ctx, i++, "instruction"); + + return len; + case 0x7800: + instr_out(ctx, 0, "3DSTATE_PIPELINED_POINTERS\n"); + instr_out(ctx, 1, "VS state\n"); + instr_out(ctx, 2, "GS state\n"); + instr_out(ctx, 3, "Clip state\n"); + instr_out(ctx, 4, "SF state\n"); + instr_out(ctx, 5, "WM state\n"); + instr_out(ctx, 6, "CC state\n"); + return len; + case 0x7801: + if (len != 6 && len != 4) + fprintf(out, + "Bad count in 3DSTATE_BINDING_TABLE_POINTERS\n"); + if (len == 6) { + instr_out(ctx, 0, + "3DSTATE_BINDING_TABLE_POINTERS\n"); + instr_out(ctx, 1, "VS binding table\n"); + instr_out(ctx, 2, "GS binding table\n"); + instr_out(ctx, 3, "Clip binding table\n"); + instr_out(ctx, 4, "SF binding table\n"); + instr_out(ctx, 5, "WM binding table\n"); + } else { + instr_out(ctx, 0, + "3DSTATE_BINDING_TABLE_POINTERS: VS mod %d, " + "GS mod %d, PS mod %d\n", + (data[0] & (1 << 8)) != 0, + (data[0] & (1 << 9)) != 0, + (data[0] & (1 << 12)) != 0); + instr_out(ctx, 1, "VS binding table\n"); + instr_out(ctx, 2, "GS binding table\n"); + instr_out(ctx, 3, "WM binding table\n"); + } + + return len; + case 0x7802: + instr_out(ctx, 0, + "3DSTATE_SAMPLER_STATE_POINTERS: VS mod %d, " + "GS mod %d, PS mod %d\n", (data[0] & (1 << 8)) != 0, + (data[0] & (1 << 9)) != 0, + (data[0] & (1 << 12)) != 0); + instr_out(ctx, 1, "VS sampler state\n"); + instr_out(ctx, 2, "GS sampler state\n"); + instr_out(ctx, 3, "WM sampler state\n"); + return len; + case 0x7805: + /* Actually 3DSTATE_DEPTH_BUFFER on gen7. */ + if (ctx->gen == 7) + break; + + instr_out(ctx, 0, "3DSTATE_URB\n"); + instr_out(ctx, 1, + "VS entries %d, alloc size %d (1024bit row)\n", + data[1] & 0xffff, ((data[1] >> 16) & 0x07f) + 1); + instr_out(ctx, 2, + "GS entries %d, alloc size %d (1024bit row)\n", + (data[2] >> 8) & 0x3ff, (data[2] & 7) + 1); + return len; + + case 0x7808: + if ((len - 1) % 4 != 0) + fprintf(out, "Bad count in 3DSTATE_VERTEX_BUFFERS\n"); + instr_out(ctx, 0, "3DSTATE_VERTEX_BUFFERS\n"); + + for (i = 1; i < len;) { + int idx, access; + if (IS_GEN6(devid)) { + idx = 26; + access = 20; + } else { + idx = 27; + access = 26; + } + instr_out(ctx, i, + "buffer %d: %s, pitch %db\n", data[i] >> idx, + data[i] & (1 << access) ? "random" : + "sequential", data[i] & 0x07ff); + i++; + instr_out(ctx, i++, "buffer address\n"); + instr_out(ctx, i++, "max index\n"); + instr_out(ctx, i++, "mbz\n"); + } + return len; + + case 0x7809: + if ((len + 1) % 2 != 0) + fprintf(out, "Bad count in 3DSTATE_VERTEX_ELEMENTS\n"); + instr_out(ctx, 0, "3DSTATE_VERTEX_ELEMENTS\n"); + + for (i = 1; i < len;) { + instr_out(ctx, i, + "buffer %d: %svalid, type 0x%04x, " + "src offset 0x%04x bytes\n", + data[i] >> ((IS_GEN6(devid) || IS_GEN7(devid)) ? 26 : 27), + data[i] & (1 << ((IS_GEN6(devid) || IS_GEN7(devid)) ? 25 : 26)) ? + "" : "in", (data[i] >> 16) & 0x1ff, + data[i] & 0x07ff); + i++; + instr_out(ctx, i, "(%s, %s, %s, %s), " + "dst offset 0x%02x bytes\n", + get_965_element_component(data[i], 0), + get_965_element_component(data[i], 1), + get_965_element_component(data[i], 2), + get_965_element_component(data[i], 3), + (data[i] & 0xff) * 4); + i++; + } + return len; + + case 0x780d: + instr_out(ctx, 0, + "3DSTATE_VIEWPORT_STATE_POINTERS\n"); + instr_out(ctx, 1, "clip\n"); + instr_out(ctx, 2, "sf\n"); + instr_out(ctx, 3, "cc\n"); + return len; + + case 0x780a: + instr_out(ctx, 0, "3DSTATE_INDEX_BUFFER\n"); + instr_out(ctx, 1, "beginning buffer address\n"); + instr_out(ctx, 2, "ending buffer address\n"); + return len; + + case 0x780f: + instr_out(ctx, 0, "3DSTATE_SCISSOR_POINTERS\n"); + instr_out(ctx, 1, "scissor rect offset\n"); + return len; + + case 0x7810: + instr_out(ctx, 0, "3DSTATE_VS\n"); + instr_out(ctx, 1, "kernel pointer\n"); + instr_out(ctx, 2, + "SPF=%d, VME=%d, Sampler Count %d, " + "Binding table count %d\n", (data[2] >> 31) & 1, + (data[2] >> 30) & 1, (data[2] >> 27) & 7, + (data[2] >> 18) & 0xff); + instr_out(ctx, 3, "scratch offset\n"); + instr_out(ctx, 4, + "Dispatch GRF start %d, VUE read length %d, " + "VUE read offset %d\n", (data[4] >> 20) & 0x1f, + (data[4] >> 11) & 0x3f, (data[4] >> 4) & 0x3f); + instr_out(ctx, 5, + "Max Threads %d, Vertex Cache %sable, " + "VS func %sable\n", ((data[5] >> 25) & 0x7f) + 1, + (data[5] & (1 << 1)) != 0 ? "dis" : "en", + (data[5] & 1) != 0 ? "en" : "dis"); + return len; + + case 0x7811: + instr_out(ctx, 0, "3DSTATE_GS\n"); + instr_out(ctx, 1, "kernel pointer\n"); + instr_out(ctx, 2, + "SPF=%d, VME=%d, Sampler Count %d, " + "Binding table count %d\n", (data[2] >> 31) & 1, + (data[2] >> 30) & 1, (data[2] >> 27) & 7, + (data[2] >> 18) & 0xff); + instr_out(ctx, 3, "scratch offset\n"); + instr_out(ctx, 4, + "Dispatch GRF start %d, VUE read length %d, " + "VUE read offset %d\n", (data[4] & 0xf), + (data[4] >> 11) & 0x3f, (data[4] >> 4) & 0x3f); + instr_out(ctx, 5, + "Max Threads %d, Rendering %sable\n", + ((data[5] >> 25) & 0x7f) + 1, + (data[5] & (1 << 8)) != 0 ? "en" : "dis"); + instr_out(ctx, 6, + "Reorder %sable, Discard Adjaceny %sable, " + "GS %sable\n", + (data[6] & (1 << 30)) != 0 ? "en" : "dis", + (data[6] & (1 << 29)) != 0 ? "en" : "dis", + (data[6] & (1 << 15)) != 0 ? "en" : "dis"); + return len; + + case 0x7812: + instr_out(ctx, 0, "3DSTATE_CLIP\n"); + instr_out(ctx, 1, + "UserClip distance cull test mask 0x%x\n", + data[1] & 0xff); + instr_out(ctx, 2, + "Clip %sable, API mode %s, Viewport XY test %sable, " + "Viewport Z test %sable, Guardband test %sable, Clip mode %d, " + "Perspective Divide %sable, Non-Perspective Barycentric %sable, " + "Tri Provoking %d, Line Provoking %d, Trifan Provoking %d\n", + (data[2] & (1 << 31)) != 0 ? "en" : "dis", + (data[2] & (1 << 30)) != 0 ? "D3D" : "OGL", + (data[2] & (1 << 28)) != 0 ? "en" : "dis", + (data[2] & (1 << 27)) != 0 ? "en" : "dis", + (data[2] & (1 << 26)) != 0 ? "en" : "dis", + (data[2] >> 13) & 7, + (data[2] & (1 << 9)) != 0 ? "dis" : "en", + (data[2] & (1 << 8)) != 0 ? "en" : "dis", + (data[2] >> 4) & 3, (data[2] >> 2) & 3, + (data[2] & 3)); + instr_out(ctx, 3, + "Min PointWidth %d, Max PointWidth %d, " + "Force Zero RTAIndex %sable, Max VPIndex %d\n", + (data[3] >> 17) & 0x7ff, (data[3] >> 6) & 0x7ff, + (data[3] & (1 << 5)) != 0 ? "en" : "dis", + (data[3] & 0xf)); + return len; + + case 0x7813: + if (ctx->gen == 7) + break; + + instr_out(ctx, 0, "3DSTATE_SF\n"); + instr_out(ctx, 1, + "Attrib Out %d, Attrib Swizzle %sable, VUE read length %d, " + "VUE read offset %d\n", (data[1] >> 22) & 0x3f, + (data[1] & (1 << 21)) != 0 ? "en" : "dis", + (data[1] >> 11) & 0x1f, (data[1] >> 4) & 0x3f); + instr_out(ctx, 2, + "Legacy Global DepthBias %sable, FrontFace fill %d, BF fill %d, " + "VP transform %sable, FrontWinding_%s\n", + (data[2] & (1 << 11)) != 0 ? "en" : "dis", + (data[2] >> 5) & 3, (data[2] >> 3) & 3, + (data[2] & (1 << 1)) != 0 ? "en" : "dis", + (data[2] & 1) != 0 ? "CCW" : "CW"); + instr_out(ctx, 3, + "AA %sable, CullMode %d, Scissor %sable, Multisample m ode %d\n", + (data[3] & (1 << 31)) != 0 ? "en" : "dis", + (data[3] >> 29) & 3, + (data[3] & (1 << 11)) != 0 ? "en" : "dis", + (data[3] >> 8) & 3); + instr_out(ctx, 4, + "Last Pixel %sable, SubPixel Precision %d, Use PixelWidth %d\n", + (data[4] & (1 << 31)) != 0 ? "en" : "dis", + (data[4] & (1 << 12)) != 0 ? 4 : 8, + (data[4] & (1 << 11)) != 0); + instr_out(ctx, 5, + "Global Depth Offset Constant %f\n", + *(float *)(&data[5])); + instr_out(ctx, 6, "Global Depth Offset Scale %f\n", + *(float *)(&data[6])); + instr_out(ctx, 7, "Global Depth Offset Clamp %f\n", + *(float *)(&data[7])); + + for (i = 0, j = 0; i < 8; i++, j += 2) + instr_out(ctx, i + 8, + "Attrib %d (Override %s%s%s%s, Const Source %d, Swizzle Select %d, " + "Source %d); Attrib %d (Override %s%s%s%s, Const Source %d, Swizzle Select %d, Source %d)\n", + j + 1, + (data[8 + i] & (1 << 31)) != 0 ? "W" : "", + (data[8 + i] & (1 << 30)) != 0 ? "Z" : "", + (data[8 + i] & (1 << 29)) != 0 ? "Y" : "", + (data[8 + i] & (1 << 28)) != 0 ? "X" : "", + (data[8 + i] >> 25) & 3, + (data[8 + i] >> 22) & 3, + (data[8 + i] >> 16) & 0x1f, j, + (data[8 + i] & (1 << 15)) != 0 ? "W" : "", + (data[8 + i] & (1 << 14)) != 0 ? "Z" : "", + (data[8 + i] & (1 << 13)) != 0 ? "Y" : "", + (data[8 + i] & (1 << 12)) != 0 ? "X" : "", + (data[8 + i] >> 9) & 3, + (data[8 + i] >> 6) & 3, (data[8 + i] & 0x1f)); + instr_out(ctx, 16, + "Point Sprite TexCoord Enable\n"); + instr_out(ctx, 17, "Const Interp Enable\n"); + instr_out(ctx, 18, + "Attrib 7-0 WrapShortest Enable\n"); + instr_out(ctx, 19, + "Attrib 15-8 WrapShortest Enable\n"); + + return len; + + case 0x7900: + instr_out(ctx, 0, "3DSTATE_DRAWING_RECTANGLE\n"); + instr_out(ctx, 1, "top left: %d,%d\n", + data[1] & 0xffff, (data[1] >> 16) & 0xffff); + instr_out(ctx, 2, "bottom right: %d,%d\n", + data[2] & 0xffff, (data[2] >> 16) & 0xffff); + instr_out(ctx, 3, "origin: %d,%d\n", + (int)data[3] & 0xffff, ((int)data[3] >> 16) & 0xffff); + + return len; + + case 0x7905: + instr_out(ctx, 0, "3DSTATE_DEPTH_BUFFER\n"); + if (IS_GEN5(devid) || IS_GEN6(devid)) + instr_out(ctx, 1, + "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Separate Stencil %d\n", + get_965_surfacetype(data[1] >> 29), + get_965_depthformat((data[1] >> 18) & 0x7), + (data[1] & 0x0001ffff) + 1, + data[1] & (1 << 27) ? "" : "not ", + (data[1] & (1 << 22)) != 0, + (data[1] & (1 << 21)) != 0); + else + instr_out(ctx, 1, + "%s, %s, pitch = %d bytes, %stiled\n", + get_965_surfacetype(data[1] >> 29), + get_965_depthformat((data[1] >> 18) & 0x7), + (data[1] & 0x0001ffff) + 1, + data[1] & (1 << 27) ? "" : "not "); + instr_out(ctx, 2, "depth offset\n"); + instr_out(ctx, 3, "%dx%d\n", + ((data[3] & 0x0007ffc0) >> 6) + 1, + ((data[3] & 0xfff80000) >> 19) + 1); + instr_out(ctx, 4, "volume depth\n"); + if (len >= 6) + instr_out(ctx, 5, "\n"); + if (len >= 7) { + if (IS_GEN6(devid)) + instr_out(ctx, 6, "\n"); + else + instr_out(ctx, 6, + "render target view extent\n"); + } + + return len; + + case 0x7a00: + if (IS_GEN6(devid) || IS_GEN7(devid)) { + if (len != 4 && len != 5) + fprintf(out, "Bad count in PIPE_CONTROL\n"); + + switch ((data[1] >> 14) & 0x3) { + case 0: + desc1 = "no write"; + break; + case 1: + desc1 = "qword write"; + break; + case 2: + desc1 = "PS_DEPTH_COUNT write"; + break; + case 3: + desc1 = "TIMESTAMP write"; + break; + } + instr_out(ctx, 0, "PIPE_CONTROL\n"); + instr_out(ctx, 1, + "%s, %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + desc1, + data[1] & (1 << 20) ? "cs stall, " : "", + data[1] & (1 << 19) ? + "global snapshot count reset, " : "", + data[1] & (1 << 18) ? "tlb invalidate, " : "", + data[1] & (1 << 17) ? "gfdt flush, " : "", + data[1] & (1 << 17) ? "media state clear, " : + "", + data[1] & (1 << 13) ? "depth stall, " : "", + data[1] & (1 << 12) ? + "render target cache flush, " : "", + data[1] & (1 << 11) ? + "instruction cache invalidate, " : "", + data[1] & (1 << 10) ? + "texture cache invalidate, " : "", + data[1] & (1 << 9) ? + "indirect state invalidate, " : "", + data[1] & (1 << 8) ? "notify irq, " : "", + data[1] & (1 << 7) ? "PIPE_CONTROL flush, " : + "", + data[1] & (1 << 6) ? "protect mem app_id, " : + "", data[1] & (1 << 5) ? "DC flush, " : "", + data[1] & (1 << 4) ? "vf fetch invalidate, " : + "", + data[1] & (1 << 3) ? + "constant cache invalidate, " : "", + data[1] & (1 << 2) ? + "state cache invalidate, " : "", + data[1] & (1 << 1) ? "stall at scoreboard, " : + "", + data[1] & (1 << 0) ? "depth cache flush, " : + ""); + if (len == 5) { + instr_out(ctx, 2, + "destination address\n"); + instr_out(ctx, 3, + "immediate dword low\n"); + instr_out(ctx, 4, + "immediate dword high\n"); + } else { + for (i = 2; i < len; i++) { + instr_out(ctx, i, "\n"); + } + } + return len; + } else { + if (len != 4) + fprintf(out, "Bad count in PIPE_CONTROL\n"); + + switch ((data[0] >> 14) & 0x3) { + case 0: + desc1 = "no write"; + break; + case 1: + desc1 = "qword write"; + break; + case 2: + desc1 = "PS_DEPTH_COUNT write"; + break; + case 3: + desc1 = "TIMESTAMP write"; + break; + } + instr_out(ctx, 0, + "PIPE_CONTROL: %s, %sdepth stall, %sRC write flush, " + "%sinst flush\n", + desc1, + data[0] & (1 << 13) ? "" : "no ", + data[0] & (1 << 12) ? "" : "no ", + data[0] & (1 << 11) ? "" : "no "); + instr_out(ctx, 1, "destination address\n"); + instr_out(ctx, 2, "immediate dword low\n"); + instr_out(ctx, 3, "immediate dword high\n"); + return len; + } + } + + if (opcode_3d) { + if (opcode_3d->func) { + return opcode_3d->func(ctx); + } else { + instr_out(ctx, 0, "%s\n", opcode_3d->name); + + for (i = 1; i < len; i++) { + instr_out(ctx, i, "dword %d\n", i); + } + return len; + } + } + + instr_out(ctx, 0, "3D UNKNOWN: 3d_965 opcode = 0x%x\n", + opcode); + return 1; +} + +static int +decode_3d_i830(struct drm_intel_decode *ctx) +{ + unsigned int idx; + uint32_t opcode; + uint32_t *data = ctx->data; + + struct { + uint32_t opcode; + unsigned int min_len; + unsigned int max_len; + const char *name; + } opcodes_3d[] = { + { 0x02, 1, 1, "3DSTATE_MODES_3" }, + { 0x03, 1, 1, "3DSTATE_ENABLES_1" }, + { 0x04, 1, 1, "3DSTATE_ENABLES_2" }, + { 0x05, 1, 1, "3DSTATE_VFT0" }, + { 0x06, 1, 1, "3DSTATE_AA" }, + { 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" }, + { 0x08, 1, 1, "3DSTATE_MODES_1" }, + { 0x09, 1, 1, "3DSTATE_STENCIL_TEST" }, + { 0x0a, 1, 1, "3DSTATE_VFT1" }, + { 0x0b, 1, 1, "3DSTATE_INDPT_ALPHA_BLEND" }, + { 0x0c, 1, 1, "3DSTATE_MODES_5" }, + { 0x0d, 1, 1, "3DSTATE_MAP_BLEND_OP" }, + { 0x0e, 1, 1, "3DSTATE_MAP_BLEND_ARG" }, + { 0x0f, 1, 1, "3DSTATE_MODES_2" }, + { 0x15, 1, 1, "3DSTATE_FOG_COLOR" }, + { 0x16, 1, 1, "3DSTATE_MODES_4"}, + }, *opcode_3d; + + opcode = (data[0] & 0x1f000000) >> 24; + + switch (opcode) { + case 0x1f: + return decode_3d_primitive(ctx); + case 0x1d: + return decode_3d_1d(ctx); + case 0x1c: + return decode_3d_1c(ctx); + } + + for (idx = 0; idx < ARRAY_SIZE(opcodes_3d); idx++) { + opcode_3d = &opcodes_3d[idx]; + if ((data[0] & 0x1f000000) >> 24 == opcode_3d->opcode) { + unsigned int len = 1, i; + + instr_out(ctx, 0, "%s\n", opcode_3d->name); + if (opcode_3d->max_len > 1) { + len = (data[0] & 0xff) + 2; + if (len < opcode_3d->min_len || + len > opcode_3d->max_len) { + fprintf(out, "Bad count in %s\n", + opcode_3d->name); + } + } + + for (i = 1; i < len; i++) { + instr_out(ctx, i, "dword %d\n", i); + } + return len; + } + } + + instr_out(ctx, 0, "3D UNKNOWN: 3d_i830 opcode = 0x%x\n", + opcode); + return 1; +} + +drm_public struct drm_intel_decode * +drm_intel_decode_context_alloc(uint32_t devid) +{ + struct drm_intel_decode *ctx; + int gen = 0; + + if (IS_GEN8(devid)) + gen = 8; + else if (IS_GEN7(devid)) + gen = 7; + else if (IS_GEN6(devid)) + gen = 6; + else if (IS_GEN5(devid)) + gen = 5; + else if (IS_GEN4(devid)) + gen = 4; + else if (IS_GEN3(devid)) + gen = 3; + else if (IS_GEN2(devid)) + gen = 2; + else + /* Just assume future unknown platforms behave as gen8. */ + gen = 8; + + if (!gen) + return NULL; + + ctx = calloc(1, sizeof(struct drm_intel_decode)); + if (!ctx) + return NULL; + + ctx->devid = devid; + ctx->gen = gen; + ctx->out = stdout; + + return ctx; +} + +drm_public void +drm_intel_decode_context_free(struct drm_intel_decode *ctx) +{ + free(ctx); +} + +drm_public void +drm_intel_decode_set_dump_past_end(struct drm_intel_decode *ctx, + int dump_past_end) +{ + ctx->dump_past_end = !!dump_past_end; +} + +drm_public void +drm_intel_decode_set_batch_pointer(struct drm_intel_decode *ctx, + void *data, uint32_t hw_offset, int count) +{ + ctx->base_data = data; + ctx->base_hw_offset = hw_offset; + ctx->base_count = count; +} + +drm_public void +drm_intel_decode_set_head_tail(struct drm_intel_decode *ctx, + uint32_t head, uint32_t tail) +{ + ctx->head = head; + ctx->tail = tail; +} + +drm_public void +drm_intel_decode_set_output_file(struct drm_intel_decode *ctx, + FILE *output) +{ + ctx->out = output; +} + +/** + * Decodes an i830-i915 batch buffer, writing the output to stdout. + * + * \param data batch buffer contents + * \param count number of DWORDs to decode in the batch buffer + * \param hw_offset hardware address for the buffer + */ +drm_public void +drm_intel_decode(struct drm_intel_decode *ctx) +{ + int ret; + unsigned int index = 0; + uint32_t devid; + int size; + void *temp; + + if (!ctx) + return; + + /* Put a scratch page full of obviously undefined data after + * the batchbuffer. This lets us avoid a bunch of length + * checking in statically sized packets. + */ + size = ctx->base_count * 4; + temp = malloc(size + 4096); + memcpy(temp, ctx->base_data, size); + memset((char *)temp + size, 0xd0, 4096); + ctx->data = temp; + + ctx->hw_offset = ctx->base_hw_offset; + ctx->count = ctx->base_count; + + devid = ctx->devid; + head_offset = ctx->head; + tail_offset = ctx->tail; + out = ctx->out; + + saved_s2_set = 0; + saved_s4_set = 1; + + while (ctx->count > 0) { + index = 0; + + switch ((ctx->data[index] & 0xe0000000) >> 29) { + case 0x0: + ret = decode_mi(ctx); + + /* If MI_BATCHBUFFER_END happened, then dump + * the rest of the output in case we some day + * want it in debugging, but don't decode it + * since it'll just confuse in the common + * case. + */ + if (ret == -1) { + if (ctx->dump_past_end) { + index++; + } else { + for (index = index + 1; index < ctx->count; + index++) { + instr_out(ctx, index, "\n"); + } + } + } else + index += ret; + break; + case 0x2: + index += decode_2d(ctx); + break; + case 0x3: + if (IS_9XX(devid) && !IS_GEN3(devid)) { + index += + decode_3d_965(ctx); + } else if (IS_GEN3(devid)) { + index += decode_3d(ctx); + } else { + index += + decode_3d_i830(ctx); + } + break; + default: + instr_out(ctx, index, "UNKNOWN\n"); + index++; + break; + } + fflush(out); + + if (ctx->count < index) + break; + + ctx->count -= index; + ctx->data += index; + ctx->hw_offset += 4 * index; + } + + free(temp); +} diff --git a/intel/libdrm_intel.pc.in b/intel/libdrm_intel.pc.in new file mode 100644 index 0000000..670e4fe --- /dev/null +++ b/intel/libdrm_intel.pc.in @@ -0,0 +1,11 @@ +prefix=@prefix@ +exec_prefix=@exec_prefix@ +libdir=@libdir@ +includedir=@includedir@ + +Name: libdrm_intel +Description: Userspace interface to intel kernel DRM services +Version: @PACKAGE_VERSION@ +Requires: libdrm +Libs: -L${libdir} -ldrm_intel +Cflags: -I${includedir} -I${includedir}/libdrm diff --git a/intel/meson.build b/intel/meson.build new file mode 100644 index 0000000..4af2a35 --- /dev/null +++ b/intel/meson.build @@ -0,0 +1,109 @@ +# Copyright © 2017-2018 Intel Corporation + +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: + +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. + +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +libdrm_intel = library( + 'drm_intel', + [ + files( + 'intel_bufmgr.c', 'intel_bufmgr_fake.c', 'intel_bufmgr_gem.c', + 'intel_decode.c', 'mm.c', + ), + config_file, + ], + include_directories : [inc_root, inc_drm], + link_with : libdrm, + dependencies : [dep_pciaccess, dep_threads, dep_rt, dep_valgrind, dep_atomic_ops], + c_args : libdrm_c_args, + gnu_symbol_visibility : 'hidden', + version : '1.0.0', + install : true, +) + +ext_libdrm_intel = declare_dependency( + link_with : [libdrm, libdrm_intel], + include_directories : [inc_drm, include_directories('.')], +) + +if meson.version().version_compare('>= 0.54.0') + meson.override_dependency('libdrm_intel', ext_libdrm_intel) +endif + +install_headers( + 'intel_bufmgr.h', 'intel_aub.h', 'intel_debug.h', + subdir : 'libdrm', +) + +pkg.generate( + libdrm_intel, + name : 'libdrm_intel', + subdirs : ['.', 'libdrm'], + requires : 'libdrm', + description : 'Userspace interface to intel kernel DRM services', +) + +test_decode = executable( + 'test_decode', + files('test_decode.c'), + include_directories : [inc_root, inc_drm], + link_with : [libdrm, libdrm_intel], + c_args : libdrm_c_args, + gnu_symbol_visibility : 'hidden', +) + +test( + 'gen4-3d.batch', + find_program('tests/gen4-3d.batch.sh'), + workdir : meson.current_build_dir(), +) +test( + 'gen45-3d.batch', + find_program('tests/gm45-3d.batch.sh'), + workdir : meson.current_build_dir(), +) +test( + 'gen5-3d.batch', + find_program('tests/gen5-3d.batch.sh'), + workdir : meson.current_build_dir(), +) +test( + 'gen6-3d.batch', + find_program('tests/gen6-3d.batch.sh'), + workdir : meson.current_build_dir(), +) +test( + 'gen7-3d.batch', + find_program('tests/gen7-3d.batch.sh'), + workdir : meson.current_build_dir(), +) +test( + 'gen7-2d-copy.batch', + find_program('tests/gen7-2d-copy.batch.sh'), + workdir : meson.current_build_dir(), +) + +test( + 'intel-symbols-check', + symbols_check, + args : [ + '--lib', libdrm_intel, + '--symbols-file', files('intel-symbols.txt'), + '--nm', prog_nm.path(), + ], +) diff --git a/intel/mm.c b/intel/mm.c new file mode 100644 index 0000000..79d8719 --- /dev/null +++ b/intel/mm.c @@ -0,0 +1,260 @@ +/* + * GLX Hardware Device Driver common code + * Copyright (C) 1999 Wittawat Yamwong + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * WITTAWAT YAMWONG, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <stdlib.h> +#include <assert.h> + +#include "xf86drm.h" +#include "libdrm_macros.h" +#include "mm.h" + +drm_private void mmDumpMemInfo(const struct mem_block *heap) +{ + drmMsg("Memory heap %p:\n", (void *)heap); + if (heap == 0) { + drmMsg(" heap == 0\n"); + } else { + const struct mem_block *p; + + for (p = heap->next; p != heap; p = p->next) { + drmMsg(" Offset:%08x, Size:%08x, %c%c\n", p->ofs, + p->size, p->free ? 'F' : '.', + p->reserved ? 'R' : '.'); + } + + drmMsg("\nFree list:\n"); + + for (p = heap->next_free; p != heap; p = p->next_free) { + drmMsg(" FREE Offset:%08x, Size:%08x, %c%c\n", p->ofs, + p->size, p->free ? 'F' : '.', + p->reserved ? 'R' : '.'); + } + + } + drmMsg("End of memory blocks\n"); +} + +drm_private struct mem_block *mmInit(int ofs, int size) +{ + struct mem_block *heap, *block; + + if (size <= 0) + return NULL; + + heap = (struct mem_block *)calloc(1, sizeof(struct mem_block)); + if (!heap) + return NULL; + + block = (struct mem_block *)calloc(1, sizeof(struct mem_block)); + if (!block) { + free(heap); + return NULL; + } + + heap->next = block; + heap->prev = block; + heap->next_free = block; + heap->prev_free = block; + + block->heap = heap; + block->next = heap; + block->prev = heap; + block->next_free = heap; + block->prev_free = heap; + + block->ofs = ofs; + block->size = size; + block->free = 1; + + return heap; +} + +static struct mem_block *SliceBlock(struct mem_block *p, + int startofs, int size, + int reserved, int alignment) +{ + struct mem_block *newblock; + + /* break left [p, newblock, p->next], then p = newblock */ + if (startofs > p->ofs) { + newblock = + (struct mem_block *)calloc(1, sizeof(struct mem_block)); + if (!newblock) + return NULL; + newblock->ofs = startofs; + newblock->size = p->size - (startofs - p->ofs); + newblock->free = 1; + newblock->heap = p->heap; + + newblock->next = p->next; + newblock->prev = p; + p->next->prev = newblock; + p->next = newblock; + + newblock->next_free = p->next_free; + newblock->prev_free = p; + p->next_free->prev_free = newblock; + p->next_free = newblock; + + p->size -= newblock->size; + p = newblock; + } + + /* break right, also [p, newblock, p->next] */ + if (size < p->size) { + newblock = + (struct mem_block *)calloc(1, sizeof(struct mem_block)); + if (!newblock) + return NULL; + newblock->ofs = startofs + size; + newblock->size = p->size - size; + newblock->free = 1; + newblock->heap = p->heap; + + newblock->next = p->next; + newblock->prev = p; + p->next->prev = newblock; + p->next = newblock; + + newblock->next_free = p->next_free; + newblock->prev_free = p; + p->next_free->prev_free = newblock; + p->next_free = newblock; + + p->size = size; + } + + /* p = middle block */ + p->free = 0; + + /* Remove p from the free list: + */ + p->next_free->prev_free = p->prev_free; + p->prev_free->next_free = p->next_free; + + p->next_free = 0; + p->prev_free = 0; + + p->reserved = reserved; + return p; +} + +drm_private struct mem_block *mmAllocMem(struct mem_block *heap, int size, + int align2, int startSearch) +{ + struct mem_block *p; + const int mask = (1 << align2) - 1; + int startofs = 0; + int endofs; + + if (!heap || align2 < 0 || size <= 0) + return NULL; + + for (p = heap->next_free; p != heap; p = p->next_free) { + assert(p->free); + + startofs = (p->ofs + mask) & ~mask; + if (startofs < startSearch) { + startofs = startSearch; + } + endofs = startofs + size; + if (endofs <= (p->ofs + p->size)) + break; + } + + if (p == heap) + return NULL; + + assert(p->free); + p = SliceBlock(p, startofs, size, 0, mask + 1); + + return p; +} + +static int Join2Blocks(struct mem_block *p) +{ + /* XXX there should be some assertions here */ + + /* NOTE: heap->free == 0 */ + + if (p->free && p->next->free) { + struct mem_block *q = p->next; + + assert(p->ofs + p->size == q->ofs); + p->size += q->size; + + p->next = q->next; + q->next->prev = p; + + q->next_free->prev_free = q->prev_free; + q->prev_free->next_free = q->next_free; + + free(q); + return 1; + } + return 0; +} + +drm_private int mmFreeMem(struct mem_block *b) +{ + if (!b) + return 0; + + if (b->free) { + drmMsg("block already free\n"); + return -1; + } + if (b->reserved) { + drmMsg("block is reserved\n"); + return -1; + } + + b->free = 1; + b->next_free = b->heap->next_free; + b->prev_free = b->heap; + b->next_free->prev_free = b; + b->prev_free->next_free = b; + + Join2Blocks(b); + if (b->prev != b->heap) + Join2Blocks(b->prev); + + return 0; +} + +drm_private void mmDestroy(struct mem_block *heap) +{ + struct mem_block *p; + + if (!heap) + return; + + for (p = heap->next; p != heap;) { + struct mem_block *next = p->next; + free(p); + p = next; + } + + free(heap); +} diff --git a/intel/mm.h b/intel/mm.h new file mode 100644 index 0000000..be3d90d --- /dev/null +++ b/intel/mm.h @@ -0,0 +1,80 @@ +/* + * GLX Hardware Device Driver common code + * Copyright (C) 1999 Wittawat Yamwong + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * KEITH WHITWELL, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * Memory manager code. Primarily used by device drivers to manage texture + * heaps, etc. + */ + +#ifndef MM_H +#define MM_H + +#include "libdrm_macros.h" + +struct mem_block { + struct mem_block *next, *prev; + struct mem_block *next_free, *prev_free; + struct mem_block *heap; + int ofs, size; + unsigned int free:1; + unsigned int reserved:1; +}; + +/** + * input: total size in bytes + * return: a heap pointer if OK, NULL if error + */ +drm_private extern struct mem_block *mmInit(int ofs, int size); + +/** + * Allocate 'size' bytes with 2^align2 bytes alignment, + * restrict the search to free memory after 'startSearch' + * depth and back buffers should be in different 4mb banks + * to get better page hits if possible + * input: size = size of block + * align2 = 2^align2 bytes alignment + * startSearch = linear offset from start of heap to begin search + * return: pointer to the allocated block, 0 if error + */ +drm_private extern struct mem_block *mmAllocMem(struct mem_block *heap, + int size, int align2, + int startSearch); + +/** + * Free block starts at offset + * input: pointer to a block + * return: 0 if OK, -1 if error + */ +drm_private extern int mmFreeMem(struct mem_block *b); + +/** + * destroy MM + */ +drm_private extern void mmDestroy(struct mem_block *mmInit); + +/** + * For debugging purpose. + */ +drm_private extern void mmDumpMemInfo(const struct mem_block *mmInit); + +#endif diff --git a/intel/test_decode.c b/intel/test_decode.c new file mode 100644 index 0000000..c47752c --- /dev/null +++ b/intel/test_decode.c @@ -0,0 +1,195 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <err.h> + +#include "libdrm_macros.h" +#include "intel_bufmgr.h" +#include "intel_chipset.h" + +#define HW_OFFSET 0x12300000 + +static void +usage(void) +{ + fprintf(stderr, "usage:\n"); + fprintf(stderr, " test_decode <batch>\n"); + fprintf(stderr, " test_decode <batch> -dump\n"); + exit(1); +} + +static void +read_file(const char *filename, void **ptr, size_t *size) +{ + int fd, ret; + struct stat st; + + fd = open(filename, O_RDONLY); + if (fd < 0) + errx(1, "couldn't open `%s'", filename); + + ret = fstat(fd, &st); + if (ret) + errx(1, "couldn't stat `%s'", filename); + + *size = st.st_size; + *ptr = drm_mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0); + if (*ptr == MAP_FAILED) + errx(1, "couldn't map `%s'", filename); + + close(fd); +} + +static void +dump_batch(struct drm_intel_decode *ctx, const char *batch_filename) +{ + void *batch_ptr; + size_t batch_size; + + read_file(batch_filename, &batch_ptr, &batch_size); + + drm_intel_decode_set_batch_pointer(ctx, batch_ptr, HW_OFFSET, + batch_size / 4); + drm_intel_decode_set_output_file(ctx, stdout); + + drm_intel_decode(ctx); +} + +static void +compare_batch(struct drm_intel_decode *ctx, const char *batch_filename) +{ + FILE *out = NULL; + char *ptr; + void *ref_ptr, *batch_ptr; +#if HAVE_OPEN_MEMSTREAM + size_t size; +#endif + size_t ref_size, batch_size; + const char *ref_suffix = "-ref.txt"; + char *ref_filename; + + ref_filename = malloc(strlen(batch_filename) + strlen(ref_suffix) + 1); + sprintf(ref_filename, "%s%s", batch_filename, ref_suffix); + + /* Read the batch and reference. */ + read_file(batch_filename, &batch_ptr, &batch_size); + read_file(ref_filename, &ref_ptr, &ref_size); + + /* Set up our decode output in memory, because I don't want to + * figure out how to output to a file in a safe and sane way + * inside of an automake project's test infrastructure. + */ +#if HAVE_OPEN_MEMSTREAM + out = open_memstream(&ptr, &size); +#else + fprintf(stderr, "platform lacks open_memstream, skipping.\n"); + exit(77); +#endif + + drm_intel_decode_set_batch_pointer(ctx, batch_ptr, HW_OFFSET, + batch_size / 4); + drm_intel_decode_set_output_file(ctx, out); + + drm_intel_decode(ctx); + + if (strcmp(ref_ptr, ptr) != 0) { + fprintf(stderr, "Decode mismatch with reference `%s'.\n", + ref_filename); + fprintf(stderr, "You can dump the new output using:\n"); + fprintf(stderr, " test_decode \"%s\" -dump\n", batch_filename); + exit(1); + } + + fclose(out); + free(ref_filename); + free(ptr); +} + +static uint16_t +infer_devid(const char *batch_filename) +{ + struct { + const char *name; + uint16_t devid; + } chipsets[] = { + { "830", 0x3577}, + { "855", 0x3582}, + { "945", 0x2772}, + { "gen4", 0x2a02 }, + { "gm45", 0x2a42 }, + { "gen5", PCI_CHIP_ILD_G }, + { "gen6", PCI_CHIP_SANDYBRIDGE_GT2 }, + { "gen7", PCI_CHIP_IVYBRIDGE_GT2 }, + { "gen8", 0x1616 }, + { NULL, 0 }, + }; + int i; + + for (i = 0; chipsets[i].name != NULL; i++) { + if (strstr(batch_filename, chipsets[i].name)) + return chipsets[i].devid; + } + + fprintf(stderr, "Couldn't guess chipset id from batch filename `%s'.\n", + batch_filename); + fprintf(stderr, "Must be contain one of:\n"); + for (i = 0; chipsets[i].name != NULL; i++) { + fprintf(stderr, " %s\n", chipsets[i].name); + } + exit(1); +} + +int +main(int argc, char **argv) +{ + uint16_t devid; + struct drm_intel_decode *ctx; + + if (argc < 2) + usage(); + + + devid = infer_devid(argv[1]); + + ctx = drm_intel_decode_context_alloc(devid); + + if (argc == 3) { + if (strcmp(argv[2], "-dump") == 0) + dump_batch(ctx, argv[1]); + else + usage(); + } else { + compare_batch(ctx, argv[1]); + } + + drm_intel_decode_context_free(ctx); + + return 0; +} diff --git a/intel/tests/.gitignore b/intel/tests/.gitignore new file mode 100644 index 0000000..e9d01ec --- /dev/null +++ b/intel/tests/.gitignore @@ -0,0 +1 @@ +*-new.txt diff --git a/intel/tests/gen4-3d.batch b/intel/tests/gen4-3d.batch Binary files differnew file mode 100644 index 0000000..e6911a4 --- /dev/null +++ b/intel/tests/gen4-3d.batch diff --git a/intel/tests/gen4-3d.batch-ref.txt b/intel/tests/gen4-3d.batch-ref.txt new file mode 100644 index 0000000..20aa1d4 --- /dev/null +++ b/intel/tests/gen4-3d.batch-ref.txt @@ -0,0 +1,488 @@ +0x12300000: 0x61040000: 3DSTATE_PIPELINE_SELECT +0x12300004: 0x79090000: 3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP +0x12300008: 0x00000000: dword 1 +0x1230000c: 0x61020000: STATE_SIP +0x12300010: 0x00000000: dword 1 +0x12300014: 0x780b0000: 3DSTATE_VF_STATISTICS +0x12300018: 0x61010004: STATE_BASE_ADDRESS +0x1230001c: 0x00000001: general state base address 0x00000000 +0x12300020: 0x00000001: surface state base address 0x00000000 +0x12300024: 0x00000001: indirect state base address 0x00000000 +0x12300028: 0x00000001: general state upper bound disabled +0x1230002c: 0x00000001: indirect state upper bound disabled +0x12300030: 0x78010004: 3DSTATE_BINDING_TABLE_POINTERS +0x12300034: 0x00007e20: VS binding table +0x12300038: 0x00000000: GS binding table +0x1230003c: 0x00000000: Clip binding table +0x12300040: 0x00000000: SF binding table +0x12300044: 0x00007e20: WM binding table +0x12300048: 0x79010003: 3DSTATE_CONSTANT_COLOR +0x1230004c: 0x00000000: dword 1 +0x12300050: 0x00000000: dword 2 +0x12300054: 0x00000000: dword 3 +0x12300058: 0x00000000: dword 4 +0x1230005c: 0x79050003: 3DSTATE_DEPTH_BUFFER +0x12300060: 0x2c0805ff: 2D, z24s8, pitch = 1536 bytes, tiled +0x12300064: 0x00000000: depth offset +0x12300068: 0x09584ac0: 300x300 +0x1230006c: 0x00000000: volume depth +0x12300070: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300074: 0x00007d60: VS state +0x12300078: 0x00000000: GS state +0x1230007c: 0x00007d21: Clip state +0x12300080: 0x00007d80: SF state +0x12300084: 0x00007de0: WM state +0x12300088: 0x00007fc0: CC state +0x1230008c: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300090: 0x0320a020: vs fence: 32, clip_fence: 50, gs_fence: 40 +0x12300094: 0x10000042: sf fence: 66, vfe_fence: 0, cs_fence: 256 +0x12300098: 0x60010000: CS_URB_STATE +0x1230009c: 0x00000024: entry_size: 2 [192 bytes], n_entries: 4 +0x123000a0: 0x79000002: 3DSTATE_DRAWING_RECTANGLE +0x123000a4: 0x00000000: top left: 0,0 +0x123000a8: 0x012b012b: bottom right: 299,299 +0x123000ac: 0x00000000: origin: 0,0 +0x123000b0: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x123000b4: 0x0000000c: buffer 0: sequential, pitch 12b +0x123000b8: 0x00000000: buffer address +0x123000bc: 0x00000000: max index +0x123000c0: 0x00000000: mbz +0x123000c4: 0x78090001: 3DSTATE_VERTEX_ELEMENTS +0x123000c8: 0x04400000: buffer 0: valid, type 0x0040, src offset 0x0000 bytes +0x123000cc: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x123000d0: 0x60020100: CONSTANT_BUFFER: valid +0x123000d4: 0x00000001: offset: 0x00000000, length: 128 bytes +0x123000d8: 0x7b001804: 3DPRIMITIVE: tri fan sequential +0x123000dc: 0x00000004: vertex count +0x123000e0: 0x00000000: start vertex +0x123000e4: 0x00000001: instance count +0x123000e8: 0x00000000: start instance +0x123000ec: 0x00000000: index bias +0x123000f0: 0x78010004: 3DSTATE_BINDING_TABLE_POINTERS +0x123000f4: 0x00007b40: VS binding table +0x123000f8: 0x00000000: GS binding table +0x123000fc: 0x00000000: Clip binding table +0x12300100: 0x00000000: SF binding table +0x12300104: 0x00007b40: WM binding table +0x12300108: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x1230010c: 0x00007aa0: VS state +0x12300110: 0x00007a41: GS state +0x12300114: 0x00007a61: Clip state +0x12300118: 0x00007ac0: SF state +0x1230011c: 0x00007b00: WM state +0x12300120: 0x00007cc0: CC state +0x12300124: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300128: 0x0320a020: vs fence: 32, clip_fence: 50, gs_fence: 40 +0x1230012c: 0x10000042: sf fence: 66, vfe_fence: 0, cs_fence: 256 +0x12300130: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x12300134: 0x0000000c: buffer 0: sequential, pitch 12b +0x12300138: 0x00000000: buffer address +0x1230013c: 0x00000000: max index +0x12300140: 0x00000000: mbz +0x12300144: 0x60020100: CONSTANT_BUFFER: valid +0x12300148: 0x00000082: offset: 0x00000080, length: 192 bytes +0x1230014c: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300150: 0x00000052: vertex count +0x12300154: 0x00000000: start vertex +0x12300158: 0x00000001: instance count +0x1230015c: 0x00000000: start instance +0x12300160: 0x00000000: index bias +0x12300164: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300168: 0x00007aa0: VS state +0x1230016c: 0x00007a21: GS state +0x12300170: 0x00007a61: Clip state +0x12300174: 0x00007ac0: SF state +0x12300178: 0x00007b00: WM state +0x1230017c: 0x00007cc0: CC state +0x12300180: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300184: 0x0320a020: vs fence: 32, clip_fence: 50, gs_fence: 40 +0x12300188: 0x10000042: sf fence: 66, vfe_fence: 0, cs_fence: 256 +0x1230018c: 0x60020100: CONSTANT_BUFFER: valid +0x12300190: 0x00000082: offset: 0x00000080, length: 192 bytes +0x12300194: 0x7b001c04: 3DPRIMITIVE: quad list sequential +0x12300198: 0x00000050: vertex count +0x1230019c: 0x00000052: start vertex +0x123001a0: 0x00000001: instance count +0x123001a4: 0x00000000: start instance +0x123001a8: 0x00000000: index bias +0x123001ac: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x123001b0: 0x00007aa0: VS state +0x123001b4: 0x00007a01: GS state +0x123001b8: 0x00007a61: Clip state +0x123001bc: 0x00007ac0: SF state +0x123001c0: 0x00007b00: WM state +0x123001c4: 0x00007cc0: CC state +0x123001c8: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x123001cc: 0x0320a020: vs fence: 32, clip_fence: 50, gs_fence: 40 +0x123001d0: 0x10000042: sf fence: 66, vfe_fence: 0, cs_fence: 256 +0x123001d4: 0x60020100: CONSTANT_BUFFER: valid +0x123001d8: 0x00000142: offset: 0x00000140, length: 192 bytes +0x123001dc: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x123001e0: 0x00000052: vertex count +0x123001e4: 0x000000a2: start vertex +0x123001e8: 0x00000001: instance count +0x123001ec: 0x00000000: start instance +0x123001f0: 0x00000000: index bias +0x123001f4: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x123001f8: 0x00007aa0: VS state +0x123001fc: 0x000079e1: GS state +0x12300200: 0x00007a61: Clip state +0x12300204: 0x00007ac0: SF state +0x12300208: 0x00007b00: WM state +0x1230020c: 0x00007cc0: CC state +0x12300210: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300214: 0x0320a020: vs fence: 32, clip_fence: 50, gs_fence: 40 +0x12300218: 0x10000042: sf fence: 66, vfe_fence: 0, cs_fence: 256 +0x1230021c: 0x60020100: CONSTANT_BUFFER: valid +0x12300220: 0x00000142: offset: 0x00000140, length: 192 bytes +0x12300224: 0x7b001c04: 3DPRIMITIVE: quad list sequential +0x12300228: 0x00000050: vertex count +0x1230022c: 0x000000f4: start vertex +0x12300230: 0x00000001: instance count +0x12300234: 0x00000000: start instance +0x12300238: 0x00000000: index bias +0x1230023c: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300240: 0x00007aa0: VS state +0x12300244: 0x000079c1: GS state +0x12300248: 0x00007a61: Clip state +0x1230024c: 0x00007ac0: SF state +0x12300250: 0x00007b00: WM state +0x12300254: 0x00007cc0: CC state +0x12300258: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x1230025c: 0x0320a020: vs fence: 32, clip_fence: 50, gs_fence: 40 +0x12300260: 0x10000042: sf fence: 66, vfe_fence: 0, cs_fence: 256 +0x12300264: 0x60020100: CONSTANT_BUFFER: valid +0x12300268: 0x00000142: offset: 0x00000140, length: 192 bytes +0x1230026c: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300270: 0x000079a0: VS state +0x12300274: 0x000079c1: GS state +0x12300278: 0x00007a61: Clip state +0x1230027c: 0x00007ac0: SF state +0x12300280: 0x00007b00: WM state +0x12300284: 0x00007cc0: CC state +0x12300288: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x1230028c: 0x0320a020: vs fence: 32, clip_fence: 50, gs_fence: 40 +0x12300290: 0x10000042: sf fence: 66, vfe_fence: 0, cs_fence: 256 +0x12300294: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x12300298: 0x00000018: buffer 0: sequential, pitch 24b +0x1230029c: 0x00000f48: buffer address +0x123002a0: 0x00000000: max index +0x123002a4: 0x00000000: mbz +0x123002a8: 0x78090003: 3DSTATE_VERTEX_ELEMENTS +0x123002ac: 0x04400000: buffer 0: valid, type 0x0040, src offset 0x0000 bytes +0x123002b0: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x123002b4: 0x0440000c: buffer 0: valid, type 0x0040, src offset 0x000c bytes +0x123002b8: 0x11130004: (X, Y, Z, 1.0), dst offset 0x10 bytes +0x123002bc: 0x60020100: CONSTANT_BUFFER: valid +0x123002c0: 0x00000202: offset: 0x00000200, length: 192 bytes +0x123002c4: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x123002c8: 0x000000a2: vertex count +0x123002cc: 0x00000000: start vertex +0x123002d0: 0x00000001: instance count +0x123002d4: 0x00000000: start instance +0x123002d8: 0x00000000: index bias +0x123002dc: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x123002e0: 0x000079a0: VS state +0x123002e4: 0x00000000: GS state +0x123002e8: 0x00007901: Clip state +0x123002ec: 0x00007940: SF state +0x123002f0: 0x00007960: WM state +0x123002f4: 0x00007cc0: CC state +0x123002f8: 0x00000000: MI_NOOP +0x123002fc: 0x00000000: MI_NOOP +0x12300300: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300304: 0x0320a020: vs fence: 32, clip_fence: 50, gs_fence: 40 +0x12300308: 0x10000042: sf fence: 66, vfe_fence: 0, cs_fence: 256 +0x1230030c: 0x60020100: CONSTANT_BUFFER: valid +0x12300310: 0x00000202: offset: 0x00000200, length: 192 bytes +0x12300314: 0x7b001404: 3DPRIMITIVE: tri strip sequential +0x12300318: 0x0000002a: vertex count +0x1230031c: 0x000000a2: start vertex +0x12300320: 0x00000001: instance count +0x12300324: 0x00000000: start instance +0x12300328: 0x00000000: index bias +0x1230032c: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300330: 0x00007860: VS state +0x12300334: 0x00007801: GS state +0x12300338: 0x00007821: Clip state +0x1230033c: 0x00007880: SF state +0x12300340: 0x000078a0: WM state +0x12300344: 0x00007cc0: CC state +0x12300348: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x1230034c: 0x0320a020: vs fence: 32, clip_fence: 50, gs_fence: 40 +0x12300350: 0x10000042: sf fence: 66, vfe_fence: 0, cs_fence: 256 +0x12300354: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x12300358: 0x0000000c: buffer 0: sequential, pitch 12b +0x1230035c: 0x00002268: buffer address +0x12300360: 0x00000000: max index +0x12300364: 0x00000000: mbz +0x12300368: 0x78090001: 3DSTATE_VERTEX_ELEMENTS +0x1230036c: 0x04400000: buffer 0: valid, type 0x0040, src offset 0x0000 bytes +0x12300370: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x12300374: 0x60020100: CONSTANT_BUFFER: valid +0x12300378: 0x000002c2: offset: 0x000002c0, length: 192 bytes +0x1230037c: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300380: 0x0000002a: vertex count +0x12300384: 0x00000000: start vertex +0x12300388: 0x00000001: instance count +0x1230038c: 0x00000000: start instance +0x12300390: 0x00000000: index bias +0x12300394: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300398: 0x00007860: VS state +0x1230039c: 0x000077e1: GS state +0x123003a0: 0x00007821: Clip state +0x123003a4: 0x00007880: SF state +0x123003a8: 0x000078a0: WM state +0x123003ac: 0x00007cc0: CC state +0x123003b0: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x123003b4: 0x0320a020: vs fence: 32, clip_fence: 50, gs_fence: 40 +0x123003b8: 0x10000042: sf fence: 66, vfe_fence: 0, cs_fence: 256 +0x123003bc: 0x60020100: CONSTANT_BUFFER: valid +0x123003c0: 0x000002c2: offset: 0x000002c0, length: 192 bytes +0x123003c4: 0x7b001c04: 3DPRIMITIVE: quad list sequential +0x123003c8: 0x00000028: vertex count +0x123003cc: 0x0000002a: start vertex +0x123003d0: 0x00000001: instance count +0x123003d4: 0x00000000: start instance +0x123003d8: 0x00000000: index bias +0x123003dc: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x123003e0: 0x00007860: VS state +0x123003e4: 0x000077c1: GS state +0x123003e8: 0x00007821: Clip state +0x123003ec: 0x00007880: SF state +0x123003f0: 0x000078a0: WM state +0x123003f4: 0x00007cc0: CC state +0x123003f8: 0x00000000: MI_NOOP +0x123003fc: 0x00000000: MI_NOOP +0x12300400: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300404: 0x0320a020: vs fence: 32, clip_fence: 50, gs_fence: 40 +0x12300408: 0x10000042: sf fence: 66, vfe_fence: 0, cs_fence: 256 +0x1230040c: 0x60020100: CONSTANT_BUFFER: valid +0x12300410: 0x00000382: offset: 0x00000380, length: 192 bytes +0x12300414: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300418: 0x0000002a: vertex count +0x1230041c: 0x00000052: start vertex +0x12300420: 0x00000001: instance count +0x12300424: 0x00000000: start instance +0x12300428: 0x00000000: index bias +0x1230042c: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300430: 0x00007860: VS state +0x12300434: 0x000077a1: GS state +0x12300438: 0x00007821: Clip state +0x1230043c: 0x00007880: SF state +0x12300440: 0x000078a0: WM state +0x12300444: 0x00007cc0: CC state +0x12300448: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x1230044c: 0x0320a020: vs fence: 32, clip_fence: 50, gs_fence: 40 +0x12300450: 0x10000042: sf fence: 66, vfe_fence: 0, cs_fence: 256 +0x12300454: 0x60020100: CONSTANT_BUFFER: valid +0x12300458: 0x00000382: offset: 0x00000380, length: 192 bytes +0x1230045c: 0x7b001c04: 3DPRIMITIVE: quad list sequential +0x12300460: 0x00000028: vertex count +0x12300464: 0x0000007c: start vertex +0x12300468: 0x00000001: instance count +0x1230046c: 0x00000000: start instance +0x12300470: 0x00000000: index bias +0x12300474: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300478: 0x00007860: VS state +0x1230047c: 0x00007781: GS state +0x12300480: 0x00007821: Clip state +0x12300484: 0x00007880: SF state +0x12300488: 0x000078a0: WM state +0x1230048c: 0x00007cc0: CC state +0x12300490: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300494: 0x0320a020: vs fence: 32, clip_fence: 50, gs_fence: 40 +0x12300498: 0x10000042: sf fence: 66, vfe_fence: 0, cs_fence: 256 +0x1230049c: 0x60020100: CONSTANT_BUFFER: valid +0x123004a0: 0x00000382: offset: 0x00000380, length: 192 bytes +0x123004a4: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x123004a8: 0x00007760: VS state +0x123004ac: 0x00007781: GS state +0x123004b0: 0x00007821: Clip state +0x123004b4: 0x00007880: SF state +0x123004b8: 0x000078a0: WM state +0x123004bc: 0x00007cc0: CC state +0x123004c0: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x123004c4: 0x0320a020: vs fence: 32, clip_fence: 50, gs_fence: 40 +0x123004c8: 0x10000042: sf fence: 66, vfe_fence: 0, cs_fence: 256 +0x123004cc: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x123004d0: 0x00000018: buffer 0: sequential, pitch 24b +0x123004d4: 0x00002a30: buffer address +0x123004d8: 0x00000000: max index +0x123004dc: 0x00000000: mbz +0x123004e0: 0x78090003: 3DSTATE_VERTEX_ELEMENTS +0x123004e4: 0x04400000: buffer 0: valid, type 0x0040, src offset 0x0000 bytes +0x123004e8: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x123004ec: 0x0440000c: buffer 0: valid, type 0x0040, src offset 0x000c bytes +0x123004f0: 0x11130004: (X, Y, Z, 1.0), dst offset 0x10 bytes +0x123004f4: 0x60020100: CONSTANT_BUFFER: valid +0x123004f8: 0x00000442: offset: 0x00000440, length: 192 bytes +0x123004fc: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300500: 0x00000052: vertex count +0x12300504: 0x00000000: start vertex +0x12300508: 0x00000001: instance count +0x1230050c: 0x00000000: start instance +0x12300510: 0x00000000: index bias +0x12300514: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300518: 0x00007760: VS state +0x1230051c: 0x00000000: GS state +0x12300520: 0x000076c1: Clip state +0x12300524: 0x00007700: SF state +0x12300528: 0x00007720: WM state +0x1230052c: 0x00007cc0: CC state +0x12300530: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300534: 0x0320a020: vs fence: 32, clip_fence: 50, gs_fence: 40 +0x12300538: 0x10000042: sf fence: 66, vfe_fence: 0, cs_fence: 256 +0x1230053c: 0x60020100: CONSTANT_BUFFER: valid +0x12300540: 0x00000442: offset: 0x00000440, length: 192 bytes +0x12300544: 0x7b001404: 3DPRIMITIVE: tri strip sequential +0x12300548: 0x00000016: vertex count +0x1230054c: 0x00000052: start vertex +0x12300550: 0x00000001: instance count +0x12300554: 0x00000000: start instance +0x12300558: 0x00000000: index bias +0x1230055c: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300560: 0x00007620: VS state +0x12300564: 0x000075c1: GS state +0x12300568: 0x000075e1: Clip state +0x1230056c: 0x00007640: SF state +0x12300570: 0x00007660: WM state +0x12300574: 0x00007cc0: CC state +0x12300578: 0x00000000: MI_NOOP +0x1230057c: 0x00000000: MI_NOOP +0x12300580: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300584: 0x0320a020: vs fence: 32, clip_fence: 50, gs_fence: 40 +0x12300588: 0x10000042: sf fence: 66, vfe_fence: 0, cs_fence: 256 +0x1230058c: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x12300590: 0x0000000c: buffer 0: sequential, pitch 12b +0x12300594: 0x000033f0: buffer address +0x12300598: 0x00000000: max index +0x1230059c: 0x00000000: mbz +0x123005a0: 0x78090001: 3DSTATE_VERTEX_ELEMENTS +0x123005a4: 0x04400000: buffer 0: valid, type 0x0040, src offset 0x0000 bytes +0x123005a8: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x123005ac: 0x60020100: CONSTANT_BUFFER: valid +0x123005b0: 0x00000502: offset: 0x00000500, length: 192 bytes +0x123005b4: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x123005b8: 0x0000002a: vertex count +0x123005bc: 0x00000000: start vertex +0x123005c0: 0x00000001: instance count +0x123005c4: 0x00000000: start instance +0x123005c8: 0x00000000: index bias +0x123005cc: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x123005d0: 0x00007620: VS state +0x123005d4: 0x000075a1: GS state +0x123005d8: 0x000075e1: Clip state +0x123005dc: 0x00007640: SF state +0x123005e0: 0x00007660: WM state +0x123005e4: 0x00007cc0: CC state +0x123005e8: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x123005ec: 0x0320a020: vs fence: 32, clip_fence: 50, gs_fence: 40 +0x123005f0: 0x10000042: sf fence: 66, vfe_fence: 0, cs_fence: 256 +0x123005f4: 0x60020100: CONSTANT_BUFFER: valid +0x123005f8: 0x00000502: offset: 0x00000500, length: 192 bytes +0x123005fc: 0x7b001c04: 3DPRIMITIVE: quad list sequential +0x12300600: 0x00000028: vertex count +0x12300604: 0x0000002a: start vertex +0x12300608: 0x00000001: instance count +0x1230060c: 0x00000000: start instance +0x12300610: 0x00000000: index bias +0x12300614: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300618: 0x00007620: VS state +0x1230061c: 0x00007581: GS state +0x12300620: 0x000075e1: Clip state +0x12300624: 0x00007640: SF state +0x12300628: 0x00007660: WM state +0x1230062c: 0x00007cc0: CC state +0x12300630: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300634: 0x0320a020: vs fence: 32, clip_fence: 50, gs_fence: 40 +0x12300638: 0x10000042: sf fence: 66, vfe_fence: 0, cs_fence: 256 +0x1230063c: 0x60020100: CONSTANT_BUFFER: valid +0x12300640: 0x000005c2: offset: 0x000005c0, length: 192 bytes +0x12300644: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300648: 0x0000002a: vertex count +0x1230064c: 0x00000052: start vertex +0x12300650: 0x00000001: instance count +0x12300654: 0x00000000: start instance +0x12300658: 0x00000000: index bias +0x1230065c: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300660: 0x00007620: VS state +0x12300664: 0x00007561: GS state +0x12300668: 0x000075e1: Clip state +0x1230066c: 0x00007640: SF state +0x12300670: 0x00007660: WM state +0x12300674: 0x00007cc0: CC state +0x12300678: 0x00000000: MI_NOOP +0x1230067c: 0x00000000: MI_NOOP +0x12300680: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300684: 0x0320a020: vs fence: 32, clip_fence: 50, gs_fence: 40 +0x12300688: 0x10000042: sf fence: 66, vfe_fence: 0, cs_fence: 256 +0x1230068c: 0x60020100: CONSTANT_BUFFER: valid +0x12300690: 0x000005c2: offset: 0x000005c0, length: 192 bytes +0x12300694: 0x7b001c04: 3DPRIMITIVE: quad list sequential +0x12300698: 0x00000028: vertex count +0x1230069c: 0x0000007c: start vertex +0x123006a0: 0x00000001: instance count +0x123006a4: 0x00000000: start instance +0x123006a8: 0x00000000: index bias +0x123006ac: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x123006b0: 0x00007620: VS state +0x123006b4: 0x00007541: GS state +0x123006b8: 0x000075e1: Clip state +0x123006bc: 0x00007640: SF state +0x123006c0: 0x00007660: WM state +0x123006c4: 0x00007cc0: CC state +0x123006c8: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x123006cc: 0x0320a020: vs fence: 32, clip_fence: 50, gs_fence: 40 +0x123006d0: 0x10000042: sf fence: 66, vfe_fence: 0, cs_fence: 256 +0x123006d4: 0x60020100: CONSTANT_BUFFER: valid +0x123006d8: 0x000005c2: offset: 0x000005c0, length: 192 bytes +0x123006dc: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x123006e0: 0x00007520: VS state +0x123006e4: 0x00007541: GS state +0x123006e8: 0x000075e1: Clip state +0x123006ec: 0x00007640: SF state +0x123006f0: 0x00007660: WM state +0x123006f4: 0x00007cc0: CC state +0x123006f8: 0x00000000: MI_NOOP +0x123006fc: 0x00000000: MI_NOOP +0x12300700: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300704: 0x0320a020: vs fence: 32, clip_fence: 50, gs_fence: 40 +0x12300708: 0x10000042: sf fence: 66, vfe_fence: 0, cs_fence: 256 +0x1230070c: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x12300710: 0x00000018: buffer 0: sequential, pitch 24b +0x12300714: 0x00003bb8: buffer address +0x12300718: 0x00000000: max index +0x1230071c: 0x00000000: mbz +0x12300720: 0x78090003: 3DSTATE_VERTEX_ELEMENTS +0x12300724: 0x04400000: buffer 0: valid, type 0x0040, src offset 0x0000 bytes +0x12300728: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x1230072c: 0x0440000c: buffer 0: valid, type 0x0040, src offset 0x000c bytes +0x12300730: 0x11130004: (X, Y, Z, 1.0), dst offset 0x10 bytes +0x12300734: 0x60020100: CONSTANT_BUFFER: valid +0x12300738: 0x00000682: offset: 0x00000680, length: 192 bytes +0x1230073c: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300740: 0x00000052: vertex count +0x12300744: 0x00000000: start vertex +0x12300748: 0x00000001: instance count +0x1230074c: 0x00000000: start instance +0x12300750: 0x00000000: index bias +0x12300754: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300758: 0x00007520: VS state +0x1230075c: 0x00000000: GS state +0x12300760: 0x00007481: Clip state +0x12300764: 0x000074c0: SF state +0x12300768: 0x000074e0: WM state +0x1230076c: 0x00007cc0: CC state +0x12300770: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300774: 0x0320a020: vs fence: 32, clip_fence: 50, gs_fence: 40 +0x12300778: 0x10000042: sf fence: 66, vfe_fence: 0, cs_fence: 256 +0x1230077c: 0x60020100: CONSTANT_BUFFER: valid +0x12300780: 0x00000682: offset: 0x00000680, length: 192 bytes +0x12300784: 0x7b001404: 3DPRIMITIVE: tri strip sequential +0x12300788: 0x00000016: vertex count +0x1230078c: 0x00000052: start vertex +0x12300790: 0x00000001: instance count +0x12300794: 0x00000000: start instance +0x12300798: 0x00000000: index bias +0x1230079c: 0x05000000: MI_BATCH_BUFFER_END diff --git a/intel/tests/gen4-3d.batch.sh b/intel/tests/gen4-3d.batch.sh new file mode 120000 index 0000000..796ca5f --- /dev/null +++ b/intel/tests/gen4-3d.batch.sh @@ -0,0 +1 @@ +test-batch.sh
\ No newline at end of file diff --git a/intel/tests/gen5-3d.batch b/intel/tests/gen5-3d.batch Binary files differnew file mode 100644 index 0000000..cf9d8d8 --- /dev/null +++ b/intel/tests/gen5-3d.batch diff --git a/intel/tests/gen5-3d.batch-ref.txt b/intel/tests/gen5-3d.batch-ref.txt new file mode 100644 index 0000000..51dd85f --- /dev/null +++ b/intel/tests/gen5-3d.batch-ref.txt @@ -0,0 +1,512 @@ +0x12300000: 0x69040000: 3DSTATE_PIPELINE_SELECT +0x12300004: 0x79090000: 3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP +0x12300008: 0x00000000: dword 1 +0x1230000c: 0x61020000: STATE_SIP +0x12300010: 0x00000000: dword 1 +0x12300014: 0x680b0000: 3DSTATE_VF_STATISTICS +0x12300018: 0x61010006: STATE_BASE_ADDRESS +0x1230001c: 0x00000001: general state base address 0x00000000 +0x12300020: 0x00000001: surface state base address 0x00000000 +0x12300024: 0x00000001: indirect state base address 0x00000000 +0x12300028: 0x00000001: instruction state base address 0x00000000 +0x1230002c: 0x00000001: general state upper bound disabled +0x12300030: 0x00000001: indirect state upper bound disabled +0x12300034: 0x00000001: instruction state upper bound disabled +0x12300038: 0x78010004: 3DSTATE_BINDING_TABLE_POINTERS +0x1230003c: 0x00007e20: VS binding table +0x12300040: 0x00000000: GS binding table +0x12300044: 0x00000000: Clip binding table +0x12300048: 0x00000000: SF binding table +0x1230004c: 0x00007e20: WM binding table +0x12300050: 0x79010003: 3DSTATE_CONSTANT_COLOR +0x12300054: 0x00000000: dword 1 +0x12300058: 0x00000000: dword 2 +0x1230005c: 0x00000000: dword 3 +0x12300060: 0x00000000: dword 4 +0x12300064: 0x79050004: 3DSTATE_DEPTH_BUFFER +0x12300068: 0x2c0805ff: 2D, z24s8, pitch = 1536 bytes, tiled, HiZ 0, Separate Stencil 0 +0x1230006c: 0x00000000: depth offset +0x12300070: 0x09584ac0: 300x300 +0x12300074: 0x00000000: volume depth +0x12300078: 0x00000000: +0x1230007c: 0x02000000: MI_FLUSH +0x12300080: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300084: 0x00007d60: VS state +0x12300088: 0x00000000: GS state +0x1230008c: 0x00007d21: Clip state +0x12300090: 0x00007d80: SF state +0x12300094: 0x00007de0: WM state +0x12300098: 0x00007fc0: CC state +0x1230009c: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x123000a0: 0x12444100: vs fence: 256, clip_fence: 292, gs_fence: 272 +0x123000a4: 0x40000184: sf fence: 388, vfe_fence: 0, cs_fence: 1024 +0x123000a8: 0x60010000: CS_URB_STATE +0x123000ac: 0x00000024: entry_size: 2 [192 bytes], n_entries: 4 +0x123000b0: 0x79000002: 3DSTATE_DRAWING_RECTANGLE +0x123000b4: 0x00000000: top left: 0,0 +0x123000b8: 0x012b012b: bottom right: 299,299 +0x123000bc: 0x00000000: origin: 0,0 +0x123000c0: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x123000c4: 0x0000000c: buffer 0: sequential, pitch 12b +0x123000c8: 0x00000000: buffer address +0x123000cc: 0x0000ffff: max index +0x123000d0: 0x00000000: mbz +0x123000d4: 0x78090001: 3DSTATE_VERTEX_ELEMENTS +0x123000d8: 0x04400000: buffer 0: valid, type 0x0040, src offset 0x0000 bytes +0x123000dc: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x123000e0: 0x60020100: CONSTANT_BUFFER: valid +0x123000e4: 0x00000001: offset: 0x00000000, length: 128 bytes +0x123000e8: 0x7b001804: 3DPRIMITIVE: tri fan sequential +0x123000ec: 0x00000004: vertex count +0x123000f0: 0x00000000: start vertex +0x123000f4: 0x00000001: instance count +0x123000f8: 0x00000000: start instance +0x123000fc: 0x00000000: index bias +0x12300100: 0x78010004: 3DSTATE_BINDING_TABLE_POINTERS +0x12300104: 0x00007b40: VS binding table +0x12300108: 0x00000000: GS binding table +0x1230010c: 0x00000000: Clip binding table +0x12300110: 0x00000000: SF binding table +0x12300114: 0x00007b40: WM binding table +0x12300118: 0x02000000: MI_FLUSH +0x1230011c: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300120: 0x00007aa0: VS state +0x12300124: 0x00007a41: GS state +0x12300128: 0x00007a61: Clip state +0x1230012c: 0x00007ac0: SF state +0x12300130: 0x00007b00: WM state +0x12300134: 0x00007cc0: CC state +0x12300138: 0x00000000: MI_NOOP +0x1230013c: 0x00000000: MI_NOOP +0x12300140: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300144: 0x12444100: vs fence: 256, clip_fence: 292, gs_fence: 272 +0x12300148: 0x40000184: sf fence: 388, vfe_fence: 0, cs_fence: 1024 +0x1230014c: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x12300150: 0x0000000c: buffer 0: sequential, pitch 12b +0x12300154: 0x00000000: buffer address +0x12300158: 0x00007fff: max index +0x1230015c: 0x00000000: mbz +0x12300160: 0x60020100: CONSTANT_BUFFER: valid +0x12300164: 0x00000082: offset: 0x00000080, length: 192 bytes +0x12300168: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x1230016c: 0x00000052: vertex count +0x12300170: 0x00000000: start vertex +0x12300174: 0x00000001: instance count +0x12300178: 0x00000000: start instance +0x1230017c: 0x00000000: index bias +0x12300180: 0x02000000: MI_FLUSH +0x12300184: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300188: 0x00007aa0: VS state +0x1230018c: 0x00007a21: GS state +0x12300190: 0x00007a61: Clip state +0x12300194: 0x00007ac0: SF state +0x12300198: 0x00007b00: WM state +0x1230019c: 0x00007cc0: CC state +0x123001a0: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x123001a4: 0x12444100: vs fence: 256, clip_fence: 292, gs_fence: 272 +0x123001a8: 0x40000184: sf fence: 388, vfe_fence: 0, cs_fence: 1024 +0x123001ac: 0x60020100: CONSTANT_BUFFER: valid +0x123001b0: 0x00000082: offset: 0x00000080, length: 192 bytes +0x123001b4: 0x7b001c04: 3DPRIMITIVE: quad list sequential +0x123001b8: 0x00000050: vertex count +0x123001bc: 0x00000052: start vertex +0x123001c0: 0x00000001: instance count +0x123001c4: 0x00000000: start instance +0x123001c8: 0x00000000: index bias +0x123001cc: 0x02000000: MI_FLUSH +0x123001d0: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x123001d4: 0x00007aa0: VS state +0x123001d8: 0x00007a01: GS state +0x123001dc: 0x00007a61: Clip state +0x123001e0: 0x00007ac0: SF state +0x123001e4: 0x00007b00: WM state +0x123001e8: 0x00007cc0: CC state +0x123001ec: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x123001f0: 0x12444100: vs fence: 256, clip_fence: 292, gs_fence: 272 +0x123001f4: 0x40000184: sf fence: 388, vfe_fence: 0, cs_fence: 1024 +0x123001f8: 0x60020100: CONSTANT_BUFFER: valid +0x123001fc: 0x00000142: offset: 0x00000140, length: 192 bytes +0x12300200: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300204: 0x00000052: vertex count +0x12300208: 0x000000a2: start vertex +0x1230020c: 0x00000001: instance count +0x12300210: 0x00000000: start instance +0x12300214: 0x00000000: index bias +0x12300218: 0x02000000: MI_FLUSH +0x1230021c: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300220: 0x00007aa0: VS state +0x12300224: 0x000079e1: GS state +0x12300228: 0x00007a61: Clip state +0x1230022c: 0x00007ac0: SF state +0x12300230: 0x00007b00: WM state +0x12300234: 0x00007cc0: CC state +0x12300238: 0x00000000: MI_NOOP +0x1230023c: 0x00000000: MI_NOOP +0x12300240: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300244: 0x12444100: vs fence: 256, clip_fence: 292, gs_fence: 272 +0x12300248: 0x40000184: sf fence: 388, vfe_fence: 0, cs_fence: 1024 +0x1230024c: 0x60020100: CONSTANT_BUFFER: valid +0x12300250: 0x00000142: offset: 0x00000140, length: 192 bytes +0x12300254: 0x7b001c04: 3DPRIMITIVE: quad list sequential +0x12300258: 0x00000050: vertex count +0x1230025c: 0x000000f4: start vertex +0x12300260: 0x00000001: instance count +0x12300264: 0x00000000: start instance +0x12300268: 0x00000000: index bias +0x1230026c: 0x02000000: MI_FLUSH +0x12300270: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300274: 0x00007aa0: VS state +0x12300278: 0x000079c1: GS state +0x1230027c: 0x00007a61: Clip state +0x12300280: 0x00007ac0: SF state +0x12300284: 0x00007b00: WM state +0x12300288: 0x00007cc0: CC state +0x1230028c: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300290: 0x12444100: vs fence: 256, clip_fence: 292, gs_fence: 272 +0x12300294: 0x40000184: sf fence: 388, vfe_fence: 0, cs_fence: 1024 +0x12300298: 0x60020100: CONSTANT_BUFFER: valid +0x1230029c: 0x00000142: offset: 0x00000140, length: 192 bytes +0x123002a0: 0x02000000: MI_FLUSH +0x123002a4: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x123002a8: 0x000079a0: VS state +0x123002ac: 0x000079c1: GS state +0x123002b0: 0x00007a61: Clip state +0x123002b4: 0x00007ac0: SF state +0x123002b8: 0x00007b00: WM state +0x123002bc: 0x00007cc0: CC state +0x123002c0: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x123002c4: 0x12444100: vs fence: 256, clip_fence: 292, gs_fence: 272 +0x123002c8: 0x40000184: sf fence: 388, vfe_fence: 0, cs_fence: 1024 +0x123002cc: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x123002d0: 0x00000018: buffer 0: sequential, pitch 24b +0x123002d4: 0x00000f48: buffer address +0x123002d8: 0x00007fff: max index +0x123002dc: 0x00000000: mbz +0x123002e0: 0x78090003: 3DSTATE_VERTEX_ELEMENTS +0x123002e4: 0x04400000: buffer 0: valid, type 0x0040, src offset 0x0000 bytes +0x123002e8: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x123002ec: 0x0440000c: buffer 0: valid, type 0x0040, src offset 0x000c bytes +0x123002f0: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x123002f4: 0x60020100: CONSTANT_BUFFER: valid +0x123002f8: 0x00000202: offset: 0x00000200, length: 192 bytes +0x123002fc: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300300: 0x000000a2: vertex count +0x12300304: 0x00000000: start vertex +0x12300308: 0x00000001: instance count +0x1230030c: 0x00000000: start instance +0x12300310: 0x00000000: index bias +0x12300314: 0x02000000: MI_FLUSH +0x12300318: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x1230031c: 0x000079a0: VS state +0x12300320: 0x00000000: GS state +0x12300324: 0x00007901: Clip state +0x12300328: 0x00007940: SF state +0x1230032c: 0x00007960: WM state +0x12300330: 0x00007cc0: CC state +0x12300334: 0x00000000: MI_NOOP +0x12300338: 0x00000000: MI_NOOP +0x1230033c: 0x00000000: MI_NOOP +0x12300340: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300344: 0x12444100: vs fence: 256, clip_fence: 292, gs_fence: 272 +0x12300348: 0x40000184: sf fence: 388, vfe_fence: 0, cs_fence: 1024 +0x1230034c: 0x60020100: CONSTANT_BUFFER: valid +0x12300350: 0x00000202: offset: 0x00000200, length: 192 bytes +0x12300354: 0x7b001404: 3DPRIMITIVE: tri strip sequential +0x12300358: 0x0000002a: vertex count +0x1230035c: 0x000000a2: start vertex +0x12300360: 0x00000001: instance count +0x12300364: 0x00000000: start instance +0x12300368: 0x00000000: index bias +0x1230036c: 0x02000000: MI_FLUSH +0x12300370: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300374: 0x00007860: VS state +0x12300378: 0x00007801: GS state +0x1230037c: 0x00007821: Clip state +0x12300380: 0x00007880: SF state +0x12300384: 0x000078a0: WM state +0x12300388: 0x00007cc0: CC state +0x1230038c: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300390: 0x12444100: vs fence: 256, clip_fence: 292, gs_fence: 272 +0x12300394: 0x40000184: sf fence: 388, vfe_fence: 0, cs_fence: 1024 +0x12300398: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x1230039c: 0x0000000c: buffer 0: sequential, pitch 12b +0x123003a0: 0x00002268: buffer address +0x123003a4: 0x00007fff: max index +0x123003a8: 0x00000000: mbz +0x123003ac: 0x78090001: 3DSTATE_VERTEX_ELEMENTS +0x123003b0: 0x04400000: buffer 0: valid, type 0x0040, src offset 0x0000 bytes +0x123003b4: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x123003b8: 0x60020100: CONSTANT_BUFFER: valid +0x123003bc: 0x000002c2: offset: 0x000002c0, length: 192 bytes +0x123003c0: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x123003c4: 0x0000002a: vertex count +0x123003c8: 0x00000000: start vertex +0x123003cc: 0x00000001: instance count +0x123003d0: 0x00000000: start instance +0x123003d4: 0x00000000: index bias +0x123003d8: 0x02000000: MI_FLUSH +0x123003dc: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x123003e0: 0x00007860: VS state +0x123003e4: 0x000077e1: GS state +0x123003e8: 0x00007821: Clip state +0x123003ec: 0x00007880: SF state +0x123003f0: 0x000078a0: WM state +0x123003f4: 0x00007cc0: CC state +0x123003f8: 0x00000000: MI_NOOP +0x123003fc: 0x00000000: MI_NOOP +0x12300400: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300404: 0x12444100: vs fence: 256, clip_fence: 292, gs_fence: 272 +0x12300408: 0x40000184: sf fence: 388, vfe_fence: 0, cs_fence: 1024 +0x1230040c: 0x60020100: CONSTANT_BUFFER: valid +0x12300410: 0x000002c2: offset: 0x000002c0, length: 192 bytes +0x12300414: 0x7b001c04: 3DPRIMITIVE: quad list sequential +0x12300418: 0x00000028: vertex count +0x1230041c: 0x0000002a: start vertex +0x12300420: 0x00000001: instance count +0x12300424: 0x00000000: start instance +0x12300428: 0x00000000: index bias +0x1230042c: 0x02000000: MI_FLUSH +0x12300430: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300434: 0x00007860: VS state +0x12300438: 0x000077c1: GS state +0x1230043c: 0x00007821: Clip state +0x12300440: 0x00007880: SF state +0x12300444: 0x000078a0: WM state +0x12300448: 0x00007cc0: CC state +0x1230044c: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300450: 0x12444100: vs fence: 256, clip_fence: 292, gs_fence: 272 +0x12300454: 0x40000184: sf fence: 388, vfe_fence: 0, cs_fence: 1024 +0x12300458: 0x60020100: CONSTANT_BUFFER: valid +0x1230045c: 0x00000382: offset: 0x00000380, length: 192 bytes +0x12300460: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300464: 0x0000002a: vertex count +0x12300468: 0x00000052: start vertex +0x1230046c: 0x00000001: instance count +0x12300470: 0x00000000: start instance +0x12300474: 0x00000000: index bias +0x12300478: 0x02000000: MI_FLUSH +0x1230047c: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300480: 0x00007860: VS state +0x12300484: 0x000077a1: GS state +0x12300488: 0x00007821: Clip state +0x1230048c: 0x00007880: SF state +0x12300490: 0x000078a0: WM state +0x12300494: 0x00007cc0: CC state +0x12300498: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x1230049c: 0x12444100: vs fence: 256, clip_fence: 292, gs_fence: 272 +0x123004a0: 0x40000184: sf fence: 388, vfe_fence: 0, cs_fence: 1024 +0x123004a4: 0x60020100: CONSTANT_BUFFER: valid +0x123004a8: 0x00000382: offset: 0x00000380, length: 192 bytes +0x123004ac: 0x7b001c04: 3DPRIMITIVE: quad list sequential +0x123004b0: 0x00000028: vertex count +0x123004b4: 0x0000007c: start vertex +0x123004b8: 0x00000001: instance count +0x123004bc: 0x00000000: start instance +0x123004c0: 0x00000000: index bias +0x123004c4: 0x02000000: MI_FLUSH +0x123004c8: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x123004cc: 0x00007860: VS state +0x123004d0: 0x00007781: GS state +0x123004d4: 0x00007821: Clip state +0x123004d8: 0x00007880: SF state +0x123004dc: 0x000078a0: WM state +0x123004e0: 0x00007cc0: CC state +0x123004e4: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x123004e8: 0x12444100: vs fence: 256, clip_fence: 292, gs_fence: 272 +0x123004ec: 0x40000184: sf fence: 388, vfe_fence: 0, cs_fence: 1024 +0x123004f0: 0x60020100: CONSTANT_BUFFER: valid +0x123004f4: 0x00000382: offset: 0x00000380, length: 192 bytes +0x123004f8: 0x02000000: MI_FLUSH +0x123004fc: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300500: 0x00007760: VS state +0x12300504: 0x00007781: GS state +0x12300508: 0x00007821: Clip state +0x1230050c: 0x00007880: SF state +0x12300510: 0x000078a0: WM state +0x12300514: 0x00007cc0: CC state +0x12300518: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x1230051c: 0x12444100: vs fence: 256, clip_fence: 292, gs_fence: 272 +0x12300520: 0x40000184: sf fence: 388, vfe_fence: 0, cs_fence: 1024 +0x12300524: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x12300528: 0x00000018: buffer 0: sequential, pitch 24b +0x1230052c: 0x00002a30: buffer address +0x12300530: 0x00007fff: max index +0x12300534: 0x00000000: mbz +0x12300538: 0x78090003: 3DSTATE_VERTEX_ELEMENTS +0x1230053c: 0x04400000: buffer 0: valid, type 0x0040, src offset 0x0000 bytes +0x12300540: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x12300544: 0x0440000c: buffer 0: valid, type 0x0040, src offset 0x000c bytes +0x12300548: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x1230054c: 0x60020100: CONSTANT_BUFFER: valid +0x12300550: 0x00000442: offset: 0x00000440, length: 192 bytes +0x12300554: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300558: 0x00000052: vertex count +0x1230055c: 0x00000000: start vertex +0x12300560: 0x00000001: instance count +0x12300564: 0x00000000: start instance +0x12300568: 0x00000000: index bias +0x1230056c: 0x02000000: MI_FLUSH +0x12300570: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300574: 0x00007760: VS state +0x12300578: 0x00000000: GS state +0x1230057c: 0x000076c1: Clip state +0x12300580: 0x00007700: SF state +0x12300584: 0x00007720: WM state +0x12300588: 0x00007cc0: CC state +0x1230058c: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300590: 0x12444100: vs fence: 256, clip_fence: 292, gs_fence: 272 +0x12300594: 0x40000184: sf fence: 388, vfe_fence: 0, cs_fence: 1024 +0x12300598: 0x60020100: CONSTANT_BUFFER: valid +0x1230059c: 0x00000442: offset: 0x00000440, length: 192 bytes +0x123005a0: 0x7b001404: 3DPRIMITIVE: tri strip sequential +0x123005a4: 0x00000016: vertex count +0x123005a8: 0x00000052: start vertex +0x123005ac: 0x00000001: instance count +0x123005b0: 0x00000000: start instance +0x123005b4: 0x00000000: index bias +0x123005b8: 0x02000000: MI_FLUSH +0x123005bc: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x123005c0: 0x00007620: VS state +0x123005c4: 0x000075c1: GS state +0x123005c8: 0x000075e1: Clip state +0x123005cc: 0x00007640: SF state +0x123005d0: 0x00007660: WM state +0x123005d4: 0x00007cc0: CC state +0x123005d8: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x123005dc: 0x12444100: vs fence: 256, clip_fence: 292, gs_fence: 272 +0x123005e0: 0x40000184: sf fence: 388, vfe_fence: 0, cs_fence: 1024 +0x123005e4: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x123005e8: 0x0000000c: buffer 0: sequential, pitch 12b +0x123005ec: 0x000033f0: buffer address +0x123005f0: 0x00007fff: max index +0x123005f4: 0x00000000: mbz +0x123005f8: 0x78090001: 3DSTATE_VERTEX_ELEMENTS +0x123005fc: 0x04400000: buffer 0: valid, type 0x0040, src offset 0x0000 bytes +0x12300600: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x12300604: 0x60020100: CONSTANT_BUFFER: valid +0x12300608: 0x00000502: offset: 0x00000500, length: 192 bytes +0x1230060c: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300610: 0x0000002a: vertex count +0x12300614: 0x00000000: start vertex +0x12300618: 0x00000001: instance count +0x1230061c: 0x00000000: start instance +0x12300620: 0x00000000: index bias +0x12300624: 0x02000000: MI_FLUSH +0x12300628: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x1230062c: 0x00007620: VS state +0x12300630: 0x000075a1: GS state +0x12300634: 0x000075e1: Clip state +0x12300638: 0x00007640: SF state +0x1230063c: 0x00007660: WM state +0x12300640: 0x00007cc0: CC state +0x12300644: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300648: 0x12444100: vs fence: 256, clip_fence: 292, gs_fence: 272 +0x1230064c: 0x40000184: sf fence: 388, vfe_fence: 0, cs_fence: 1024 +0x12300650: 0x60020100: CONSTANT_BUFFER: valid +0x12300654: 0x00000502: offset: 0x00000500, length: 192 bytes +0x12300658: 0x7b001c04: 3DPRIMITIVE: quad list sequential +0x1230065c: 0x00000028: vertex count +0x12300660: 0x0000002a: start vertex +0x12300664: 0x00000001: instance count +0x12300668: 0x00000000: start instance +0x1230066c: 0x00000000: index bias +0x12300670: 0x02000000: MI_FLUSH +0x12300674: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300678: 0x00007620: VS state +0x1230067c: 0x00007581: GS state +0x12300680: 0x000075e1: Clip state +0x12300684: 0x00007640: SF state +0x12300688: 0x00007660: WM state +0x1230068c: 0x00007cc0: CC state +0x12300690: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300694: 0x12444100: vs fence: 256, clip_fence: 292, gs_fence: 272 +0x12300698: 0x40000184: sf fence: 388, vfe_fence: 0, cs_fence: 1024 +0x1230069c: 0x60020100: CONSTANT_BUFFER: valid +0x123006a0: 0x000005c2: offset: 0x000005c0, length: 192 bytes +0x123006a4: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x123006a8: 0x0000002a: vertex count +0x123006ac: 0x00000052: start vertex +0x123006b0: 0x00000001: instance count +0x123006b4: 0x00000000: start instance +0x123006b8: 0x00000000: index bias +0x123006bc: 0x02000000: MI_FLUSH +0x123006c0: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x123006c4: 0x00007620: VS state +0x123006c8: 0x00007561: GS state +0x123006cc: 0x000075e1: Clip state +0x123006d0: 0x00007640: SF state +0x123006d4: 0x00007660: WM state +0x123006d8: 0x00007cc0: CC state +0x123006dc: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x123006e0: 0x12444100: vs fence: 256, clip_fence: 292, gs_fence: 272 +0x123006e4: 0x40000184: sf fence: 388, vfe_fence: 0, cs_fence: 1024 +0x123006e8: 0x60020100: CONSTANT_BUFFER: valid +0x123006ec: 0x000005c2: offset: 0x000005c0, length: 192 bytes +0x123006f0: 0x7b001c04: 3DPRIMITIVE: quad list sequential +0x123006f4: 0x00000028: vertex count +0x123006f8: 0x0000007c: start vertex +0x123006fc: 0x00000001: instance count +0x12300700: 0x00000000: start instance +0x12300704: 0x00000000: index bias +0x12300708: 0x02000000: MI_FLUSH +0x1230070c: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300710: 0x00007620: VS state +0x12300714: 0x00007541: GS state +0x12300718: 0x000075e1: Clip state +0x1230071c: 0x00007640: SF state +0x12300720: 0x00007660: WM state +0x12300724: 0x00007cc0: CC state +0x12300728: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x1230072c: 0x12444100: vs fence: 256, clip_fence: 292, gs_fence: 272 +0x12300730: 0x40000184: sf fence: 388, vfe_fence: 0, cs_fence: 1024 +0x12300734: 0x60020100: CONSTANT_BUFFER: valid +0x12300738: 0x000005c2: offset: 0x000005c0, length: 192 bytes +0x1230073c: 0x02000000: MI_FLUSH +0x12300740: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300744: 0x00007520: VS state +0x12300748: 0x00007541: GS state +0x1230074c: 0x000075e1: Clip state +0x12300750: 0x00007640: SF state +0x12300754: 0x00007660: WM state +0x12300758: 0x00007cc0: CC state +0x1230075c: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300760: 0x12444100: vs fence: 256, clip_fence: 292, gs_fence: 272 +0x12300764: 0x40000184: sf fence: 388, vfe_fence: 0, cs_fence: 1024 +0x12300768: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x1230076c: 0x00000018: buffer 0: sequential, pitch 24b +0x12300770: 0x00003bb8: buffer address +0x12300774: 0x00007fff: max index +0x12300778: 0x00000000: mbz +0x1230077c: 0x78090003: 3DSTATE_VERTEX_ELEMENTS +0x12300780: 0x04400000: buffer 0: valid, type 0x0040, src offset 0x0000 bytes +0x12300784: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x12300788: 0x0440000c: buffer 0: valid, type 0x0040, src offset 0x000c bytes +0x1230078c: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x12300790: 0x60020100: CONSTANT_BUFFER: valid +0x12300794: 0x00000682: offset: 0x00000680, length: 192 bytes +0x12300798: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x1230079c: 0x00000052: vertex count +0x123007a0: 0x00000000: start vertex +0x123007a4: 0x00000001: instance count +0x123007a8: 0x00000000: start instance +0x123007ac: 0x00000000: index bias +0x123007b0: 0x02000000: MI_FLUSH +0x123007b4: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x123007b8: 0x00007520: VS state +0x123007bc: 0x00000000: GS state +0x123007c0: 0x00007481: Clip state +0x123007c4: 0x000074c0: SF state +0x123007c8: 0x000074e0: WM state +0x123007cc: 0x00007cc0: CC state +0x123007d0: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x123007d4: 0x12444100: vs fence: 256, clip_fence: 292, gs_fence: 272 +0x123007d8: 0x40000184: sf fence: 388, vfe_fence: 0, cs_fence: 1024 +0x123007dc: 0x60020100: CONSTANT_BUFFER: valid +0x123007e0: 0x00000682: offset: 0x00000680, length: 192 bytes +0x123007e4: 0x7b001404: 3DPRIMITIVE: tri strip sequential +0x123007e8: 0x00000016: vertex count +0x123007ec: 0x00000052: start vertex +0x123007f0: 0x00000001: instance count +0x123007f4: 0x00000000: start instance +0x123007f8: 0x00000000: index bias +0x123007fc: 0x05000000: MI_BATCH_BUFFER_END diff --git a/intel/tests/gen5-3d.batch.sh b/intel/tests/gen5-3d.batch.sh new file mode 120000 index 0000000..796ca5f --- /dev/null +++ b/intel/tests/gen5-3d.batch.sh @@ -0,0 +1 @@ +test-batch.sh
\ No newline at end of file diff --git a/intel/tests/gen6-3d.batch b/intel/tests/gen6-3d.batch Binary files differnew file mode 100644 index 0000000..d57147e --- /dev/null +++ b/intel/tests/gen6-3d.batch diff --git a/intel/tests/gen6-3d.batch-ref.txt b/intel/tests/gen6-3d.batch-ref.txt new file mode 100644 index 0000000..04cbddc --- /dev/null +++ b/intel/tests/gen6-3d.batch-ref.txt @@ -0,0 +1,990 @@ +0x12300000: 0x7a000002: PIPE_CONTROL +0x12300004: 0x00100002: no write, cs stall, stall at scoreboard, +0x12300008: 0x00000000: +0x1230000c: 0x00000000: +0x12300010: 0x7a000002: PIPE_CONTROL +0x12300014: 0x00004000: qword write, +0x12300018: 0x00000000: +0x1230001c: 0x00000000: +0x12300020: 0x69040000: 3DSTATE_PIPELINE_SELECT +0x12300024: 0x790d0001: 3DSTATE_MULTISAMPLE +0x12300028: 0x00000000: dword 1 +0x1230002c: 0x00000000: dword 2 +0x12300030: 0x78180000: 3DSTATE_SAMPLE_MASK +0x12300034: 0x00000001: dword 1 +0x12300038: 0x790b0002: 3DSTATE_GS_SVB_INDEX +0x1230003c: 0x00000000: dword 1 +0x12300040: 0x00000000: dword 2 +0x12300044: 0xffffffff: dword 3 +0x12300048: 0x790b0002: 3DSTATE_GS_SVB_INDEX +0x1230004c: 0x20000000: dword 1 +0x12300050: 0x00000000: dword 2 +0x12300054: 0xffffffff: dword 3 +0x12300058: 0x790b0002: 3DSTATE_GS_SVB_INDEX +0x1230005c: 0x40000000: dword 1 +0x12300060: 0x00000000: dword 2 +0x12300064: 0xffffffff: dword 3 +0x12300068: 0x790b0002: 3DSTATE_GS_SVB_INDEX +0x1230006c: 0x60000000: dword 1 +0x12300070: 0x00000000: dword 2 +0x12300074: 0xffffffff: dword 3 +0x12300078: 0x61020000: STATE_SIP +0x1230007c: 0x00000000: dword 1 +0x12300080: 0x680b0000: 3DSTATE_VF_STATISTICS +0x12300084: 0x61010008: STATE_BASE_ADDRESS +0x12300088: 0x00000001: general state base address 0x00000000 +0x1230008c: 0x00000001: surface state base address 0x00000000 +0x12300090: 0x00000001: dynamic state base address 0x00000000 +0x12300094: 0x00000001: indirect state base address 0x00000000 +0x12300098: 0x00000001: instruction state base address 0x00000000 +0x1230009c: 0x00000001: general state upper bound disabled +0x123000a0: 0x00000001: dynamic state upper bound disabled +0x123000a4: 0x00000001: indirect state upper bound disabled +0x123000a8: 0x00000001: instruction state upper bound disabled +0x123000ac: 0x780d1c02: 3DSTATE_VIEWPORT_STATE_POINTERS +0x123000b0: 0x00007fe0: clip +0x123000b4: 0x00007fc0: sf +0x123000b8: 0x00007fa0: cc +0x123000bc: 0x78050001: 3DSTATE_URB +0x123000c0: 0x00000100: VS entries 256, alloc size 1 (1024bit row) +0x123000c4: 0x00000000: GS entries 0, alloc size 1 (1024bit row) +0x123000c8: 0x780e0002: 3DSTATE_CC_STATE_POINTERS +0x123000cc: 0x00007f81: blend change 1 +0x123000d0: 0x00007f01: depth stencil change 1 +0x123000d4: 0x00007f41: cc change 1 +0x123000d8: 0x78021302: 3DSTATE_SAMPLER_STATE_POINTERS: VS mod 1, GS mod 1, PS mod 1 +0x123000dc: 0x00000000: VS sampler state +0x123000e0: 0x00000000: GS sampler state +0x123000e4: 0x00000000: WM sampler state +0x123000e8: 0x78150003: 3DSTATE_CONSTANT_VS_STATE +0x123000ec: 0x00000000: dword 1 +0x123000f0: 0x00000000: dword 2 +0x123000f4: 0x00000000: dword 3 +0x123000f8: 0x00000000: dword 4 +0x123000fc: 0x78100004: 3DSTATE_VS +0x12300100: 0x00000000: kernel pointer +0x12300104: 0x00000000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x12300108: 0x00000000: scratch offset +0x1230010c: 0x00100800: Dispatch GRF start 1, VUE read length 1, VUE read offset 0 +0x12300110: 0x76000401: Max Threads 60, Vertex Cache enable, VS func enable +0x12300114: 0x7a000002: PIPE_CONTROL +0x12300118: 0x00002804: no write, depth stall, instruction cache invalidate, state cache invalidate, +0x1230011c: 0x00000000: +0x12300120: 0x00000000: +0x12300124: 0x78160003: 3DSTATE_CONSTANT_GS_STATE +0x12300128: 0x00000000: dword 1 +0x1230012c: 0x00000000: dword 2 +0x12300130: 0x00000000: dword 3 +0x12300134: 0x00000000: dword 4 +0x12300138: 0x78110005: 3DSTATE_GS +0x1230013c: 0x00000000: kernel pointer +0x12300140: 0x00000000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x12300144: 0x00000000: scratch offset +0x12300148: 0x00000001: Dispatch GRF start 1, VUE read length 0, VUE read offset 0 +0x1230014c: 0x00000500: Max Threads 1, Rendering enable +0x12300150: 0x00000000: Reorder disable, Discard Adjaceny disable, GS disable +0x12300154: 0x78120002: 3DSTATE_CLIP +0x12300158: 0x00000400: UserClip distance cull test mask 0x0 +0x1230015c: 0x98000026: Clip enable, API mode OGL, Viewport XY test enable, Viewport Z test enable, Guardband test disable, Clip mode 0, Perspective Divide enable, Non-Perspective Barycentric disable, Tri Provoking 2, Line Provoking 1, Trifan Provoking 2 +0x12300160: 0x0003ffe0: Min PointWidth 1, Max PointWidth 2047, Force Zero RTAIndex enable, Max VPIndex 0 +0x12300164: 0x78130012: 3DSTATE_SF +0x12300168: 0x00200810: Attrib Out 0, Attrib Swizzle enable, VUE read length 1, VUE read offset 1 +0x1230016c: 0x00000403: Legacy Global DepthBias disable, FrontFace fill 0, BF fill 0, VP transform enable, FrontWinding_CCW +0x12300170: 0x22000000: AA disable, CullMode 1, Scissor disable, Multisample m ode 0 +0x12300174: 0x4c000808: Last Pixel disable, SubPixel Precision 8, Use PixelWidth 1 +0x12300178: 0x00000000: Global Depth Offset Constant 0.000000 +0x1230017c: 0x00000000: Global Depth Offset Scale 0.000000 +0x12300180: 0x00000000: Global Depth Offset Clamp 0.000000 +0x12300184: 0x00000000: Attrib 1 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 0 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300188: 0x00000000: Attrib 3 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 2 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x1230018c: 0x00000000: Attrib 5 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 4 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300190: 0x00000000: Attrib 7 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 6 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300194: 0x00000000: Attrib 9 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 8 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300198: 0x00000000: Attrib 11 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 10 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x1230019c: 0x00000000: Attrib 13 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 12 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x123001a0: 0x00000000: Attrib 15 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 14 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x123001a4: 0x00000000: Point Sprite TexCoord Enable +0x123001a8: 0x00000000: Const Interp Enable +0x123001ac: 0x00000000: Attrib 7-0 WrapShortest Enable +0x123001b0: 0x00000000: Attrib 15-8 WrapShortest Enable +0x123001b4: 0x78171003: 3DSTATE_CONSTANT_PS_STATE +0x123001b8: 0x00007ee0: dword 1 +0x123001bc: 0x00000000: dword 2 +0x123001c0: 0x00000000: dword 3 +0x123001c4: 0x00000000: dword 4 +0x123001c8: 0x78140007: 3DSTATE_WM +0x123001cc: 0x000000c0: kernel start pointer 0 +0x123001d0: 0x00000000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x123001d4: 0x00000000: scratch offset +0x123001d8: 0x80020002: Depth Clear 0, Depth Resolve 0, HiZ Resolve 0, Dispatch GRF start[0] 2, start[1] 0, start[2] 2 +0x123001dc: 0x4e084003: MaxThreads 40, PS KillPixel 0, PS computed Z 0, PS use sourceZ 0, Thread Dispatch 1, PS use sourceW 0, Dispatch32 0, Dispatch16 1, Dispatch8 1 +0x123001e0: 0x00000000: Num SF output 0, Pos XY offset 0, ZW interp mode 0 , Barycentric interp mode 0x0, Point raster rule 0, Multisample mode 0, Multisample Dispatch mode 0 +0x123001e4: 0x00000000: kernel start pointer 1 +0x123001e8: 0x00000140: kernel start pointer 2 +0x123001ec: 0x780f0000: 3DSTATE_SCISSOR_POINTERS +0x123001f0: 0x00007d20: scissor rect offset +0x123001f4: 0x78011302: 3DSTATE_BINDING_TABLE_POINTERS: VS mod 1, GS mod 1, PS mod 1 +0x123001f8: 0x00007d40: VS binding table +0x123001fc: 0x00007d40: GS binding table +0x12300200: 0x00007d40: WM binding table +0x12300204: 0x7a000002: PIPE_CONTROL +0x12300208: 0x00002000: no write, depth stall, +0x1230020c: 0x00000000: +0x12300210: 0x00000000: +0x12300214: 0x7a000002: PIPE_CONTROL +0x12300218: 0x00000001: no write, depth cache flush, +0x1230021c: 0x00000000: +0x12300220: 0x00000000: +0x12300224: 0x7a000002: PIPE_CONTROL +0x12300228: 0x00002000: no write, depth stall, +0x1230022c: 0x00000000: +0x12300230: 0x00000000: +0x12300234: 0x79050005: 3DSTATE_DEPTH_BUFFER +0x12300238: 0x2c6c05ff: 2D, unknown, pitch = 1536 bytes, tiled, HiZ 1, Separate Stencil 1 +0x1230023c: 0x00000000: depth offset +0x12300240: 0x09584ac0: 300x300 +0x12300244: 0x00000000: volume depth +0x12300248: 0x00000000: +0x1230024c: 0x00000000: +0x12300250: 0x790f0001: 3DSTATE_HIER_DEPTH_BUFFER +0x12300254: 0x000005ff: dword 1 +0x12300258: 0x00000000: dword 2 +0x1230025c: 0x790e0001: 3D UNKNOWN: 3d_965 opcode = 0x790e +0x12300260: 0x0000027f: MI_NOOP +0x12300264: 0x00000000: MI_NOOP +0x12300268: 0x79100000: 3DSTATE_CLEAR_PARAMS +0x1230026c: 0x00000000: dword 1 +0x12300270: 0x79000002: 3DSTATE_DRAWING_RECTANGLE +0x12300274: 0x00000000: top left: 0,0 +0x12300278: 0x012b012b: bottom right: 299,299 +0x1230027c: 0x00000000: origin: 0,0 +0x12300280: 0x790b0002: 3DSTATE_GS_SVB_INDEX +0x12300284: 0x00000000: dword 1 +0x12300288: 0x00000000: dword 2 +0x1230028c: 0x00000000: dword 3 +0x12300290: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x12300294: 0x0000000c: buffer 0: sequential, pitch 12b +0x12300298: 0x00000000: buffer address +0x1230029c: 0x0000ffff: max index +0x123002a0: 0x00000000: mbz +0x123002a4: 0x78090001: 3DSTATE_VERTEX_ELEMENTS +0x123002a8: 0x02400000: buffer 0: valid, type 0x0040, src offset 0x0000 bytes +0x123002ac: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x123002b0: 0x7b001804: 3DPRIMITIVE: tri fan sequential +0x123002b4: 0x00000004: vertex count +0x123002b8: 0x00000000: start vertex +0x123002bc: 0x00000001: instance count +0x123002c0: 0x00000000: start instance +0x123002c4: 0x00000000: index bias +0x123002c8: 0x78050001: 3DSTATE_URB +0x123002cc: 0x00000100: VS entries 256, alloc size 1 (1024bit row) +0x123002d0: 0x00000000: GS entries 0, alloc size 1 (1024bit row) +0x123002d4: 0x780e0002: 3DSTATE_CC_STATE_POINTERS +0x123002d8: 0x00007f81: blend change 1 +0x123002dc: 0x00007cc1: depth stencil change 1 +0x123002e0: 0x00007d01: cc change 1 +0x123002e4: 0x78151003: 3DSTATE_CONSTANT_VS_STATE +0x123002e8: 0x00007b85: dword 1 +0x123002ec: 0x00000000: dword 2 +0x123002f0: 0x00000000: dword 3 +0x123002f4: 0x00000000: dword 4 +0x123002f8: 0x78100004: 3DSTATE_VS +0x123002fc: 0x00000240: kernel pointer +0x12300300: 0x00010000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x12300304: 0x00000000: scratch offset +0x12300308: 0x00100800: Dispatch GRF start 1, VUE read length 1, VUE read offset 0 +0x1230030c: 0x76000401: Max Threads 60, Vertex Cache enable, VS func enable +0x12300310: 0x7a000002: PIPE_CONTROL +0x12300314: 0x00100002: no write, cs stall, stall at scoreboard, +0x12300318: 0x00000000: +0x1230031c: 0x00000000: +0x12300320: 0x7a000002: PIPE_CONTROL +0x12300324: 0x00004000: qword write, +0x12300328: 0x00000000: +0x1230032c: 0x00000000: +0x12300330: 0x7a000002: PIPE_CONTROL +0x12300334: 0x00002804: no write, depth stall, instruction cache invalidate, state cache invalidate, +0x12300338: 0x00000000: +0x1230033c: 0x00000000: +0x12300340: 0x78120002: 3DSTATE_CLIP +0x12300344: 0x00000400: UserClip distance cull test mask 0x0 +0x12300348: 0x98000026: Clip enable, API mode OGL, Viewport XY test enable, Viewport Z test enable, Guardband test disable, Clip mode 0, Perspective Divide enable, Non-Perspective Barycentric disable, Tri Provoking 2, Line Provoking 1, Trifan Provoking 2 +0x1230034c: 0x0003ffe0: Min PointWidth 1, Max PointWidth 2047, Force Zero RTAIndex enable, Max VPIndex 0 +0x12300350: 0x78130012: 3DSTATE_SF +0x12300354: 0x00600810: Attrib Out 1, Attrib Swizzle enable, VUE read length 1, VUE read offset 1 +0x12300358: 0x00000403: Legacy Global DepthBias disable, FrontFace fill 0, BF fill 0, VP transform enable, FrontWinding_CCW +0x1230035c: 0x62000000: AA disable, CullMode 3, Scissor disable, Multisample m ode 0 +0x12300360: 0x4c000808: Last Pixel disable, SubPixel Precision 8, Use PixelWidth 1 +0x12300364: 0x00000000: Global Depth Offset Constant 0.000000 +0x12300368: 0x00000000: Global Depth Offset Scale 0.000000 +0x1230036c: 0x00000000: Global Depth Offset Clamp 0.000000 +0x12300370: 0x00000000: Attrib 1 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 0 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300374: 0x00000000: Attrib 3 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 2 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300378: 0x00000000: Attrib 5 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 4 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x1230037c: 0x00000000: Attrib 7 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 6 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300380: 0x00000000: Attrib 9 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 8 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300384: 0x00000000: Attrib 11 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 10 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300388: 0x00000000: Attrib 13 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 12 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x1230038c: 0x00000000: Attrib 15 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 14 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300390: 0x00000000: Point Sprite TexCoord Enable +0x12300394: 0x00000001: Const Interp Enable +0x12300398: 0x00000000: Attrib 7-0 WrapShortest Enable +0x1230039c: 0x00000000: Attrib 15-8 WrapShortest Enable +0x123003a0: 0x78170003: 3DSTATE_CONSTANT_PS_STATE +0x123003a4: 0x00000000: dword 1 +0x123003a8: 0x00000000: dword 2 +0x123003ac: 0x00000000: dword 3 +0x123003b0: 0x00000000: dword 4 +0x123003b4: 0x78140007: 3DSTATE_WM +0x123003b8: 0x00000500: kernel start pointer 0 +0x123003bc: 0x00010000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x123003c0: 0x00000000: scratch offset +0x123003c4: 0x80020000: Depth Clear 0, Depth Resolve 0, HiZ Resolve 0, Dispatch GRF start[0] 2, start[1] 0, start[2] 0 +0x123003c8: 0x4e084002: MaxThreads 40, PS KillPixel 0, PS computed Z 0, PS use sourceZ 0, Thread Dispatch 1, PS use sourceW 0, Dispatch32 0, Dispatch16 1, Dispatch8 0 +0x123003cc: 0x00100000: Num SF output 1, Pos XY offset 0, ZW interp mode 0 , Barycentric interp mode 0x0, Point raster rule 0, Multisample mode 0, Multisample Dispatch mode 0 +0x123003d0: 0x00000000: kernel start pointer 1 +0x123003d4: 0x00000500: kernel start pointer 2 +0x123003d8: 0x78011302: 3DSTATE_BINDING_TABLE_POINTERS: VS mod 1, GS mod 1, PS mod 1 +0x123003dc: 0x00007a00: VS binding table +0x123003e0: 0x00007a00: GS binding table +0x123003e4: 0x00007a00: WM binding table +0x123003e8: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x123003ec: 0x0000000c: buffer 0: sequential, pitch 12b +0x123003f0: 0x00000000: buffer address +0x123003f4: 0x00007fff: max index +0x123003f8: 0x00000000: mbz +0x123003fc: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300400: 0x00000052: vertex count +0x12300404: 0x00000000: start vertex +0x12300408: 0x00000001: instance count +0x1230040c: 0x00000000: start instance +0x12300410: 0x00000000: index bias +0x12300414: 0x7b001c04: 3DPRIMITIVE: quad list sequential +0x12300418: 0x00000050: vertex count +0x1230041c: 0x00000052: start vertex +0x12300420: 0x00000001: instance count +0x12300424: 0x00000000: start instance +0x12300428: 0x00000000: index bias +0x1230042c: 0x78151003: 3DSTATE_CONSTANT_VS_STATE +0x12300430: 0x000078c5: dword 1 +0x12300434: 0x00000000: dword 2 +0x12300438: 0x00000000: dword 3 +0x1230043c: 0x00000000: dword 4 +0x12300440: 0x78100004: 3DSTATE_VS +0x12300444: 0x00000240: kernel pointer +0x12300448: 0x00010000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x1230044c: 0x00000000: scratch offset +0x12300450: 0x00100800: Dispatch GRF start 1, VUE read length 1, VUE read offset 0 +0x12300454: 0x76000401: Max Threads 60, Vertex Cache enable, VS func enable +0x12300458: 0x7a000002: PIPE_CONTROL +0x1230045c: 0x00100002: no write, cs stall, stall at scoreboard, +0x12300460: 0x00000000: +0x12300464: 0x00000000: +0x12300468: 0x7a000002: PIPE_CONTROL +0x1230046c: 0x00004000: qword write, +0x12300470: 0x00000000: +0x12300474: 0x00000000: +0x12300478: 0x7a000002: PIPE_CONTROL +0x1230047c: 0x00002804: no write, depth stall, instruction cache invalidate, state cache invalidate, +0x12300480: 0x00000000: +0x12300484: 0x00000000: +0x12300488: 0x78170003: 3DSTATE_CONSTANT_PS_STATE +0x1230048c: 0x00000000: dword 1 +0x12300490: 0x00000000: dword 2 +0x12300494: 0x00000000: dword 3 +0x12300498: 0x00000000: dword 4 +0x1230049c: 0x78140007: 3DSTATE_WM +0x123004a0: 0x00000500: kernel start pointer 0 +0x123004a4: 0x00010000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x123004a8: 0x00000000: scratch offset +0x123004ac: 0x80020000: Depth Clear 0, Depth Resolve 0, HiZ Resolve 0, Dispatch GRF start[0] 2, start[1] 0, start[2] 0 +0x123004b0: 0x4e084002: MaxThreads 40, PS KillPixel 0, PS computed Z 0, PS use sourceZ 0, Thread Dispatch 1, PS use sourceW 0, Dispatch32 0, Dispatch16 1, Dispatch8 0 +0x123004b4: 0x00100000: Num SF output 1, Pos XY offset 0, ZW interp mode 0 , Barycentric interp mode 0x0, Point raster rule 0, Multisample mode 0, Multisample Dispatch mode 0 +0x123004b8: 0x00000000: kernel start pointer 1 +0x123004bc: 0x00000500: kernel start pointer 2 +0x123004c0: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x123004c4: 0x00000052: vertex count +0x123004c8: 0x000000a2: start vertex +0x123004cc: 0x00000001: instance count +0x123004d0: 0x00000000: start instance +0x123004d4: 0x00000000: index bias +0x123004d8: 0x7b001c04: 3DPRIMITIVE: quad list sequential +0x123004dc: 0x00000050: vertex count +0x123004e0: 0x000000f4: start vertex +0x123004e4: 0x00000001: instance count +0x123004e8: 0x00000000: start instance +0x123004ec: 0x00000000: index bias +0x123004f0: 0x78050001: 3DSTATE_URB +0x123004f4: 0x00000100: VS entries 256, alloc size 1 (1024bit row) +0x123004f8: 0x00000000: GS entries 0, alloc size 1 (1024bit row) +0x123004fc: 0x78151003: 3DSTATE_CONSTANT_VS_STATE +0x12300500: 0x00007785: dword 1 +0x12300504: 0x00000000: dword 2 +0x12300508: 0x00000000: dword 3 +0x1230050c: 0x00000000: dword 4 +0x12300510: 0x78100004: 3DSTATE_VS +0x12300514: 0x00000640: kernel pointer +0x12300518: 0x00010000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x1230051c: 0x00000000: scratch offset +0x12300520: 0x00100800: Dispatch GRF start 1, VUE read length 1, VUE read offset 0 +0x12300524: 0x76000401: Max Threads 60, Vertex Cache enable, VS func enable +0x12300528: 0x7a000002: PIPE_CONTROL +0x1230052c: 0x00100002: no write, cs stall, stall at scoreboard, +0x12300530: 0x00000000: +0x12300534: 0x00000000: +0x12300538: 0x7a000002: PIPE_CONTROL +0x1230053c: 0x00004000: qword write, +0x12300540: 0x00000000: +0x12300544: 0x00000000: +0x12300548: 0x7a000002: PIPE_CONTROL +0x1230054c: 0x00002804: no write, depth stall, instruction cache invalidate, state cache invalidate, +0x12300550: 0x00000000: +0x12300554: 0x00000000: +0x12300558: 0x78130012: 3DSTATE_SF +0x1230055c: 0x00600810: Attrib Out 1, Attrib Swizzle enable, VUE read length 1, VUE read offset 1 +0x12300560: 0x00000403: Legacy Global DepthBias disable, FrontFace fill 0, BF fill 0, VP transform enable, FrontWinding_CCW +0x12300564: 0x62000000: AA disable, CullMode 3, Scissor disable, Multisample m ode 0 +0x12300568: 0x4c000808: Last Pixel disable, SubPixel Precision 8, Use PixelWidth 1 +0x1230056c: 0x00000000: Global Depth Offset Constant 0.000000 +0x12300570: 0x00000000: Global Depth Offset Scale 0.000000 +0x12300574: 0x00000000: Global Depth Offset Clamp 0.000000 +0x12300578: 0x00000000: Attrib 1 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 0 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x1230057c: 0x00000000: Attrib 3 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 2 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300580: 0x00000000: Attrib 5 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 4 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300584: 0x00000000: Attrib 7 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 6 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300588: 0x00000000: Attrib 9 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 8 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x1230058c: 0x00000000: Attrib 11 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 10 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300590: 0x00000000: Attrib 13 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 12 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300594: 0x00000000: Attrib 15 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 14 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300598: 0x00000000: Point Sprite TexCoord Enable +0x1230059c: 0x00000001: Const Interp Enable +0x123005a0: 0x00000000: Attrib 7-0 WrapShortest Enable +0x123005a4: 0x00000000: Attrib 15-8 WrapShortest Enable +0x123005a8: 0x78011302: 3DSTATE_BINDING_TABLE_POINTERS: VS mod 1, GS mod 1, PS mod 1 +0x123005ac: 0x00007600: VS binding table +0x123005b0: 0x00007600: GS binding table +0x123005b4: 0x00007600: WM binding table +0x123005b8: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x123005bc: 0x00000018: buffer 0: sequential, pitch 24b +0x123005c0: 0x00000f48: buffer address +0x123005c4: 0x00007fff: max index +0x123005c8: 0x00000000: mbz +0x123005cc: 0x78090003: 3DSTATE_VERTEX_ELEMENTS +0x123005d0: 0x02400000: buffer 0: valid, type 0x0040, src offset 0x0000 bytes +0x123005d4: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x123005d8: 0x0240000c: buffer 0: valid, type 0x0040, src offset 0x000c bytes +0x123005dc: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x123005e0: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x123005e4: 0x000000a2: vertex count +0x123005e8: 0x00000000: start vertex +0x123005ec: 0x00000001: instance count +0x123005f0: 0x00000000: start instance +0x123005f4: 0x00000000: index bias +0x123005f8: 0x78151003: 3DSTATE_CONSTANT_VS_STATE +0x123005fc: 0x000074c5: dword 1 +0x12300600: 0x00000000: dword 2 +0x12300604: 0x00000000: dword 3 +0x12300608: 0x00000000: dword 4 +0x1230060c: 0x78100004: 3DSTATE_VS +0x12300610: 0x00000640: kernel pointer +0x12300614: 0x00010000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x12300618: 0x00000000: scratch offset +0x1230061c: 0x00100800: Dispatch GRF start 1, VUE read length 1, VUE read offset 0 +0x12300620: 0x76000401: Max Threads 60, Vertex Cache enable, VS func enable +0x12300624: 0x7a000002: PIPE_CONTROL +0x12300628: 0x00100002: no write, cs stall, stall at scoreboard, +0x1230062c: 0x00000000: +0x12300630: 0x00000000: +0x12300634: 0x7a000002: PIPE_CONTROL +0x12300638: 0x00004000: qword write, +0x1230063c: 0x00000000: +0x12300640: 0x00000000: +0x12300644: 0x7a000002: PIPE_CONTROL +0x12300648: 0x00002804: no write, depth stall, instruction cache invalidate, state cache invalidate, +0x1230064c: 0x00000000: +0x12300650: 0x00000000: +0x12300654: 0x78120002: 3DSTATE_CLIP +0x12300658: 0x00000400: UserClip distance cull test mask 0x0 +0x1230065c: 0x98000026: Clip enable, API mode OGL, Viewport XY test enable, Viewport Z test enable, Guardband test disable, Clip mode 0, Perspective Divide enable, Non-Perspective Barycentric disable, Tri Provoking 2, Line Provoking 1, Trifan Provoking 2 +0x12300660: 0x0003ffe0: Min PointWidth 1, Max PointWidth 2047, Force Zero RTAIndex enable, Max VPIndex 0 +0x12300664: 0x78130012: 3DSTATE_SF +0x12300668: 0x00600810: Attrib Out 1, Attrib Swizzle enable, VUE read length 1, VUE read offset 1 +0x1230066c: 0x00000403: Legacy Global DepthBias disable, FrontFace fill 0, BF fill 0, VP transform enable, FrontWinding_CCW +0x12300670: 0x62000000: AA disable, CullMode 3, Scissor disable, Multisample m ode 0 +0x12300674: 0x4c000808: Last Pixel disable, SubPixel Precision 8, Use PixelWidth 1 +0x12300678: 0x00000000: Global Depth Offset Constant 0.000000 +0x1230067c: 0x00000000: Global Depth Offset Scale 0.000000 +0x12300680: 0x00000000: Global Depth Offset Clamp 0.000000 +0x12300684: 0x00000000: Attrib 1 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 0 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300688: 0x00000000: Attrib 3 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 2 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x1230068c: 0x00000000: Attrib 5 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 4 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300690: 0x00000000: Attrib 7 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 6 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300694: 0x00000000: Attrib 9 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 8 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300698: 0x00000000: Attrib 11 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 10 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x1230069c: 0x00000000: Attrib 13 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 12 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x123006a0: 0x00000000: Attrib 15 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 14 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x123006a4: 0x00000000: Point Sprite TexCoord Enable +0x123006a8: 0x00000000: Const Interp Enable +0x123006ac: 0x00000000: Attrib 7-0 WrapShortest Enable +0x123006b0: 0x00000000: Attrib 15-8 WrapShortest Enable +0x123006b4: 0x78170003: 3DSTATE_CONSTANT_PS_STATE +0x123006b8: 0x00000000: dword 1 +0x123006bc: 0x00000000: dword 2 +0x123006c0: 0x00000000: dword 3 +0x123006c4: 0x00000000: dword 4 +0x123006c8: 0x78140007: 3DSTATE_WM +0x123006cc: 0x00000900: kernel start pointer 0 +0x123006d0: 0x00010000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x123006d4: 0x00000000: scratch offset +0x123006d8: 0x80060000: Depth Clear 0, Depth Resolve 0, HiZ Resolve 0, Dispatch GRF start[0] 6, start[1] 0, start[2] 0 +0x123006dc: 0x4e084002: MaxThreads 40, PS KillPixel 0, PS computed Z 0, PS use sourceZ 0, Thread Dispatch 1, PS use sourceW 0, Dispatch32 0, Dispatch16 1, Dispatch8 0 +0x123006e0: 0x00100400: Num SF output 1, Pos XY offset 0, ZW interp mode 0 , Barycentric interp mode 0x1, Point raster rule 0, Multisample mode 0, Multisample Dispatch mode 0 +0x123006e4: 0x00000000: kernel start pointer 1 +0x123006e8: 0x00000900: kernel start pointer 2 +0x123006ec: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x123006f0: 0x0000002a: vertex count +0x123006f4: 0x000000a2: start vertex +0x123006f8: 0x00000001: instance count +0x123006fc: 0x00000000: start instance +0x12300700: 0x00000000: index bias +0x12300704: 0x78050001: 3DSTATE_URB +0x12300708: 0x00000100: VS entries 256, alloc size 1 (1024bit row) +0x1230070c: 0x00000000: GS entries 0, alloc size 1 (1024bit row) +0x12300710: 0x78151003: 3DSTATE_CONSTANT_VS_STATE +0x12300714: 0x00007385: dword 1 +0x12300718: 0x00000000: dword 2 +0x1230071c: 0x00000000: dword 3 +0x12300720: 0x00000000: dword 4 +0x12300724: 0x78100004: 3DSTATE_VS +0x12300728: 0x00000240: kernel pointer +0x1230072c: 0x00010000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x12300730: 0x00000000: scratch offset +0x12300734: 0x00100800: Dispatch GRF start 1, VUE read length 1, VUE read offset 0 +0x12300738: 0x76000401: Max Threads 60, Vertex Cache enable, VS func enable +0x1230073c: 0x7a000002: PIPE_CONTROL +0x12300740: 0x00100002: no write, cs stall, stall at scoreboard, +0x12300744: 0x00000000: +0x12300748: 0x00000000: +0x1230074c: 0x7a000002: PIPE_CONTROL +0x12300750: 0x00004000: qword write, +0x12300754: 0x00000000: +0x12300758: 0x00000000: +0x1230075c: 0x7a000002: PIPE_CONTROL +0x12300760: 0x00002804: no write, depth stall, instruction cache invalidate, state cache invalidate, +0x12300764: 0x00000000: +0x12300768: 0x00000000: +0x1230076c: 0x78120002: 3DSTATE_CLIP +0x12300770: 0x00000400: UserClip distance cull test mask 0x0 +0x12300774: 0x98000026: Clip enable, API mode OGL, Viewport XY test enable, Viewport Z test enable, Guardband test disable, Clip mode 0, Perspective Divide enable, Non-Perspective Barycentric disable, Tri Provoking 2, Line Provoking 1, Trifan Provoking 2 +0x12300778: 0x0003ffe0: Min PointWidth 1, Max PointWidth 2047, Force Zero RTAIndex enable, Max VPIndex 0 +0x1230077c: 0x78130012: 3DSTATE_SF +0x12300780: 0x00600810: Attrib Out 1, Attrib Swizzle enable, VUE read length 1, VUE read offset 1 +0x12300784: 0x00000403: Legacy Global DepthBias disable, FrontFace fill 0, BF fill 0, VP transform enable, FrontWinding_CCW +0x12300788: 0x62000000: AA disable, CullMode 3, Scissor disable, Multisample m ode 0 +0x1230078c: 0x4c000808: Last Pixel disable, SubPixel Precision 8, Use PixelWidth 1 +0x12300790: 0x00000000: Global Depth Offset Constant 0.000000 +0x12300794: 0x00000000: Global Depth Offset Scale 0.000000 +0x12300798: 0x00000000: Global Depth Offset Clamp 0.000000 +0x1230079c: 0x00000000: Attrib 1 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 0 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x123007a0: 0x00000000: Attrib 3 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 2 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x123007a4: 0x00000000: Attrib 5 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 4 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x123007a8: 0x00000000: Attrib 7 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 6 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x123007ac: 0x00000000: Attrib 9 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 8 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x123007b0: 0x00000000: Attrib 11 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 10 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x123007b4: 0x00000000: Attrib 13 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 12 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x123007b8: 0x00000000: Attrib 15 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 14 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x123007bc: 0x00000000: Point Sprite TexCoord Enable +0x123007c0: 0x00000001: Const Interp Enable +0x123007c4: 0x00000000: Attrib 7-0 WrapShortest Enable +0x123007c8: 0x00000000: Attrib 15-8 WrapShortest Enable +0x123007cc: 0x78170003: 3DSTATE_CONSTANT_PS_STATE +0x123007d0: 0x00000000: dword 1 +0x123007d4: 0x00000000: dword 2 +0x123007d8: 0x00000000: dword 3 +0x123007dc: 0x00000000: dword 4 +0x123007e0: 0x78140007: 3DSTATE_WM +0x123007e4: 0x00000500: kernel start pointer 0 +0x123007e8: 0x00010000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x123007ec: 0x00000000: scratch offset +0x123007f0: 0x80020000: Depth Clear 0, Depth Resolve 0, HiZ Resolve 0, Dispatch GRF start[0] 2, start[1] 0, start[2] 0 +0x123007f4: 0x4e084002: MaxThreads 40, PS KillPixel 0, PS computed Z 0, PS use sourceZ 0, Thread Dispatch 1, PS use sourceW 0, Dispatch32 0, Dispatch16 1, Dispatch8 0 +0x123007f8: 0x00100000: Num SF output 1, Pos XY offset 0, ZW interp mode 0 , Barycentric interp mode 0x0, Point raster rule 0, Multisample mode 0, Multisample Dispatch mode 0 +0x123007fc: 0x00000000: kernel start pointer 1 +0x12300800: 0x00000500: kernel start pointer 2 +0x12300804: 0x78011302: 3DSTATE_BINDING_TABLE_POINTERS: VS mod 1, GS mod 1, PS mod 1 +0x12300808: 0x00007200: VS binding table +0x1230080c: 0x00007200: GS binding table +0x12300810: 0x00007200: WM binding table +0x12300814: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x12300818: 0x0000000c: buffer 0: sequential, pitch 12b +0x1230081c: 0x00002268: buffer address +0x12300820: 0x00007fff: max index +0x12300824: 0x00000000: mbz +0x12300828: 0x78090001: 3DSTATE_VERTEX_ELEMENTS +0x1230082c: 0x02400000: buffer 0: valid, type 0x0040, src offset 0x0000 bytes +0x12300830: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x12300834: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300838: 0x0000002a: vertex count +0x1230083c: 0x00000000: start vertex +0x12300840: 0x00000001: instance count +0x12300844: 0x00000000: start instance +0x12300848: 0x00000000: index bias +0x1230084c: 0x7b001c04: 3DPRIMITIVE: quad list sequential +0x12300850: 0x00000028: vertex count +0x12300854: 0x0000002a: start vertex +0x12300858: 0x00000001: instance count +0x1230085c: 0x00000000: start instance +0x12300860: 0x00000000: index bias +0x12300864: 0x78151003: 3DSTATE_CONSTANT_VS_STATE +0x12300868: 0x000070c5: dword 1 +0x1230086c: 0x00000000: dword 2 +0x12300870: 0x00000000: dword 3 +0x12300874: 0x00000000: dword 4 +0x12300878: 0x78100004: 3DSTATE_VS +0x1230087c: 0x00000240: kernel pointer +0x12300880: 0x00010000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x12300884: 0x00000000: scratch offset +0x12300888: 0x00100800: Dispatch GRF start 1, VUE read length 1, VUE read offset 0 +0x1230088c: 0x76000401: Max Threads 60, Vertex Cache enable, VS func enable +0x12300890: 0x7a000002: PIPE_CONTROL +0x12300894: 0x00100002: no write, cs stall, stall at scoreboard, +0x12300898: 0x00000000: +0x1230089c: 0x00000000: +0x123008a0: 0x7a000002: PIPE_CONTROL +0x123008a4: 0x00004000: qword write, +0x123008a8: 0x00000000: +0x123008ac: 0x00000000: +0x123008b0: 0x7a000002: PIPE_CONTROL +0x123008b4: 0x00002804: no write, depth stall, instruction cache invalidate, state cache invalidate, +0x123008b8: 0x00000000: +0x123008bc: 0x00000000: +0x123008c0: 0x78170003: 3DSTATE_CONSTANT_PS_STATE +0x123008c4: 0x00000000: dword 1 +0x123008c8: 0x00000000: dword 2 +0x123008cc: 0x00000000: dword 3 +0x123008d0: 0x00000000: dword 4 +0x123008d4: 0x78140007: 3DSTATE_WM +0x123008d8: 0x00000500: kernel start pointer 0 +0x123008dc: 0x00010000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x123008e0: 0x00000000: scratch offset +0x123008e4: 0x80020000: Depth Clear 0, Depth Resolve 0, HiZ Resolve 0, Dispatch GRF start[0] 2, start[1] 0, start[2] 0 +0x123008e8: 0x4e084002: MaxThreads 40, PS KillPixel 0, PS computed Z 0, PS use sourceZ 0, Thread Dispatch 1, PS use sourceW 0, Dispatch32 0, Dispatch16 1, Dispatch8 0 +0x123008ec: 0x00100000: Num SF output 1, Pos XY offset 0, ZW interp mode 0 , Barycentric interp mode 0x0, Point raster rule 0, Multisample mode 0, Multisample Dispatch mode 0 +0x123008f0: 0x00000000: kernel start pointer 1 +0x123008f4: 0x00000500: kernel start pointer 2 +0x123008f8: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x123008fc: 0x0000002a: vertex count +0x12300900: 0x00000052: start vertex +0x12300904: 0x00000001: instance count +0x12300908: 0x00000000: start instance +0x1230090c: 0x00000000: index bias +0x12300910: 0x7b001c04: 3DPRIMITIVE: quad list sequential +0x12300914: 0x00000028: vertex count +0x12300918: 0x0000007c: start vertex +0x1230091c: 0x00000001: instance count +0x12300920: 0x00000000: start instance +0x12300924: 0x00000000: index bias +0x12300928: 0x78050001: 3DSTATE_URB +0x1230092c: 0x00000100: VS entries 256, alloc size 1 (1024bit row) +0x12300930: 0x00000000: GS entries 0, alloc size 1 (1024bit row) +0x12300934: 0x78151003: 3DSTATE_CONSTANT_VS_STATE +0x12300938: 0x00006f85: dword 1 +0x1230093c: 0x00000000: dword 2 +0x12300940: 0x00000000: dword 3 +0x12300944: 0x00000000: dword 4 +0x12300948: 0x78100004: 3DSTATE_VS +0x1230094c: 0x00000640: kernel pointer +0x12300950: 0x00010000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x12300954: 0x00000000: scratch offset +0x12300958: 0x00100800: Dispatch GRF start 1, VUE read length 1, VUE read offset 0 +0x1230095c: 0x76000401: Max Threads 60, Vertex Cache enable, VS func enable +0x12300960: 0x7a000002: PIPE_CONTROL +0x12300964: 0x00100002: no write, cs stall, stall at scoreboard, +0x12300968: 0x00000000: +0x1230096c: 0x00000000: +0x12300970: 0x7a000002: PIPE_CONTROL +0x12300974: 0x00004000: qword write, +0x12300978: 0x00000000: +0x1230097c: 0x00000000: +0x12300980: 0x7a000002: PIPE_CONTROL +0x12300984: 0x00002804: no write, depth stall, instruction cache invalidate, state cache invalidate, +0x12300988: 0x00000000: +0x1230098c: 0x00000000: +0x12300990: 0x78130012: 3DSTATE_SF +0x12300994: 0x00600810: Attrib Out 1, Attrib Swizzle enable, VUE read length 1, VUE read offset 1 +0x12300998: 0x00000403: Legacy Global DepthBias disable, FrontFace fill 0, BF fill 0, VP transform enable, FrontWinding_CCW +0x1230099c: 0x62000000: AA disable, CullMode 3, Scissor disable, Multisample m ode 0 +0x123009a0: 0x4c000808: Last Pixel disable, SubPixel Precision 8, Use PixelWidth 1 +0x123009a4: 0x00000000: Global Depth Offset Constant 0.000000 +0x123009a8: 0x00000000: Global Depth Offset Scale 0.000000 +0x123009ac: 0x00000000: Global Depth Offset Clamp 0.000000 +0x123009b0: 0x00000000: Attrib 1 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 0 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x123009b4: 0x00000000: Attrib 3 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 2 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x123009b8: 0x00000000: Attrib 5 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 4 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x123009bc: 0x00000000: Attrib 7 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 6 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x123009c0: 0x00000000: Attrib 9 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 8 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x123009c4: 0x00000000: Attrib 11 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 10 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x123009c8: 0x00000000: Attrib 13 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 12 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x123009cc: 0x00000000: Attrib 15 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 14 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x123009d0: 0x00000000: Point Sprite TexCoord Enable +0x123009d4: 0x00000001: Const Interp Enable +0x123009d8: 0x00000000: Attrib 7-0 WrapShortest Enable +0x123009dc: 0x00000000: Attrib 15-8 WrapShortest Enable +0x123009e0: 0x78011302: 3DSTATE_BINDING_TABLE_POINTERS: VS mod 1, GS mod 1, PS mod 1 +0x123009e4: 0x00006e00: VS binding table +0x123009e8: 0x00006e00: GS binding table +0x123009ec: 0x00006e00: WM binding table +0x123009f0: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x123009f4: 0x00000018: buffer 0: sequential, pitch 24b +0x123009f8: 0x00002a30: buffer address +0x123009fc: 0x00007fff: max index +0x12300a00: 0x00000000: mbz +0x12300a04: 0x78090003: 3DSTATE_VERTEX_ELEMENTS +0x12300a08: 0x02400000: buffer 0: valid, type 0x0040, src offset 0x0000 bytes +0x12300a0c: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x12300a10: 0x0240000c: buffer 0: valid, type 0x0040, src offset 0x000c bytes +0x12300a14: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x12300a18: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300a1c: 0x00000052: vertex count +0x12300a20: 0x00000000: start vertex +0x12300a24: 0x00000001: instance count +0x12300a28: 0x00000000: start instance +0x12300a2c: 0x00000000: index bias +0x12300a30: 0x78151003: 3DSTATE_CONSTANT_VS_STATE +0x12300a34: 0x00006cc5: dword 1 +0x12300a38: 0x00000000: dword 2 +0x12300a3c: 0x00000000: dword 3 +0x12300a40: 0x00000000: dword 4 +0x12300a44: 0x78100004: 3DSTATE_VS +0x12300a48: 0x00000640: kernel pointer +0x12300a4c: 0x00010000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x12300a50: 0x00000000: scratch offset +0x12300a54: 0x00100800: Dispatch GRF start 1, VUE read length 1, VUE read offset 0 +0x12300a58: 0x76000401: Max Threads 60, Vertex Cache enable, VS func enable +0x12300a5c: 0x7a000002: PIPE_CONTROL +0x12300a60: 0x00100002: no write, cs stall, stall at scoreboard, +0x12300a64: 0x00000000: +0x12300a68: 0x00000000: +0x12300a6c: 0x7a000002: PIPE_CONTROL +0x12300a70: 0x00004000: qword write, +0x12300a74: 0x00000000: +0x12300a78: 0x00000000: +0x12300a7c: 0x7a000002: PIPE_CONTROL +0x12300a80: 0x00002804: no write, depth stall, instruction cache invalidate, state cache invalidate, +0x12300a84: 0x00000000: +0x12300a88: 0x00000000: +0x12300a8c: 0x78120002: 3DSTATE_CLIP +0x12300a90: 0x00000400: UserClip distance cull test mask 0x0 +0x12300a94: 0x98000026: Clip enable, API mode OGL, Viewport XY test enable, Viewport Z test enable, Guardband test disable, Clip mode 0, Perspective Divide enable, Non-Perspective Barycentric disable, Tri Provoking 2, Line Provoking 1, Trifan Provoking 2 +0x12300a98: 0x0003ffe0: Min PointWidth 1, Max PointWidth 2047, Force Zero RTAIndex enable, Max VPIndex 0 +0x12300a9c: 0x78130012: 3DSTATE_SF +0x12300aa0: 0x00600810: Attrib Out 1, Attrib Swizzle enable, VUE read length 1, VUE read offset 1 +0x12300aa4: 0x00000403: Legacy Global DepthBias disable, FrontFace fill 0, BF fill 0, VP transform enable, FrontWinding_CCW +0x12300aa8: 0x62000000: AA disable, CullMode 3, Scissor disable, Multisample m ode 0 +0x12300aac: 0x4c000808: Last Pixel disable, SubPixel Precision 8, Use PixelWidth 1 +0x12300ab0: 0x00000000: Global Depth Offset Constant 0.000000 +0x12300ab4: 0x00000000: Global Depth Offset Scale 0.000000 +0x12300ab8: 0x00000000: Global Depth Offset Clamp 0.000000 +0x12300abc: 0x00000000: Attrib 1 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 0 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300ac0: 0x00000000: Attrib 3 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 2 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300ac4: 0x00000000: Attrib 5 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 4 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300ac8: 0x00000000: Attrib 7 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 6 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300acc: 0x00000000: Attrib 9 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 8 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300ad0: 0x00000000: Attrib 11 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 10 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300ad4: 0x00000000: Attrib 13 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 12 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300ad8: 0x00000000: Attrib 15 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 14 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300adc: 0x00000000: Point Sprite TexCoord Enable +0x12300ae0: 0x00000000: Const Interp Enable +0x12300ae4: 0x00000000: Attrib 7-0 WrapShortest Enable +0x12300ae8: 0x00000000: Attrib 15-8 WrapShortest Enable +0x12300aec: 0x78170003: 3DSTATE_CONSTANT_PS_STATE +0x12300af0: 0x00000000: dword 1 +0x12300af4: 0x00000000: dword 2 +0x12300af8: 0x00000000: dword 3 +0x12300afc: 0x00000000: dword 4 +0x12300b00: 0x78140007: 3DSTATE_WM +0x12300b04: 0x00000900: kernel start pointer 0 +0x12300b08: 0x00010000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x12300b0c: 0x00000000: scratch offset +0x12300b10: 0x80060000: Depth Clear 0, Depth Resolve 0, HiZ Resolve 0, Dispatch GRF start[0] 6, start[1] 0, start[2] 0 +0x12300b14: 0x4e084002: MaxThreads 40, PS KillPixel 0, PS computed Z 0, PS use sourceZ 0, Thread Dispatch 1, PS use sourceW 0, Dispatch32 0, Dispatch16 1, Dispatch8 0 +0x12300b18: 0x00100400: Num SF output 1, Pos XY offset 0, ZW interp mode 0 , Barycentric interp mode 0x1, Point raster rule 0, Multisample mode 0, Multisample Dispatch mode 0 +0x12300b1c: 0x00000000: kernel start pointer 1 +0x12300b20: 0x00000900: kernel start pointer 2 +0x12300b24: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300b28: 0x00000016: vertex count +0x12300b2c: 0x00000052: start vertex +0x12300b30: 0x00000001: instance count +0x12300b34: 0x00000000: start instance +0x12300b38: 0x00000000: index bias +0x12300b3c: 0x78050001: 3DSTATE_URB +0x12300b40: 0x00000100: VS entries 256, alloc size 1 (1024bit row) +0x12300b44: 0x00000000: GS entries 0, alloc size 1 (1024bit row) +0x12300b48: 0x78151003: 3DSTATE_CONSTANT_VS_STATE +0x12300b4c: 0x00006b85: dword 1 +0x12300b50: 0x00000000: dword 2 +0x12300b54: 0x00000000: dword 3 +0x12300b58: 0x00000000: dword 4 +0x12300b5c: 0x78100004: 3DSTATE_VS +0x12300b60: 0x00000240: kernel pointer +0x12300b64: 0x00010000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x12300b68: 0x00000000: scratch offset +0x12300b6c: 0x00100800: Dispatch GRF start 1, VUE read length 1, VUE read offset 0 +0x12300b70: 0x76000401: Max Threads 60, Vertex Cache enable, VS func enable +0x12300b74: 0x7a000002: PIPE_CONTROL +0x12300b78: 0x00100002: no write, cs stall, stall at scoreboard, +0x12300b7c: 0x00000000: +0x12300b80: 0x00000000: +0x12300b84: 0x7a000002: PIPE_CONTROL +0x12300b88: 0x00004000: qword write, +0x12300b8c: 0x00000000: +0x12300b90: 0x00000000: +0x12300b94: 0x7a000002: PIPE_CONTROL +0x12300b98: 0x00002804: no write, depth stall, instruction cache invalidate, state cache invalidate, +0x12300b9c: 0x00000000: +0x12300ba0: 0x00000000: +0x12300ba4: 0x78120002: 3DSTATE_CLIP +0x12300ba8: 0x00000400: UserClip distance cull test mask 0x0 +0x12300bac: 0x98000026: Clip enable, API mode OGL, Viewport XY test enable, Viewport Z test enable, Guardband test disable, Clip mode 0, Perspective Divide enable, Non-Perspective Barycentric disable, Tri Provoking 2, Line Provoking 1, Trifan Provoking 2 +0x12300bb0: 0x0003ffe0: Min PointWidth 1, Max PointWidth 2047, Force Zero RTAIndex enable, Max VPIndex 0 +0x12300bb4: 0x78130012: 3DSTATE_SF +0x12300bb8: 0x00600810: Attrib Out 1, Attrib Swizzle enable, VUE read length 1, VUE read offset 1 +0x12300bbc: 0x00000403: Legacy Global DepthBias disable, FrontFace fill 0, BF fill 0, VP transform enable, FrontWinding_CCW +0x12300bc0: 0x62000000: AA disable, CullMode 3, Scissor disable, Multisample m ode 0 +0x12300bc4: 0x4c000808: Last Pixel disable, SubPixel Precision 8, Use PixelWidth 1 +0x12300bc8: 0x00000000: Global Depth Offset Constant 0.000000 +0x12300bcc: 0x00000000: Global Depth Offset Scale 0.000000 +0x12300bd0: 0x00000000: Global Depth Offset Clamp 0.000000 +0x12300bd4: 0x00000000: Attrib 1 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 0 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300bd8: 0x00000000: Attrib 3 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 2 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300bdc: 0x00000000: Attrib 5 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 4 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300be0: 0x00000000: Attrib 7 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 6 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300be4: 0x00000000: Attrib 9 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 8 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300be8: 0x00000000: Attrib 11 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 10 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300bec: 0x00000000: Attrib 13 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 12 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300bf0: 0x00000000: Attrib 15 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 14 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300bf4: 0x00000000: Point Sprite TexCoord Enable +0x12300bf8: 0x00000001: Const Interp Enable +0x12300bfc: 0x00000000: Attrib 7-0 WrapShortest Enable +0x12300c00: 0x00000000: Attrib 15-8 WrapShortest Enable +0x12300c04: 0x78170003: 3DSTATE_CONSTANT_PS_STATE +0x12300c08: 0x00000000: dword 1 +0x12300c0c: 0x00000000: dword 2 +0x12300c10: 0x00000000: dword 3 +0x12300c14: 0x00000000: dword 4 +0x12300c18: 0x78140007: 3DSTATE_WM +0x12300c1c: 0x00000500: kernel start pointer 0 +0x12300c20: 0x00010000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x12300c24: 0x00000000: scratch offset +0x12300c28: 0x80020000: Depth Clear 0, Depth Resolve 0, HiZ Resolve 0, Dispatch GRF start[0] 2, start[1] 0, start[2] 0 +0x12300c2c: 0x4e084002: MaxThreads 40, PS KillPixel 0, PS computed Z 0, PS use sourceZ 0, Thread Dispatch 1, PS use sourceW 0, Dispatch32 0, Dispatch16 1, Dispatch8 0 +0x12300c30: 0x00100000: Num SF output 1, Pos XY offset 0, ZW interp mode 0 , Barycentric interp mode 0x0, Point raster rule 0, Multisample mode 0, Multisample Dispatch mode 0 +0x12300c34: 0x00000000: kernel start pointer 1 +0x12300c38: 0x00000500: kernel start pointer 2 +0x12300c3c: 0x78011302: 3DSTATE_BINDING_TABLE_POINTERS: VS mod 1, GS mod 1, PS mod 1 +0x12300c40: 0x00006a00: VS binding table +0x12300c44: 0x00006a00: GS binding table +0x12300c48: 0x00006a00: WM binding table +0x12300c4c: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x12300c50: 0x0000000c: buffer 0: sequential, pitch 12b +0x12300c54: 0x000033f0: buffer address +0x12300c58: 0x00007fff: max index +0x12300c5c: 0x00000000: mbz +0x12300c60: 0x78090001: 3DSTATE_VERTEX_ELEMENTS +0x12300c64: 0x02400000: buffer 0: valid, type 0x0040, src offset 0x0000 bytes +0x12300c68: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x12300c6c: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300c70: 0x0000002a: vertex count +0x12300c74: 0x00000000: start vertex +0x12300c78: 0x00000001: instance count +0x12300c7c: 0x00000000: start instance +0x12300c80: 0x00000000: index bias +0x12300c84: 0x7b001c04: 3DPRIMITIVE: quad list sequential +0x12300c88: 0x00000028: vertex count +0x12300c8c: 0x0000002a: start vertex +0x12300c90: 0x00000001: instance count +0x12300c94: 0x00000000: start instance +0x12300c98: 0x00000000: index bias +0x12300c9c: 0x78151003: 3DSTATE_CONSTANT_VS_STATE +0x12300ca0: 0x000068c5: dword 1 +0x12300ca4: 0x00000000: dword 2 +0x12300ca8: 0x00000000: dword 3 +0x12300cac: 0x00000000: dword 4 +0x12300cb0: 0x78100004: 3DSTATE_VS +0x12300cb4: 0x00000240: kernel pointer +0x12300cb8: 0x00010000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x12300cbc: 0x00000000: scratch offset +0x12300cc0: 0x00100800: Dispatch GRF start 1, VUE read length 1, VUE read offset 0 +0x12300cc4: 0x76000401: Max Threads 60, Vertex Cache enable, VS func enable +0x12300cc8: 0x7a000002: PIPE_CONTROL +0x12300ccc: 0x00100002: no write, cs stall, stall at scoreboard, +0x12300cd0: 0x00000000: +0x12300cd4: 0x00000000: +0x12300cd8: 0x7a000002: PIPE_CONTROL +0x12300cdc: 0x00004000: qword write, +0x12300ce0: 0x00000000: +0x12300ce4: 0x00000000: +0x12300ce8: 0x7a000002: PIPE_CONTROL +0x12300cec: 0x00002804: no write, depth stall, instruction cache invalidate, state cache invalidate, +0x12300cf0: 0x00000000: +0x12300cf4: 0x00000000: +0x12300cf8: 0x78170003: 3DSTATE_CONSTANT_PS_STATE +0x12300cfc: 0x00000000: dword 1 +0x12300d00: 0x00000000: dword 2 +0x12300d04: 0x00000000: dword 3 +0x12300d08: 0x00000000: dword 4 +0x12300d0c: 0x78140007: 3DSTATE_WM +0x12300d10: 0x00000500: kernel start pointer 0 +0x12300d14: 0x00010000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x12300d18: 0x00000000: scratch offset +0x12300d1c: 0x80020000: Depth Clear 0, Depth Resolve 0, HiZ Resolve 0, Dispatch GRF start[0] 2, start[1] 0, start[2] 0 +0x12300d20: 0x4e084002: MaxThreads 40, PS KillPixel 0, PS computed Z 0, PS use sourceZ 0, Thread Dispatch 1, PS use sourceW 0, Dispatch32 0, Dispatch16 1, Dispatch8 0 +0x12300d24: 0x00100000: Num SF output 1, Pos XY offset 0, ZW interp mode 0 , Barycentric interp mode 0x0, Point raster rule 0, Multisample mode 0, Multisample Dispatch mode 0 +0x12300d28: 0x00000000: kernel start pointer 1 +0x12300d2c: 0x00000500: kernel start pointer 2 +0x12300d30: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300d34: 0x0000002a: vertex count +0x12300d38: 0x00000052: start vertex +0x12300d3c: 0x00000001: instance count +0x12300d40: 0x00000000: start instance +0x12300d44: 0x00000000: index bias +0x12300d48: 0x7b001c04: 3DPRIMITIVE: quad list sequential +0x12300d4c: 0x00000028: vertex count +0x12300d50: 0x0000007c: start vertex +0x12300d54: 0x00000001: instance count +0x12300d58: 0x00000000: start instance +0x12300d5c: 0x00000000: index bias +0x12300d60: 0x78050001: 3DSTATE_URB +0x12300d64: 0x00000100: VS entries 256, alloc size 1 (1024bit row) +0x12300d68: 0x00000000: GS entries 0, alloc size 1 (1024bit row) +0x12300d6c: 0x78151003: 3DSTATE_CONSTANT_VS_STATE +0x12300d70: 0x00006785: dword 1 +0x12300d74: 0x00000000: dword 2 +0x12300d78: 0x00000000: dword 3 +0x12300d7c: 0x00000000: dword 4 +0x12300d80: 0x78100004: 3DSTATE_VS +0x12300d84: 0x00000640: kernel pointer +0x12300d88: 0x00010000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x12300d8c: 0x00000000: scratch offset +0x12300d90: 0x00100800: Dispatch GRF start 1, VUE read length 1, VUE read offset 0 +0x12300d94: 0x76000401: Max Threads 60, Vertex Cache enable, VS func enable +0x12300d98: 0x7a000002: PIPE_CONTROL +0x12300d9c: 0x00100002: no write, cs stall, stall at scoreboard, +0x12300da0: 0x00000000: +0x12300da4: 0x00000000: +0x12300da8: 0x7a000002: PIPE_CONTROL +0x12300dac: 0x00004000: qword write, +0x12300db0: 0x00000000: +0x12300db4: 0x00000000: +0x12300db8: 0x7a000002: PIPE_CONTROL +0x12300dbc: 0x00002804: no write, depth stall, instruction cache invalidate, state cache invalidate, +0x12300dc0: 0x00000000: +0x12300dc4: 0x00000000: +0x12300dc8: 0x78130012: 3DSTATE_SF +0x12300dcc: 0x00600810: Attrib Out 1, Attrib Swizzle enable, VUE read length 1, VUE read offset 1 +0x12300dd0: 0x00000403: Legacy Global DepthBias disable, FrontFace fill 0, BF fill 0, VP transform enable, FrontWinding_CCW +0x12300dd4: 0x62000000: AA disable, CullMode 3, Scissor disable, Multisample m ode 0 +0x12300dd8: 0x4c000808: Last Pixel disable, SubPixel Precision 8, Use PixelWidth 1 +0x12300ddc: 0x00000000: Global Depth Offset Constant 0.000000 +0x12300de0: 0x00000000: Global Depth Offset Scale 0.000000 +0x12300de4: 0x00000000: Global Depth Offset Clamp 0.000000 +0x12300de8: 0x00000000: Attrib 1 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 0 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300dec: 0x00000000: Attrib 3 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 2 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300df0: 0x00000000: Attrib 5 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 4 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300df4: 0x00000000: Attrib 7 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 6 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300df8: 0x00000000: Attrib 9 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 8 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300dfc: 0x00000000: Attrib 11 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 10 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300e00: 0x00000000: Attrib 13 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 12 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300e04: 0x00000000: Attrib 15 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 14 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300e08: 0x00000000: Point Sprite TexCoord Enable +0x12300e0c: 0x00000001: Const Interp Enable +0x12300e10: 0x00000000: Attrib 7-0 WrapShortest Enable +0x12300e14: 0x00000000: Attrib 15-8 WrapShortest Enable +0x12300e18: 0x78011302: 3DSTATE_BINDING_TABLE_POINTERS: VS mod 1, GS mod 1, PS mod 1 +0x12300e1c: 0x00006600: VS binding table +0x12300e20: 0x00006600: GS binding table +0x12300e24: 0x00006600: WM binding table +0x12300e28: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x12300e2c: 0x00000018: buffer 0: sequential, pitch 24b +0x12300e30: 0x00003bb8: buffer address +0x12300e34: 0x00007fff: max index +0x12300e38: 0x00000000: mbz +0x12300e3c: 0x78090003: 3DSTATE_VERTEX_ELEMENTS +0x12300e40: 0x02400000: buffer 0: valid, type 0x0040, src offset 0x0000 bytes +0x12300e44: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x12300e48: 0x0240000c: buffer 0: valid, type 0x0040, src offset 0x000c bytes +0x12300e4c: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x12300e50: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300e54: 0x00000052: vertex count +0x12300e58: 0x00000000: start vertex +0x12300e5c: 0x00000001: instance count +0x12300e60: 0x00000000: start instance +0x12300e64: 0x00000000: index bias +0x12300e68: 0x78151003: 3DSTATE_CONSTANT_VS_STATE +0x12300e6c: 0x000064c5: dword 1 +0x12300e70: 0x00000000: dword 2 +0x12300e74: 0x00000000: dword 3 +0x12300e78: 0x00000000: dword 4 +0x12300e7c: 0x78100004: 3DSTATE_VS +0x12300e80: 0x00000640: kernel pointer +0x12300e84: 0x00010000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x12300e88: 0x00000000: scratch offset +0x12300e8c: 0x00100800: Dispatch GRF start 1, VUE read length 1, VUE read offset 0 +0x12300e90: 0x76000401: Max Threads 60, Vertex Cache enable, VS func enable +0x12300e94: 0x7a000002: PIPE_CONTROL +0x12300e98: 0x00100002: no write, cs stall, stall at scoreboard, +0x12300e9c: 0x00000000: +0x12300ea0: 0x00000000: +0x12300ea4: 0x7a000002: PIPE_CONTROL +0x12300ea8: 0x00004000: qword write, +0x12300eac: 0x00000000: +0x12300eb0: 0x00000000: +0x12300eb4: 0x7a000002: PIPE_CONTROL +0x12300eb8: 0x00002804: no write, depth stall, instruction cache invalidate, state cache invalidate, +0x12300ebc: 0x00000000: +0x12300ec0: 0x00000000: +0x12300ec4: 0x78120002: 3DSTATE_CLIP +0x12300ec8: 0x00000400: UserClip distance cull test mask 0x0 +0x12300ecc: 0x98000026: Clip enable, API mode OGL, Viewport XY test enable, Viewport Z test enable, Guardband test disable, Clip mode 0, Perspective Divide enable, Non-Perspective Barycentric disable, Tri Provoking 2, Line Provoking 1, Trifan Provoking 2 +0x12300ed0: 0x0003ffe0: Min PointWidth 1, Max PointWidth 2047, Force Zero RTAIndex enable, Max VPIndex 0 +0x12300ed4: 0x78130012: 3DSTATE_SF +0x12300ed8: 0x00600810: Attrib Out 1, Attrib Swizzle enable, VUE read length 1, VUE read offset 1 +0x12300edc: 0x00000403: Legacy Global DepthBias disable, FrontFace fill 0, BF fill 0, VP transform enable, FrontWinding_CCW +0x12300ee0: 0x62000000: AA disable, CullMode 3, Scissor disable, Multisample m ode 0 +0x12300ee4: 0x4c000808: Last Pixel disable, SubPixel Precision 8, Use PixelWidth 1 +0x12300ee8: 0x00000000: Global Depth Offset Constant 0.000000 +0x12300eec: 0x00000000: Global Depth Offset Scale 0.000000 +0x12300ef0: 0x00000000: Global Depth Offset Clamp 0.000000 +0x12300ef4: 0x00000000: Attrib 1 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 0 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300ef8: 0x00000000: Attrib 3 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 2 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300efc: 0x00000000: Attrib 5 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 4 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300f00: 0x00000000: Attrib 7 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 6 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300f04: 0x00000000: Attrib 9 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 8 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300f08: 0x00000000: Attrib 11 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 10 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300f0c: 0x00000000: Attrib 13 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 12 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300f10: 0x00000000: Attrib 15 (Override , Const Source 0, Swizzle Select 0, Source 0); Attrib 14 (Override , Const Source 0, Swizzle Select 0, Source 0) +0x12300f14: 0x00000000: Point Sprite TexCoord Enable +0x12300f18: 0x00000000: Const Interp Enable +0x12300f1c: 0x00000000: Attrib 7-0 WrapShortest Enable +0x12300f20: 0x00000000: Attrib 15-8 WrapShortest Enable +0x12300f24: 0x78170003: 3DSTATE_CONSTANT_PS_STATE +0x12300f28: 0x00000000: dword 1 +0x12300f2c: 0x00000000: dword 2 +0x12300f30: 0x00000000: dword 3 +0x12300f34: 0x00000000: dword 4 +0x12300f38: 0x78140007: 3DSTATE_WM +0x12300f3c: 0x00000900: kernel start pointer 0 +0x12300f40: 0x00010000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x12300f44: 0x00000000: scratch offset +0x12300f48: 0x80060000: Depth Clear 0, Depth Resolve 0, HiZ Resolve 0, Dispatch GRF start[0] 6, start[1] 0, start[2] 0 +0x12300f4c: 0x4e084002: MaxThreads 40, PS KillPixel 0, PS computed Z 0, PS use sourceZ 0, Thread Dispatch 1, PS use sourceW 0, Dispatch32 0, Dispatch16 1, Dispatch8 0 +0x12300f50: 0x00100400: Num SF output 1, Pos XY offset 0, ZW interp mode 0 , Barycentric interp mode 0x1, Point raster rule 0, Multisample mode 0, Multisample Dispatch mode 0 +0x12300f54: 0x00000000: kernel start pointer 1 +0x12300f58: 0x00000900: kernel start pointer 2 +0x12300f5c: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300f60: 0x00000016: vertex count +0x12300f64: 0x00000052: start vertex +0x12300f68: 0x00000001: instance count +0x12300f6c: 0x00000000: start instance +0x12300f70: 0x00000000: index bias +0x12300f74: 0x05000000: MI_BATCH_BUFFER_END diff --git a/intel/tests/gen6-3d.batch.sh b/intel/tests/gen6-3d.batch.sh new file mode 120000 index 0000000..796ca5f --- /dev/null +++ b/intel/tests/gen6-3d.batch.sh @@ -0,0 +1 @@ +test-batch.sh
\ No newline at end of file diff --git a/intel/tests/gen7-2d-copy.batch b/intel/tests/gen7-2d-copy.batch Binary files differnew file mode 100644 index 0000000..ce7fc29 --- /dev/null +++ b/intel/tests/gen7-2d-copy.batch diff --git a/intel/tests/gen7-2d-copy.batch-ref.txt b/intel/tests/gen7-2d-copy.batch-ref.txt new file mode 100644 index 0000000..0d621d3 --- /dev/null +++ b/intel/tests/gen7-2d-copy.batch-ref.txt @@ -0,0 +1,14 @@ +0x12300000: 0x54f08006: XY_SRC_COPY_BLT (rgb enabled, alpha enabled, src tile 1, dst tile 0) +0x12300004: 0x03cc0190: format 8888, pitch 400, rop 0xcc, clipping disabled, +0x12300008: 0x00000000: dst (0,0) +0x1230000c: 0x00640064: dst (100,100) +0x12300010: 0x122e9000: dst offset 0x122e9000 +0x12300014: 0x00000000: src (0,0) +0x12300018: 0x00000080: src pitch 128 +0x1230001c: 0x02ff1000: src offset 0x02ff1000 +0x12300020: 0x13000002: MI_FLUSH_DW post_sync_op='no write' +0x12300024: 0x00000000: address +0x12300028: 0x00000000: dword +0x1230002c: 0x00000000: upper dword +0x12300030: 0x05000000: MI_BATCH_BUFFER_END +0x12300034: 0x00000000: diff --git a/intel/tests/gen7-2d-copy.batch.sh b/intel/tests/gen7-2d-copy.batch.sh new file mode 120000 index 0000000..796ca5f --- /dev/null +++ b/intel/tests/gen7-2d-copy.batch.sh @@ -0,0 +1 @@ +test-batch.sh
\ No newline at end of file diff --git a/intel/tests/gen7-3d.batch b/intel/tests/gen7-3d.batch Binary files differnew file mode 100644 index 0000000..328ec88 --- /dev/null +++ b/intel/tests/gen7-3d.batch diff --git a/intel/tests/gen7-3d.batch-ref.txt b/intel/tests/gen7-3d.batch-ref.txt new file mode 100644 index 0000000..cd2dfc4 --- /dev/null +++ b/intel/tests/gen7-3d.batch-ref.txt @@ -0,0 +1,212 @@ +0x12300000: 0x69040000: 3DSTATE_PIPELINE_SELECT +0x12300004: 0x790d0002: 3DSTATE_MULTISAMPLE +0x12300008: 0x00000000: dword 1 +0x1230000c: 0x00000000: dword 2 +0x12300010: 0x00000000: dword 3 +0x12300014: 0x78180000: 3DSTATE_SAMPLE_MASK +0x12300018: 0x00000001: dword 1 +0x1230001c: 0x61020000: STATE_SIP +0x12300020: 0x00000000: dword 1 +0x12300024: 0x680b0000: 3DSTATE_VF_STATISTICS +0x12300028: 0x61010008: STATE_BASE_ADDRESS +0x1230002c: 0x00000001: general state base address 0x00000000 +0x12300030: 0x091ba001: surface state base address 0x091ba000 +0x12300034: 0x091ba001: dynamic state base address 0x091ba000 +0x12300038: 0x00000001: indirect state base address 0x00000000 +0x1230003c: 0x091c2001: instruction state base address 0x091c2000 +0x12300040: 0x00000001: general state upper bound disabled +0x12300044: 0x091c2001: dynamic state upper bound 0x091c2000 +0x12300048: 0x00000001: indirect state upper bound disabled +0x1230004c: 0x00000001: instruction state upper bound disabled +0x12300050: 0x78230000: 3DSTATE_VIEWPORT_STATE_POINTERS_CC +0x12300054: 0x00007fe0: pointer to CC viewport +0x12300058: 0x78210000: 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP +0x1230005c: 0x00007f80: pointer to SF_CLIP viewport +0x12300060: 0x78300000: 3DSTATE_URB_VS +0x12300064: 0x040002c0: 16KB start, size=1 64B rows, nr_entries=704, total size 45056B +0x12300068: 0x78330000: 3DSTATE_URB_GS +0x1230006c: 0x04000000: 16KB start, size=1 64B rows, nr_entries=0, total size 0B +0x12300070: 0x78310000: 3DSTATE_URB_HS +0x12300074: 0x04000000: 16KB start, size=1 64B rows, nr_entries=0, total size 0B +0x12300078: 0x78320000: 3DSTATE_URB_DS +0x1230007c: 0x04000000: 16KB start, size=1 64B rows, nr_entries=0, total size 0B +0x12300080: 0x78240000: 3DSTATE_BLEND_STATE_POINTERS +0x12300084: 0x00007f41: pointer to BLEND_STATE at 0x00007f40 (changed) +0x12300088: 0x780e0000: 3DSTATE_CC_STATE_POINTERS +0x1230008c: 0x00007f01: pointer to COLOR_CALC_STATE at 0x00007f00 (changed) +0x12300090: 0x78250000: 3DSTATE_DEPTH_STENCIL_STATE_POINTERS +0x12300094: 0x00007ec1: pointer to DEPTH_STENCIL_STATE at 0x00007ec0 (changed) +0x12300098: 0x78160005: 3DSTATE_CONSTANT_GS +0x1230009c: 0x00000000: len 0 = 0, len 1 = 0 +0x123000a0: 0x00000000: len 2 = 0, len 3 = 0 +0x123000a4: 0x00000000: pointer to constbuf 0 +0x123000a8: 0x00000000: pointer to constbuf 1 +0x123000ac: 0x00000000: pointer to constbuf 2 +0x123000b0: 0x00000000: pointer to constbuf 3 +0x123000b4: 0x78110005: 3DSTATE_GS +0x123000b8: 0x00000000: kernel pointer +0x123000bc: 0x00000000: SPF=0, VME=0, Sampler Count 0, Binding table count 0 +0x123000c0: 0x00000000: scratch offset +0x123000c4: 0x00000401: Dispatch GRF start 1, VUE read length 0, VUE read offset 0 +0x123000c8: 0x00000400: Max Threads 1, Rendering disable +0x123000cc: 0x00000000: Reorder disable, Discard Adjaceny disable, GS disable +0x123000d0: 0x78290000: 3DSTATE_BINDING_TABLE_POINTERS_GS +0x123000d4: 0x00000000: dword 1 +0x123000d8: 0x78190005: 3DSTATE_CONSTANT_HS +0x123000dc: 0x00000000: len 0 = 0, len 1 = 0 +0x123000e0: 0x00000000: len 2 = 0, len 3 = 0 +0x123000e4: 0x00000000: pointer to constbuf 0 +0x123000e8: 0x00000000: pointer to constbuf 1 +0x123000ec: 0x00000000: pointer to constbuf 2 +0x123000f0: 0x00000000: pointer to constbuf 3 +0x123000f4: 0x781b0005: 3DSTATE_HS +0x123000f8: 0x00000000: dword 1 +0x123000fc: 0x00000000: dword 2 +0x12300100: 0x00000000: dword 3 +0x12300104: 0x00000000: dword 4 +0x12300108: 0x00000000: dword 5 +0x1230010c: 0x00000000: dword 6 +0x12300110: 0x78270000: 3DSTATE_BINDING_TABLE_POINTERS_HS +0x12300114: 0x00000000: dword 1 +0x12300118: 0x781c0002: 3DSTATE_TE +0x1230011c: 0x00000000: dword 1 +0x12300120: 0x00000000: dword 2 +0x12300124: 0x00000000: dword 3 +0x12300128: 0x781a0005: 3DSTATE_CONSTANT_DS +0x1230012c: 0x00000000: len 0 = 0, len 1 = 0 +0x12300130: 0x00000000: len 2 = 0, len 3 = 0 +0x12300134: 0x00000000: pointer to constbuf 0 +0x12300138: 0x00000000: pointer to constbuf 1 +0x1230013c: 0x00000000: pointer to constbuf 2 +0x12300140: 0x00000000: pointer to constbuf 3 +0x12300144: 0x781d0004: 3DSTATE_DS +0x12300148: 0x00000000: dword 1 +0x1230014c: 0x00000000: dword 2 +0x12300150: 0x00000000: dword 3 +0x12300154: 0x00000000: dword 4 +0x12300158: 0x00000000: dword 5 +0x1230015c: 0x78280000: 3DSTATE_BINDING_TABLE_POINTERS_DS +0x12300160: 0x00000000: dword 1 +0x12300164: 0x78260000: 3DSTATE_BINDING_TABLE_POINTERS_VS +0x12300168: 0x00007c40: dword 1 +0x1230016c: 0x782b0000: 3DSTATE_SAMPLER_STATE_POINTERS_VS +0x12300170: 0x00007c20: dword 1 +0x12300174: 0x79120000: 3DSTATE_PUSH_CONSTANT_ALLOC_VS +0x12300178: 0x00000008: dword 1 +0x1230017c: 0x78150005: 3DSTATE_CONSTANT_VS +0x12300180: 0x00000002: len 0 = 2, len 1 = 0 +0x12300184: 0x00000000: len 2 = 0, len 3 = 0 +0x12300188: 0x00007e00: pointer to constbuf 0 +0x1230018c: 0x00000000: pointer to constbuf 1 +0x12300190: 0x00000000: pointer to constbuf 2 +0x12300194: 0x00000000: pointer to constbuf 3 +0x12300198: 0x78100004: 3DSTATE_VS +0x1230019c: 0x00000000: kernel pointer +0x123001a0: 0x08000000: SPF=0, VME=0, Sampler Count 1, Binding table count 0 +0x123001a4: 0x00000000: scratch offset +0x123001a8: 0x00100800: Dispatch GRF start 1, VUE read length 1, VUE read offset 0 +0x123001ac: 0xfe000401: Max Threads 128, Vertex Cache enable, VS func enable +0x123001b0: 0x781e0001: 3DSTATE_STREAMOUT +0x123001b4: 0x00000000: dword 1 +0x123001b8: 0x00000000: dword 2 +0x123001bc: 0x78120002: 3DSTATE_CLIP +0x123001c0: 0x00150400: UserClip distance cull test mask 0x0 +0x123001c4: 0x98000026: Clip enable, API mode OGL, Viewport XY test enable, Viewport Z test enable, Guardband test disable, Clip mode 0, Perspective Divide enable, Non-Perspective Barycentric disable, Tri Provoking 2, Line Provoking 1, Trifan Provoking 2 +0x123001c8: 0x0003ffe0: Min PointWidth 1, Max PointWidth 2047, Force Zero RTAIndex enable, Max VPIndex 0 +0x123001cc: 0x781f000c: 3DSTATE_SBE +0x123001d0: 0x00600810: dword 1 +0x123001d4: 0x00000000: dword 2 +0x123001d8: 0x00000000: dword 3 +0x123001dc: 0x00000000: dword 4 +0x123001e0: 0x00000000: dword 5 +0x123001e4: 0x00000000: dword 6 +0x123001e8: 0x00000000: dword 7 +0x123001ec: 0x00000000: dword 8 +0x123001f0: 0x00000000: dword 9 +0x123001f4: 0x00000000: dword 10 +0x123001f8: 0x00000000: dword 11 +0x123001fc: 0x00000000: dword 12 +0x12300200: 0x00000000: dword 13 +0x12300204: 0x78130005: 3DSTATE_SF +0x12300208: 0x00001403: dword 1 +0x1230020c: 0x22000000: dword 2 +0x12300210: 0x4c000808: dword 3 +0x12300214: 0x00000000: dword 4 +0x12300218: 0x00000000: dword 5 +0x1230021c: 0x00000000: dword 6 +0x12300220: 0x78140001: 3DSTATE_WM +0x12300224: 0xa0000840: (PP ), point UR +0x12300228: 0x00000000: MS +0x1230022c: 0x782a0000: 3DSTATE_BINDING_TABLE_POINTERS_PS +0x12300230: 0x00007c40: dword 1 +0x12300234: 0x782f0000: 3DSTATE_SAMPLER_STATE_POINTERS_PS +0x12300238: 0x00007c20: dword 1 +0x1230023c: 0x79160000: 3DSTATE_PUSH_CONSTANT_ALLOC_PS +0x12300240: 0x00080008: dword 1 +0x12300244: 0x78170005: 3DSTATE_CONSTANT_PS +0x12300248: 0x00000000: len 0 = 0, len 1 = 0 +0x1230024c: 0x00000000: len 2 = 0, len 3 = 0 +0x12300250: 0x00000000: pointer to constbuf 0 +0x12300254: 0x00000000: pointer to constbuf 1 +0x12300258: 0x00000000: pointer to constbuf 2 +0x1230025c: 0x00000000: pointer to constbuf 3 +0x12300260: 0x78200006: 3DSTATE_PS +0x12300264: 0x00000140: dword 1 +0x12300268: 0x08000000: dword 2 +0x1230026c: 0x00000000: dword 3 +0x12300270: 0x55000403: dword 4 +0x12300274: 0x00040006: dword 5 +0x12300278: 0x00000000: dword 6 +0x1230027c: 0x00000240: dword 7 +0x12300280: 0x780f0000: 3DSTATE_SCISSOR_POINTERS +0x12300284: 0x00007be0: scissor rect offset +0x12300288: 0x7a000002: PIPE_CONTROL +0x1230028c: 0x00002000: no write, depth stall, +0x12300290: 0x00000000: +0x12300294: 0x00000000: +0x12300298: 0x7a000002: PIPE_CONTROL +0x1230029c: 0x00000001: no write, depth cache flush, +0x123002a0: 0x00000000: +0x123002a4: 0x00000000: +0x123002a8: 0x7a000002: PIPE_CONTROL +0x123002ac: 0x00002000: no write, depth stall, +0x123002b0: 0x00000000: +0x123002b4: 0x00000000: +0x123002b8: 0x78050005: 3DSTATE_DEPTH_BUFFER +0x123002bc: 0xe0040000: dword 1 +0x123002c0: 0x00000000: dword 2 +0x123002c4: 0x00000000: dword 3 +0x123002c8: 0x00000000: dword 4 +0x123002cc: 0x00000000: dword 5 +0x123002d0: 0x00000000: dword 6 +0x123002d4: 0x78070001: 3DSTATE_HIER_DEPTH_BUFFER +0x123002d8: 0x00000000: pitch 1b +0x123002dc: 0x00000000: pointer to HiZ buffer +0x123002e0: 0x78060001: 3DSTATE_STENCIL_BUFFER +0x123002e4: 0x00000000: dword 1 +0x123002e8: 0x00000000: dword 2 +0x123002ec: 0x78040001: 3DSTATE_CLEAR_PARAMS +0x123002f0: 0x00000000: dword 1 +0x123002f4: 0x00000000: dword 2 +0x123002f8: 0x79000002: 3DSTATE_DRAWING_RECTANGLE +0x123002fc: 0x00000000: top left: 0,0 +0x12300300: 0x00130077: bottom right: 119,19 +0x12300304: 0x00000000: origin: 0,0 +0x12300308: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x1230030c: 0x00004014: buffer 0: sequential, pitch 20b +0x12300310: 0x158b3000: buffer address +0x12300314: 0x158c2fff: max index +0x12300318: 0x00000000: mbz +0x1230031c: 0x78090003: 3DSTATE_VERTEX_ELEMENTS +0x12300320: 0x02850000: buffer 0: valid, type 0x0085, src offset 0x0000 bytes +0x12300324: 0x11230000: (X, Y, 0.0, 1.0), dst offset 0x00 bytes +0x12300328: 0x02400008: buffer 0: valid, type 0x0040, src offset 0x0008 bytes +0x1230032c: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x12300330: 0x7b000005: 3DPRIMITIVE: +0x12300334: 0x00000007: quad list sequential +0x12300338: 0x00000004: vertex count +0x1230033c: 0x00000000: start vertex +0x12300340: 0x00000001: instance count +0x12300344: 0x00000000: start instance +0x12300348: 0x00000000: index bias +0x1230034c: 0x05000000: MI_BATCH_BUFFER_END diff --git a/intel/tests/gen7-3d.batch.sh b/intel/tests/gen7-3d.batch.sh new file mode 120000 index 0000000..796ca5f --- /dev/null +++ b/intel/tests/gen7-3d.batch.sh @@ -0,0 +1 @@ +test-batch.sh
\ No newline at end of file diff --git a/intel/tests/gm45-3d.batch b/intel/tests/gm45-3d.batch Binary files differnew file mode 100644 index 0000000..549608b --- /dev/null +++ b/intel/tests/gm45-3d.batch diff --git a/intel/tests/gm45-3d.batch-ref.txt b/intel/tests/gm45-3d.batch-ref.txt new file mode 100644 index 0000000..5a47d77 --- /dev/null +++ b/intel/tests/gm45-3d.batch-ref.txt @@ -0,0 +1,488 @@ +0x12300000: 0x69040000: 3DSTATE_PIPELINE_SELECT +0x12300004: 0x79090000: 3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP +0x12300008: 0x00000000: dword 1 +0x1230000c: 0x61020000: STATE_SIP +0x12300010: 0x00000000: dword 1 +0x12300014: 0x680b0000: 3DSTATE_VF_STATISTICS +0x12300018: 0x61010004: STATE_BASE_ADDRESS +0x1230001c: 0x00000001: general state base address 0x00000000 +0x12300020: 0x00000001: surface state base address 0x00000000 +0x12300024: 0x00000001: indirect state base address 0x00000000 +0x12300028: 0x00000001: general state upper bound disabled +0x1230002c: 0x00000001: indirect state upper bound disabled +0x12300030: 0x78010004: 3DSTATE_BINDING_TABLE_POINTERS +0x12300034: 0x00007e20: VS binding table +0x12300038: 0x00000000: GS binding table +0x1230003c: 0x00000000: Clip binding table +0x12300040: 0x00000000: SF binding table +0x12300044: 0x00007e20: WM binding table +0x12300048: 0x79010003: 3DSTATE_CONSTANT_COLOR +0x1230004c: 0x00000000: dword 1 +0x12300050: 0x00000000: dword 2 +0x12300054: 0x00000000: dword 3 +0x12300058: 0x00000000: dword 4 +0x1230005c: 0x79050004: 3DSTATE_DEPTH_BUFFER +0x12300060: 0x2c0805ff: 2D, z24s8, pitch = 1536 bytes, tiled +0x12300064: 0x00000000: depth offset +0x12300068: 0x09584ac0: 300x300 +0x1230006c: 0x00000000: volume depth +0x12300070: 0x00000000: +0x12300074: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300078: 0x00007d60: VS state +0x1230007c: 0x00000000: GS state +0x12300080: 0x00007d21: Clip state +0x12300084: 0x00007d80: SF state +0x12300088: 0x00007de0: WM state +0x1230008c: 0x00007fc0: CC state +0x12300090: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300094: 0x05212040: vs fence: 64, clip_fence: 82, gs_fence: 72 +0x12300098: 0x18000062: sf fence: 98, vfe_fence: 0, cs_fence: 384 +0x1230009c: 0x60010000: CS_URB_STATE +0x123000a0: 0x00000024: entry_size: 2 [192 bytes], n_entries: 4 +0x123000a4: 0x79000002: 3DSTATE_DRAWING_RECTANGLE +0x123000a8: 0x00000000: top left: 0,0 +0x123000ac: 0x012b012b: bottom right: 299,299 +0x123000b0: 0x00000000: origin: 0,0 +0x123000b4: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x123000b8: 0x0000000c: buffer 0: sequential, pitch 12b +0x123000bc: 0x00000000: buffer address +0x123000c0: 0x00000000: max index +0x123000c4: 0x00000000: mbz +0x123000c8: 0x78090001: 3DSTATE_VERTEX_ELEMENTS +0x123000cc: 0x04400000: buffer 0: valid, type 0x0040, src offset 0x0000 bytes +0x123000d0: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x123000d4: 0x60020100: CONSTANT_BUFFER: valid +0x123000d8: 0x00000001: offset: 0x00000000, length: 128 bytes +0x123000dc: 0x7b001804: 3DPRIMITIVE: tri fan sequential +0x123000e0: 0x00000004: vertex count +0x123000e4: 0x00000000: start vertex +0x123000e8: 0x00000001: instance count +0x123000ec: 0x00000000: start instance +0x123000f0: 0x00000000: index bias +0x123000f4: 0x78010004: 3DSTATE_BINDING_TABLE_POINTERS +0x123000f8: 0x00007b40: VS binding table +0x123000fc: 0x00000000: GS binding table +0x12300100: 0x00000000: Clip binding table +0x12300104: 0x00000000: SF binding table +0x12300108: 0x00007b40: WM binding table +0x1230010c: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300110: 0x00007aa0: VS state +0x12300114: 0x00007a41: GS state +0x12300118: 0x00007a61: Clip state +0x1230011c: 0x00007ac0: SF state +0x12300120: 0x00007b00: WM state +0x12300124: 0x00007cc0: CC state +0x12300128: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x1230012c: 0x05212040: vs fence: 64, clip_fence: 82, gs_fence: 72 +0x12300130: 0x18000062: sf fence: 98, vfe_fence: 0, cs_fence: 384 +0x12300134: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x12300138: 0x0000000c: buffer 0: sequential, pitch 12b +0x1230013c: 0x00000000: buffer address +0x12300140: 0x00000000: max index +0x12300144: 0x00000000: mbz +0x12300148: 0x60020100: CONSTANT_BUFFER: valid +0x1230014c: 0x00000082: offset: 0x00000080, length: 192 bytes +0x12300150: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300154: 0x00000052: vertex count +0x12300158: 0x00000000: start vertex +0x1230015c: 0x00000001: instance count +0x12300160: 0x00000000: start instance +0x12300164: 0x00000000: index bias +0x12300168: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x1230016c: 0x00007aa0: VS state +0x12300170: 0x00007a21: GS state +0x12300174: 0x00007a61: Clip state +0x12300178: 0x00007ac0: SF state +0x1230017c: 0x00007b00: WM state +0x12300180: 0x00007cc0: CC state +0x12300184: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300188: 0x05212040: vs fence: 64, clip_fence: 82, gs_fence: 72 +0x1230018c: 0x18000062: sf fence: 98, vfe_fence: 0, cs_fence: 384 +0x12300190: 0x60020100: CONSTANT_BUFFER: valid +0x12300194: 0x00000082: offset: 0x00000080, length: 192 bytes +0x12300198: 0x7b001c04: 3DPRIMITIVE: quad list sequential +0x1230019c: 0x00000050: vertex count +0x123001a0: 0x00000052: start vertex +0x123001a4: 0x00000001: instance count +0x123001a8: 0x00000000: start instance +0x123001ac: 0x00000000: index bias +0x123001b0: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x123001b4: 0x00007aa0: VS state +0x123001b8: 0x00007a01: GS state +0x123001bc: 0x00007a61: Clip state +0x123001c0: 0x00007ac0: SF state +0x123001c4: 0x00007b00: WM state +0x123001c8: 0x00007cc0: CC state +0x123001cc: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x123001d0: 0x05212040: vs fence: 64, clip_fence: 82, gs_fence: 72 +0x123001d4: 0x18000062: sf fence: 98, vfe_fence: 0, cs_fence: 384 +0x123001d8: 0x60020100: CONSTANT_BUFFER: valid +0x123001dc: 0x00000142: offset: 0x00000140, length: 192 bytes +0x123001e0: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x123001e4: 0x00000052: vertex count +0x123001e8: 0x000000a2: start vertex +0x123001ec: 0x00000001: instance count +0x123001f0: 0x00000000: start instance +0x123001f4: 0x00000000: index bias +0x123001f8: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x123001fc: 0x00007aa0: VS state +0x12300200: 0x000079e1: GS state +0x12300204: 0x00007a61: Clip state +0x12300208: 0x00007ac0: SF state +0x1230020c: 0x00007b00: WM state +0x12300210: 0x00007cc0: CC state +0x12300214: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300218: 0x05212040: vs fence: 64, clip_fence: 82, gs_fence: 72 +0x1230021c: 0x18000062: sf fence: 98, vfe_fence: 0, cs_fence: 384 +0x12300220: 0x60020100: CONSTANT_BUFFER: valid +0x12300224: 0x00000142: offset: 0x00000140, length: 192 bytes +0x12300228: 0x7b001c04: 3DPRIMITIVE: quad list sequential +0x1230022c: 0x00000050: vertex count +0x12300230: 0x000000f4: start vertex +0x12300234: 0x00000001: instance count +0x12300238: 0x00000000: start instance +0x1230023c: 0x00000000: index bias +0x12300240: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300244: 0x00007aa0: VS state +0x12300248: 0x000079c1: GS state +0x1230024c: 0x00007a61: Clip state +0x12300250: 0x00007ac0: SF state +0x12300254: 0x00007b00: WM state +0x12300258: 0x00007cc0: CC state +0x1230025c: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300260: 0x05212040: vs fence: 64, clip_fence: 82, gs_fence: 72 +0x12300264: 0x18000062: sf fence: 98, vfe_fence: 0, cs_fence: 384 +0x12300268: 0x60020100: CONSTANT_BUFFER: valid +0x1230026c: 0x00000142: offset: 0x00000140, length: 192 bytes +0x12300270: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300274: 0x000079a0: VS state +0x12300278: 0x000079c1: GS state +0x1230027c: 0x00007a61: Clip state +0x12300280: 0x00007ac0: SF state +0x12300284: 0x00007b00: WM state +0x12300288: 0x00007cc0: CC state +0x1230028c: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300290: 0x05212040: vs fence: 64, clip_fence: 82, gs_fence: 72 +0x12300294: 0x18000062: sf fence: 98, vfe_fence: 0, cs_fence: 384 +0x12300298: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x1230029c: 0x00000018: buffer 0: sequential, pitch 24b +0x123002a0: 0x00000f48: buffer address +0x123002a4: 0x00000000: max index +0x123002a8: 0x00000000: mbz +0x123002ac: 0x78090003: 3DSTATE_VERTEX_ELEMENTS +0x123002b0: 0x04400000: buffer 0: valid, type 0x0040, src offset 0x0000 bytes +0x123002b4: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x123002b8: 0x0440000c: buffer 0: valid, type 0x0040, src offset 0x000c bytes +0x123002bc: 0x11130004: (X, Y, Z, 1.0), dst offset 0x10 bytes +0x123002c0: 0x60020100: CONSTANT_BUFFER: valid +0x123002c4: 0x00000202: offset: 0x00000200, length: 192 bytes +0x123002c8: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x123002cc: 0x000000a2: vertex count +0x123002d0: 0x00000000: start vertex +0x123002d4: 0x00000001: instance count +0x123002d8: 0x00000000: start instance +0x123002dc: 0x00000000: index bias +0x123002e0: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x123002e4: 0x000079a0: VS state +0x123002e8: 0x00000000: GS state +0x123002ec: 0x00007901: Clip state +0x123002f0: 0x00007940: SF state +0x123002f4: 0x00007960: WM state +0x123002f8: 0x00007cc0: CC state +0x123002fc: 0x00000000: MI_NOOP +0x12300300: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300304: 0x05212040: vs fence: 64, clip_fence: 82, gs_fence: 72 +0x12300308: 0x18000062: sf fence: 98, vfe_fence: 0, cs_fence: 384 +0x1230030c: 0x60020100: CONSTANT_BUFFER: valid +0x12300310: 0x00000202: offset: 0x00000200, length: 192 bytes +0x12300314: 0x7b001404: 3DPRIMITIVE: tri strip sequential +0x12300318: 0x0000002a: vertex count +0x1230031c: 0x000000a2: start vertex +0x12300320: 0x00000001: instance count +0x12300324: 0x00000000: start instance +0x12300328: 0x00000000: index bias +0x1230032c: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300330: 0x00007860: VS state +0x12300334: 0x00007801: GS state +0x12300338: 0x00007821: Clip state +0x1230033c: 0x00007880: SF state +0x12300340: 0x000078a0: WM state +0x12300344: 0x00007cc0: CC state +0x12300348: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x1230034c: 0x05212040: vs fence: 64, clip_fence: 82, gs_fence: 72 +0x12300350: 0x18000062: sf fence: 98, vfe_fence: 0, cs_fence: 384 +0x12300354: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x12300358: 0x0000000c: buffer 0: sequential, pitch 12b +0x1230035c: 0x00002268: buffer address +0x12300360: 0x00000000: max index +0x12300364: 0x00000000: mbz +0x12300368: 0x78090001: 3DSTATE_VERTEX_ELEMENTS +0x1230036c: 0x04400000: buffer 0: valid, type 0x0040, src offset 0x0000 bytes +0x12300370: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x12300374: 0x60020100: CONSTANT_BUFFER: valid +0x12300378: 0x000002c2: offset: 0x000002c0, length: 192 bytes +0x1230037c: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300380: 0x0000002a: vertex count +0x12300384: 0x00000000: start vertex +0x12300388: 0x00000001: instance count +0x1230038c: 0x00000000: start instance +0x12300390: 0x00000000: index bias +0x12300394: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300398: 0x00007860: VS state +0x1230039c: 0x000077e1: GS state +0x123003a0: 0x00007821: Clip state +0x123003a4: 0x00007880: SF state +0x123003a8: 0x000078a0: WM state +0x123003ac: 0x00007cc0: CC state +0x123003b0: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x123003b4: 0x05212040: vs fence: 64, clip_fence: 82, gs_fence: 72 +0x123003b8: 0x18000062: sf fence: 98, vfe_fence: 0, cs_fence: 384 +0x123003bc: 0x60020100: CONSTANT_BUFFER: valid +0x123003c0: 0x000002c2: offset: 0x000002c0, length: 192 bytes +0x123003c4: 0x7b001c04: 3DPRIMITIVE: quad list sequential +0x123003c8: 0x00000028: vertex count +0x123003cc: 0x0000002a: start vertex +0x123003d0: 0x00000001: instance count +0x123003d4: 0x00000000: start instance +0x123003d8: 0x00000000: index bias +0x123003dc: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x123003e0: 0x00007860: VS state +0x123003e4: 0x000077c1: GS state +0x123003e8: 0x00007821: Clip state +0x123003ec: 0x00007880: SF state +0x123003f0: 0x000078a0: WM state +0x123003f4: 0x00007cc0: CC state +0x123003f8: 0x00000000: MI_NOOP +0x123003fc: 0x00000000: MI_NOOP +0x12300400: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300404: 0x05212040: vs fence: 64, clip_fence: 82, gs_fence: 72 +0x12300408: 0x18000062: sf fence: 98, vfe_fence: 0, cs_fence: 384 +0x1230040c: 0x60020100: CONSTANT_BUFFER: valid +0x12300410: 0x00000382: offset: 0x00000380, length: 192 bytes +0x12300414: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300418: 0x0000002a: vertex count +0x1230041c: 0x00000052: start vertex +0x12300420: 0x00000001: instance count +0x12300424: 0x00000000: start instance +0x12300428: 0x00000000: index bias +0x1230042c: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300430: 0x00007860: VS state +0x12300434: 0x000077a1: GS state +0x12300438: 0x00007821: Clip state +0x1230043c: 0x00007880: SF state +0x12300440: 0x000078a0: WM state +0x12300444: 0x00007cc0: CC state +0x12300448: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x1230044c: 0x05212040: vs fence: 64, clip_fence: 82, gs_fence: 72 +0x12300450: 0x18000062: sf fence: 98, vfe_fence: 0, cs_fence: 384 +0x12300454: 0x60020100: CONSTANT_BUFFER: valid +0x12300458: 0x00000382: offset: 0x00000380, length: 192 bytes +0x1230045c: 0x7b001c04: 3DPRIMITIVE: quad list sequential +0x12300460: 0x00000028: vertex count +0x12300464: 0x0000007c: start vertex +0x12300468: 0x00000001: instance count +0x1230046c: 0x00000000: start instance +0x12300470: 0x00000000: index bias +0x12300474: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300478: 0x00007860: VS state +0x1230047c: 0x00007781: GS state +0x12300480: 0x00007821: Clip state +0x12300484: 0x00007880: SF state +0x12300488: 0x000078a0: WM state +0x1230048c: 0x00007cc0: CC state +0x12300490: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300494: 0x05212040: vs fence: 64, clip_fence: 82, gs_fence: 72 +0x12300498: 0x18000062: sf fence: 98, vfe_fence: 0, cs_fence: 384 +0x1230049c: 0x60020100: CONSTANT_BUFFER: valid +0x123004a0: 0x00000382: offset: 0x00000380, length: 192 bytes +0x123004a4: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x123004a8: 0x00007760: VS state +0x123004ac: 0x00007781: GS state +0x123004b0: 0x00007821: Clip state +0x123004b4: 0x00007880: SF state +0x123004b8: 0x000078a0: WM state +0x123004bc: 0x00007cc0: CC state +0x123004c0: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x123004c4: 0x05212040: vs fence: 64, clip_fence: 82, gs_fence: 72 +0x123004c8: 0x18000062: sf fence: 98, vfe_fence: 0, cs_fence: 384 +0x123004cc: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x123004d0: 0x00000018: buffer 0: sequential, pitch 24b +0x123004d4: 0x00002a30: buffer address +0x123004d8: 0x00000000: max index +0x123004dc: 0x00000000: mbz +0x123004e0: 0x78090003: 3DSTATE_VERTEX_ELEMENTS +0x123004e4: 0x04400000: buffer 0: valid, type 0x0040, src offset 0x0000 bytes +0x123004e8: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x123004ec: 0x0440000c: buffer 0: valid, type 0x0040, src offset 0x000c bytes +0x123004f0: 0x11130004: (X, Y, Z, 1.0), dst offset 0x10 bytes +0x123004f4: 0x60020100: CONSTANT_BUFFER: valid +0x123004f8: 0x00000442: offset: 0x00000440, length: 192 bytes +0x123004fc: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300500: 0x00000052: vertex count +0x12300504: 0x00000000: start vertex +0x12300508: 0x00000001: instance count +0x1230050c: 0x00000000: start instance +0x12300510: 0x00000000: index bias +0x12300514: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300518: 0x00007760: VS state +0x1230051c: 0x00000000: GS state +0x12300520: 0x000076c1: Clip state +0x12300524: 0x00007700: SF state +0x12300528: 0x00007720: WM state +0x1230052c: 0x00007cc0: CC state +0x12300530: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300534: 0x05212040: vs fence: 64, clip_fence: 82, gs_fence: 72 +0x12300538: 0x18000062: sf fence: 98, vfe_fence: 0, cs_fence: 384 +0x1230053c: 0x60020100: CONSTANT_BUFFER: valid +0x12300540: 0x00000442: offset: 0x00000440, length: 192 bytes +0x12300544: 0x7b001404: 3DPRIMITIVE: tri strip sequential +0x12300548: 0x00000016: vertex count +0x1230054c: 0x00000052: start vertex +0x12300550: 0x00000001: instance count +0x12300554: 0x00000000: start instance +0x12300558: 0x00000000: index bias +0x1230055c: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300560: 0x00007620: VS state +0x12300564: 0x000075c1: GS state +0x12300568: 0x000075e1: Clip state +0x1230056c: 0x00007640: SF state +0x12300570: 0x00007660: WM state +0x12300574: 0x00007cc0: CC state +0x12300578: 0x00000000: MI_NOOP +0x1230057c: 0x00000000: MI_NOOP +0x12300580: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300584: 0x05212040: vs fence: 64, clip_fence: 82, gs_fence: 72 +0x12300588: 0x18000062: sf fence: 98, vfe_fence: 0, cs_fence: 384 +0x1230058c: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x12300590: 0x0000000c: buffer 0: sequential, pitch 12b +0x12300594: 0x000033f0: buffer address +0x12300598: 0x00000000: max index +0x1230059c: 0x00000000: mbz +0x123005a0: 0x78090001: 3DSTATE_VERTEX_ELEMENTS +0x123005a4: 0x04400000: buffer 0: valid, type 0x0040, src offset 0x0000 bytes +0x123005a8: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x123005ac: 0x60020100: CONSTANT_BUFFER: valid +0x123005b0: 0x00000502: offset: 0x00000500, length: 192 bytes +0x123005b4: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x123005b8: 0x0000002a: vertex count +0x123005bc: 0x00000000: start vertex +0x123005c0: 0x00000001: instance count +0x123005c4: 0x00000000: start instance +0x123005c8: 0x00000000: index bias +0x123005cc: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x123005d0: 0x00007620: VS state +0x123005d4: 0x000075a1: GS state +0x123005d8: 0x000075e1: Clip state +0x123005dc: 0x00007640: SF state +0x123005e0: 0x00007660: WM state +0x123005e4: 0x00007cc0: CC state +0x123005e8: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x123005ec: 0x05212040: vs fence: 64, clip_fence: 82, gs_fence: 72 +0x123005f0: 0x18000062: sf fence: 98, vfe_fence: 0, cs_fence: 384 +0x123005f4: 0x60020100: CONSTANT_BUFFER: valid +0x123005f8: 0x00000502: offset: 0x00000500, length: 192 bytes +0x123005fc: 0x7b001c04: 3DPRIMITIVE: quad list sequential +0x12300600: 0x00000028: vertex count +0x12300604: 0x0000002a: start vertex +0x12300608: 0x00000001: instance count +0x1230060c: 0x00000000: start instance +0x12300610: 0x00000000: index bias +0x12300614: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300618: 0x00007620: VS state +0x1230061c: 0x00007581: GS state +0x12300620: 0x000075e1: Clip state +0x12300624: 0x00007640: SF state +0x12300628: 0x00007660: WM state +0x1230062c: 0x00007cc0: CC state +0x12300630: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300634: 0x05212040: vs fence: 64, clip_fence: 82, gs_fence: 72 +0x12300638: 0x18000062: sf fence: 98, vfe_fence: 0, cs_fence: 384 +0x1230063c: 0x60020100: CONSTANT_BUFFER: valid +0x12300640: 0x000005c2: offset: 0x000005c0, length: 192 bytes +0x12300644: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300648: 0x0000002a: vertex count +0x1230064c: 0x00000052: start vertex +0x12300650: 0x00000001: instance count +0x12300654: 0x00000000: start instance +0x12300658: 0x00000000: index bias +0x1230065c: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300660: 0x00007620: VS state +0x12300664: 0x00007561: GS state +0x12300668: 0x000075e1: Clip state +0x1230066c: 0x00007640: SF state +0x12300670: 0x00007660: WM state +0x12300674: 0x00007cc0: CC state +0x12300678: 0x00000000: MI_NOOP +0x1230067c: 0x00000000: MI_NOOP +0x12300680: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300684: 0x05212040: vs fence: 64, clip_fence: 82, gs_fence: 72 +0x12300688: 0x18000062: sf fence: 98, vfe_fence: 0, cs_fence: 384 +0x1230068c: 0x60020100: CONSTANT_BUFFER: valid +0x12300690: 0x000005c2: offset: 0x000005c0, length: 192 bytes +0x12300694: 0x7b001c04: 3DPRIMITIVE: quad list sequential +0x12300698: 0x00000028: vertex count +0x1230069c: 0x0000007c: start vertex +0x123006a0: 0x00000001: instance count +0x123006a4: 0x00000000: start instance +0x123006a8: 0x00000000: index bias +0x123006ac: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x123006b0: 0x00007620: VS state +0x123006b4: 0x00007541: GS state +0x123006b8: 0x000075e1: Clip state +0x123006bc: 0x00007640: SF state +0x123006c0: 0x00007660: WM state +0x123006c4: 0x00007cc0: CC state +0x123006c8: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x123006cc: 0x05212040: vs fence: 64, clip_fence: 82, gs_fence: 72 +0x123006d0: 0x18000062: sf fence: 98, vfe_fence: 0, cs_fence: 384 +0x123006d4: 0x60020100: CONSTANT_BUFFER: valid +0x123006d8: 0x000005c2: offset: 0x000005c0, length: 192 bytes +0x123006dc: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x123006e0: 0x00007520: VS state +0x123006e4: 0x00007541: GS state +0x123006e8: 0x000075e1: Clip state +0x123006ec: 0x00007640: SF state +0x123006f0: 0x00007660: WM state +0x123006f4: 0x00007cc0: CC state +0x123006f8: 0x00000000: MI_NOOP +0x123006fc: 0x00000000: MI_NOOP +0x12300700: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300704: 0x05212040: vs fence: 64, clip_fence: 82, gs_fence: 72 +0x12300708: 0x18000062: sf fence: 98, vfe_fence: 0, cs_fence: 384 +0x1230070c: 0x78080003: 3DSTATE_VERTEX_BUFFERS +0x12300710: 0x00000018: buffer 0: sequential, pitch 24b +0x12300714: 0x00003bb8: buffer address +0x12300718: 0x00000000: max index +0x1230071c: 0x00000000: mbz +0x12300720: 0x78090003: 3DSTATE_VERTEX_ELEMENTS +0x12300724: 0x04400000: buffer 0: valid, type 0x0040, src offset 0x0000 bytes +0x12300728: 0x11130000: (X, Y, Z, 1.0), dst offset 0x00 bytes +0x1230072c: 0x0440000c: buffer 0: valid, type 0x0040, src offset 0x000c bytes +0x12300730: 0x11130004: (X, Y, Z, 1.0), dst offset 0x10 bytes +0x12300734: 0x60020100: CONSTANT_BUFFER: valid +0x12300738: 0x00000682: offset: 0x00000680, length: 192 bytes +0x1230073c: 0x7b002004: 3DPRIMITIVE: quad strip sequential +0x12300740: 0x00000052: vertex count +0x12300744: 0x00000000: start vertex +0x12300748: 0x00000001: instance count +0x1230074c: 0x00000000: start instance +0x12300750: 0x00000000: index bias +0x12300754: 0x78000005: 3DSTATE_PIPELINED_POINTERS +0x12300758: 0x00007520: VS state +0x1230075c: 0x00000000: GS state +0x12300760: 0x00007481: Clip state +0x12300764: 0x000074c0: SF state +0x12300768: 0x000074e0: WM state +0x1230076c: 0x00007cc0: CC state +0x12300770: 0x60003f01: URB_FENCE: cs vfe sf clip gs vs +0x12300774: 0x05212040: vs fence: 64, clip_fence: 82, gs_fence: 72 +0x12300778: 0x18000062: sf fence: 98, vfe_fence: 0, cs_fence: 384 +0x1230077c: 0x60020100: CONSTANT_BUFFER: valid +0x12300780: 0x00000682: offset: 0x00000680, length: 192 bytes +0x12300784: 0x7b001404: 3DPRIMITIVE: tri strip sequential +0x12300788: 0x00000016: vertex count +0x1230078c: 0x00000052: start vertex +0x12300790: 0x00000001: instance count +0x12300794: 0x00000000: start instance +0x12300798: 0x00000000: index bias +0x1230079c: 0x05000000: MI_BATCH_BUFFER_END diff --git a/intel/tests/gm45-3d.batch.sh b/intel/tests/gm45-3d.batch.sh new file mode 120000 index 0000000..796ca5f --- /dev/null +++ b/intel/tests/gm45-3d.batch.sh @@ -0,0 +1 @@ +test-batch.sh
\ No newline at end of file diff --git a/intel/tests/test-batch.sh b/intel/tests/test-batch.sh new file mode 100755 index 0000000..b85f639 --- /dev/null +++ b/intel/tests/test-batch.sh @@ -0,0 +1,20 @@ +#!/bin/sh + +TEST_FILENAME=`echo "$0" | sed 's|\.sh$||'` +./test_decode $TEST_FILENAME + +ret=$? + +# pretty-print a diff showing what happened, and leave the dumped +# around for possibly moving over the ref. +if test $ret = 1; then + REF_FILENAME="$TEST_FILENAME-ref.txt" + NEW_FILENAME="$TEST_FILENAME-new.txt" + ./test_decode $TEST_FILENAME -dump > $NEW_FILENAME + if test $? = 0; then + echo "Differences:" + diff -u $REF_FILENAME $NEW_FILENAME + fi +fi + +exit $ret diff --git a/intel/uthash.h b/intel/uthash.h new file mode 100644 index 0000000..62e1650 --- /dev/null +++ b/intel/uthash.h @@ -0,0 +1,1074 @@ +/* +Copyright (c) 2003-2016, Troy D. Hanson http://troydhanson.github.com/uthash/ +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef UTHASH_H +#define UTHASH_H + +#define UTHASH_VERSION 2.0.1 + +#include <string.h> /* memcmp,strlen */ +#include <stddef.h> /* ptrdiff_t */ +#include <stdlib.h> /* exit() */ + +/* These macros use decltype or the earlier __typeof GNU extension. + As decltype is only available in newer compilers (VS2010 or gcc 4.3+ + when compiling c++ source) this code uses whatever method is needed + or, for VS2008 where neither is available, uses casting workarounds. */ +#if defined(_MSC_VER) /* MS compiler */ +#if _MSC_VER >= 1600 && defined(__cplusplus) /* VS2010 or newer in C++ mode */ +#define DECLTYPE(x) (decltype(x)) +#else /* VS2008 or older (or VS2010 in C mode) */ +#define NO_DECLTYPE +#define DECLTYPE(x) +#endif +#elif defined(__BORLANDC__) || defined(__LCC__) || defined(__WATCOMC__) +#define NO_DECLTYPE +#define DECLTYPE(x) +#else /* GNU, Sun and other compilers */ +#define DECLTYPE(x) (__typeof(x)) +#endif + +#ifdef NO_DECLTYPE +#define DECLTYPE_ASSIGN(dst,src) \ +do { \ + char **_da_dst = (char**)(&(dst)); \ + *_da_dst = (char*)(src); \ +} while (0) +#else +#define DECLTYPE_ASSIGN(dst,src) \ +do { \ + (dst) = DECLTYPE(dst)(src); \ +} while (0) +#endif + +/* a number of the hash function use uint32_t which isn't defined on Pre VS2010 */ +#if defined(_WIN32) +#if defined(_MSC_VER) && _MSC_VER >= 1600 +#include <stdint.h> +#elif defined(__WATCOMC__) || defined(__MINGW32__) || defined(__CYGWIN__) +#include <stdint.h> +#else +typedef unsigned int uint32_t; +typedef unsigned char uint8_t; +#endif +#elif defined(__GNUC__) && !defined(__VXWORKS__) +#include <stdint.h> +#else +typedef unsigned int uint32_t; +typedef unsigned char uint8_t; +#endif + +#ifndef uthash_fatal +#define uthash_fatal(msg) exit(-1) /* fatal error (out of memory,etc) */ +#endif +#ifndef uthash_malloc +#define uthash_malloc(sz) malloc(sz) /* malloc fcn */ +#endif +#ifndef uthash_free +#define uthash_free(ptr,sz) free(ptr) /* free fcn */ +#endif +#ifndef uthash_strlen +#define uthash_strlen(s) strlen(s) +#endif +#ifndef uthash_memcmp +#define uthash_memcmp(a,b,n) memcmp(a,b,n) +#endif + +#ifndef uthash_noexpand_fyi +#define uthash_noexpand_fyi(tbl) /* can be defined to log noexpand */ +#endif +#ifndef uthash_expand_fyi +#define uthash_expand_fyi(tbl) /* can be defined to log expands */ +#endif + +/* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS 32U /* initial number of buckets */ +#define HASH_INITIAL_NUM_BUCKETS_LOG2 5U /* lg2 of initial number of buckets */ +#define HASH_BKT_CAPACITY_THRESH 10U /* expand when bucket count reaches */ + +/* calculate the element whose hash handle address is hhp */ +#define ELMT_FROM_HH(tbl,hhp) ((void*)(((char*)(hhp)) - ((tbl)->hho))) +/* calculate the hash handle from element address elp */ +#define HH_FROM_ELMT(tbl,elp) ((UT_hash_handle *)(((char*)(elp)) + ((tbl)->hho))) + +#define HASH_VALUE(keyptr,keylen,hashv) \ +do { \ + HASH_FCN(keyptr, keylen, hashv); \ +} while (0) + +#define HASH_FIND_BYHASHVALUE(hh,head,keyptr,keylen,hashval,out) \ +do { \ + (out) = NULL; \ + if (head) { \ + unsigned _hf_bkt; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _hf_bkt); \ + if (HASH_BLOOM_TEST((head)->hh.tbl, hashval) != 0) { \ + HASH_FIND_IN_BKT((head)->hh.tbl, hh, (head)->hh.tbl->buckets[ _hf_bkt ], keyptr, keylen, hashval, out); \ + } \ + } \ +} while (0) + +#define HASH_FIND(hh,head,keyptr,keylen,out) \ +do { \ + unsigned _hf_hashv; \ + HASH_VALUE(keyptr, keylen, _hf_hashv); \ + HASH_FIND_BYHASHVALUE(hh, head, keyptr, keylen, _hf_hashv, out); \ +} while (0) + +#ifdef HASH_BLOOM +#define HASH_BLOOM_BITLEN (1UL << HASH_BLOOM) +#define HASH_BLOOM_BYTELEN (HASH_BLOOM_BITLEN/8UL) + (((HASH_BLOOM_BITLEN%8UL)!=0UL) ? 1UL : 0UL) +#define HASH_BLOOM_MAKE(tbl) \ +do { \ + (tbl)->bloom_nbits = HASH_BLOOM; \ + (tbl)->bloom_bv = (uint8_t*)uthash_malloc(HASH_BLOOM_BYTELEN); \ + if (!((tbl)->bloom_bv)) { uthash_fatal( "out of memory"); } \ + memset((tbl)->bloom_bv, 0, HASH_BLOOM_BYTELEN); \ + (tbl)->bloom_sig = HASH_BLOOM_SIGNATURE; \ +} while (0) + +#define HASH_BLOOM_FREE(tbl) \ +do { \ + uthash_free((tbl)->bloom_bv, HASH_BLOOM_BYTELEN); \ +} while (0) + +#define HASH_BLOOM_BITSET(bv,idx) (bv[(idx)/8U] |= (1U << ((idx)%8U))) +#define HASH_BLOOM_BITTEST(bv,idx) (bv[(idx)/8U] & (1U << ((idx)%8U))) + +#define HASH_BLOOM_ADD(tbl,hashv) \ + HASH_BLOOM_BITSET((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U))) + +#define HASH_BLOOM_TEST(tbl,hashv) \ + HASH_BLOOM_BITTEST((tbl)->bloom_bv, (hashv & (uint32_t)((1ULL << (tbl)->bloom_nbits) - 1U))) + +#else +#define HASH_BLOOM_MAKE(tbl) +#define HASH_BLOOM_FREE(tbl) +#define HASH_BLOOM_ADD(tbl,hashv) +#define HASH_BLOOM_TEST(tbl,hashv) (1) +#define HASH_BLOOM_BYTELEN 0U +#endif + +#define HASH_MAKE_TABLE(hh,head) \ +do { \ + (head)->hh.tbl = (UT_hash_table*)uthash_malloc( \ + sizeof(UT_hash_table)); \ + if (!((head)->hh.tbl)) { uthash_fatal( "out of memory"); } \ + memset((head)->hh.tbl, 0, sizeof(UT_hash_table)); \ + (head)->hh.tbl->tail = &((head)->hh); \ + (head)->hh.tbl->num_buckets = HASH_INITIAL_NUM_BUCKETS; \ + (head)->hh.tbl->log2_num_buckets = HASH_INITIAL_NUM_BUCKETS_LOG2; \ + (head)->hh.tbl->hho = (char*)(&(head)->hh) - (char*)(head); \ + (head)->hh.tbl->buckets = (UT_hash_bucket*)uthash_malloc( \ + HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ + if (! (head)->hh.tbl->buckets) { uthash_fatal( "out of memory"); } \ + memset((head)->hh.tbl->buckets, 0, \ + HASH_INITIAL_NUM_BUCKETS*sizeof(struct UT_hash_bucket)); \ + HASH_BLOOM_MAKE((head)->hh.tbl); \ + (head)->hh.tbl->signature = HASH_SIGNATURE; \ +} while (0) + +#define HASH_REPLACE_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,replaced,cmpfcn) \ +do { \ + (replaced) = NULL; \ + HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \ + if (replaced) { \ + HASH_DELETE(hh, head, replaced); \ + } \ + HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn); \ +} while (0) + +#define HASH_REPLACE_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add,replaced) \ +do { \ + (replaced) = NULL; \ + HASH_FIND_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, replaced); \ + if (replaced) { \ + HASH_DELETE(hh, head, replaced); \ + } \ + HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add); \ +} while (0) + +#define HASH_REPLACE(hh,head,fieldname,keylen_in,add,replaced) \ +do { \ + unsigned _hr_hashv; \ + HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv); \ + HASH_REPLACE_BYHASHVALUE(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced); \ +} while (0) + +#define HASH_REPLACE_INORDER(hh,head,fieldname,keylen_in,add,replaced,cmpfcn) \ +do { \ + unsigned _hr_hashv; \ + HASH_VALUE(&((add)->fieldname), keylen_in, _hr_hashv); \ + HASH_REPLACE_BYHASHVALUE_INORDER(hh, head, fieldname, keylen_in, _hr_hashv, add, replaced, cmpfcn); \ +} while (0) + +#define HASH_APPEND_LIST(hh, head, add) \ +do { \ + (add)->hh.next = NULL; \ + (add)->hh.prev = ELMT_FROM_HH((head)->hh.tbl, (head)->hh.tbl->tail); \ + (head)->hh.tbl->tail->next = (add); \ + (head)->hh.tbl->tail = &((add)->hh); \ +} while (0) + +#define HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh,head,keyptr,keylen_in,hashval,add,cmpfcn) \ +do { \ + unsigned _ha_bkt; \ + (add)->hh.hashv = (hashval); \ + (add)->hh.key = (char*) (keyptr); \ + (add)->hh.keylen = (unsigned) (keylen_in); \ + if (!(head)) { \ + (add)->hh.next = NULL; \ + (add)->hh.prev = NULL; \ + (head) = (add); \ + HASH_MAKE_TABLE(hh, head); \ + } else { \ + struct UT_hash_handle *_hs_iter = &(head)->hh; \ + (add)->hh.tbl = (head)->hh.tbl; \ + do { \ + if (cmpfcn(DECLTYPE(head) ELMT_FROM_HH((head)->hh.tbl, _hs_iter), add) > 0) \ + break; \ + } while ((_hs_iter = _hs_iter->next)); \ + if (_hs_iter) { \ + (add)->hh.next = _hs_iter; \ + if (((add)->hh.prev = _hs_iter->prev)) { \ + HH_FROM_ELMT((head)->hh.tbl, _hs_iter->prev)->next = (add); \ + } else { \ + (head) = (add); \ + } \ + _hs_iter->prev = (add); \ + } else { \ + HASH_APPEND_LIST(hh, head, add); \ + } \ + } \ + (head)->hh.tbl->num_items++; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], &(add)->hh); \ + HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ + HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ + HASH_FSCK(hh, head); \ +} while (0) + +#define HASH_ADD_KEYPTR_INORDER(hh,head,keyptr,keylen_in,add,cmpfcn) \ +do { \ + unsigned _hs_hashv; \ + HASH_VALUE(keyptr, keylen_in, _hs_hashv); \ + HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, keyptr, keylen_in, _hs_hashv, add, cmpfcn); \ +} while (0) + +#define HASH_ADD_BYHASHVALUE_INORDER(hh,head,fieldname,keylen_in,hashval,add,cmpfcn) \ + HASH_ADD_KEYPTR_BYHASHVALUE_INORDER(hh, head, &((add)->fieldname), keylen_in, hashval, add, cmpfcn) + +#define HASH_ADD_INORDER(hh,head,fieldname,keylen_in,add,cmpfcn) \ + HASH_ADD_KEYPTR_INORDER(hh, head, &((add)->fieldname), keylen_in, add, cmpfcn) + +#define HASH_ADD_KEYPTR_BYHASHVALUE(hh,head,keyptr,keylen_in,hashval,add) \ +do { \ + unsigned _ha_bkt; \ + (add)->hh.hashv = (hashval); \ + (add)->hh.key = (char*) (keyptr); \ + (add)->hh.keylen = (unsigned) (keylen_in); \ + if (!(head)) { \ + (add)->hh.next = NULL; \ + (add)->hh.prev = NULL; \ + (head) = (add); \ + HASH_MAKE_TABLE(hh, head); \ + } else { \ + (add)->hh.tbl = (head)->hh.tbl; \ + HASH_APPEND_LIST(hh, head, add); \ + } \ + (head)->hh.tbl->num_items++; \ + HASH_TO_BKT(hashval, (head)->hh.tbl->num_buckets, _ha_bkt); \ + HASH_ADD_TO_BKT((head)->hh.tbl->buckets[_ha_bkt], &(add)->hh); \ + HASH_BLOOM_ADD((head)->hh.tbl, hashval); \ + HASH_EMIT_KEY(hh, head, keyptr, keylen_in); \ + HASH_FSCK(hh, head); \ +} while (0) + +#define HASH_ADD_KEYPTR(hh,head,keyptr,keylen_in,add) \ +do { \ + unsigned _ha_hashv; \ + HASH_VALUE(keyptr, keylen_in, _ha_hashv); \ + HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, keyptr, keylen_in, _ha_hashv, add); \ +} while (0) + +#define HASH_ADD_BYHASHVALUE(hh,head,fieldname,keylen_in,hashval,add) \ + HASH_ADD_KEYPTR_BYHASHVALUE(hh, head, &((add)->fieldname), keylen_in, hashval, add) + +#define HASH_ADD(hh,head,fieldname,keylen_in,add) \ + HASH_ADD_KEYPTR(hh, head, &((add)->fieldname), keylen_in, add) + +#define HASH_TO_BKT(hashv,num_bkts,bkt) \ +do { \ + bkt = ((hashv) & ((num_bkts) - 1U)); \ +} while (0) + +/* delete "delptr" from the hash table. + * "the usual" patch-up process for the app-order doubly-linked-list. + * The use of _hd_hh_del below deserves special explanation. + * These used to be expressed using (delptr) but that led to a bug + * if someone used the same symbol for the head and deletee, like + * HASH_DELETE(hh,users,users); + * We want that to work, but by changing the head (users) below + * we were forfeiting our ability to further refer to the deletee (users) + * in the patch-up process. Solution: use scratch space to + * copy the deletee pointer, then the latter references are via that + * scratch pointer rather than through the repointed (users) symbol. + */ +#define HASH_DELETE(hh,head,delptr) \ +do { \ + struct UT_hash_handle *_hd_hh_del; \ + if ( ((delptr)->hh.prev == NULL) && ((delptr)->hh.next == NULL) ) { \ + uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + head = NULL; \ + } else { \ + unsigned _hd_bkt; \ + _hd_hh_del = &((delptr)->hh); \ + if ((delptr) == ELMT_FROM_HH((head)->hh.tbl,(head)->hh.tbl->tail)) { \ + (head)->hh.tbl->tail = \ + (UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \ + (head)->hh.tbl->hho); \ + } \ + if ((delptr)->hh.prev != NULL) { \ + ((UT_hash_handle*)((ptrdiff_t)((delptr)->hh.prev) + \ + (head)->hh.tbl->hho))->next = (delptr)->hh.next; \ + } else { \ + DECLTYPE_ASSIGN(head,(delptr)->hh.next); \ + } \ + if (_hd_hh_del->next != NULL) { \ + ((UT_hash_handle*)((ptrdiff_t)_hd_hh_del->next + \ + (head)->hh.tbl->hho))->prev = \ + _hd_hh_del->prev; \ + } \ + HASH_TO_BKT( _hd_hh_del->hashv, (head)->hh.tbl->num_buckets, _hd_bkt); \ + HASH_DEL_IN_BKT(hh,(head)->hh.tbl->buckets[_hd_bkt], _hd_hh_del); \ + (head)->hh.tbl->num_items--; \ + } \ + HASH_FSCK(hh,head); \ +} while (0) + + +/* convenience forms of HASH_FIND/HASH_ADD/HASH_DEL */ +#define HASH_FIND_STR(head,findstr,out) \ + HASH_FIND(hh,head,findstr,(unsigned)uthash_strlen(findstr),out) +#define HASH_ADD_STR(head,strfield,add) \ + HASH_ADD(hh,head,strfield[0],(unsigned)uthash_strlen(add->strfield),add) +#define HASH_REPLACE_STR(head,strfield,add,replaced) \ + HASH_REPLACE(hh,head,strfield[0],(unsigned)uthash_strlen(add->strfield),add,replaced) +#define HASH_FIND_INT(head,findint,out) \ + HASH_FIND(hh,head,findint,sizeof(int),out) +#define HASH_ADD_INT(head,intfield,add) \ + HASH_ADD(hh,head,intfield,sizeof(int),add) +#define HASH_REPLACE_INT(head,intfield,add,replaced) \ + HASH_REPLACE(hh,head,intfield,sizeof(int),add,replaced) +#define HASH_FIND_PTR(head,findptr,out) \ + HASH_FIND(hh,head,findptr,sizeof(void *),out) +#define HASH_ADD_PTR(head,ptrfield,add) \ + HASH_ADD(hh,head,ptrfield,sizeof(void *),add) +#define HASH_REPLACE_PTR(head,ptrfield,add,replaced) \ + HASH_REPLACE(hh,head,ptrfield,sizeof(void *),add,replaced) +#define HASH_DEL(head,delptr) \ + HASH_DELETE(hh,head,delptr) + +/* HASH_FSCK checks hash integrity on every add/delete when HASH_DEBUG is defined. + * This is for uthash developer only; it compiles away if HASH_DEBUG isn't defined. + */ +#ifdef HASH_DEBUG +#define HASH_OOPS(...) do { fprintf(stderr,__VA_ARGS__); exit(-1); } while (0) +#define HASH_FSCK(hh,head) \ +do { \ + struct UT_hash_handle *_thh; \ + if (head) { \ + unsigned _bkt_i; \ + unsigned _count; \ + char *_prev; \ + _count = 0; \ + for( _bkt_i = 0; _bkt_i < (head)->hh.tbl->num_buckets; _bkt_i++) { \ + unsigned _bkt_count = 0; \ + _thh = (head)->hh.tbl->buckets[_bkt_i].hh_head; \ + _prev = NULL; \ + while (_thh) { \ + if (_prev != (char*)(_thh->hh_prev)) { \ + HASH_OOPS("invalid hh_prev %p, actual %p\n", \ + _thh->hh_prev, _prev ); \ + } \ + _bkt_count++; \ + _prev = (char*)(_thh); \ + _thh = _thh->hh_next; \ + } \ + _count += _bkt_count; \ + if ((head)->hh.tbl->buckets[_bkt_i].count != _bkt_count) { \ + HASH_OOPS("invalid bucket count %u, actual %u\n", \ + (head)->hh.tbl->buckets[_bkt_i].count, _bkt_count); \ + } \ + } \ + if (_count != (head)->hh.tbl->num_items) { \ + HASH_OOPS("invalid hh item count %u, actual %u\n", \ + (head)->hh.tbl->num_items, _count ); \ + } \ + /* traverse hh in app order; check next/prev integrity, count */ \ + _count = 0; \ + _prev = NULL; \ + _thh = &(head)->hh; \ + while (_thh) { \ + _count++; \ + if (_prev !=(char*)(_thh->prev)) { \ + HASH_OOPS("invalid prev %p, actual %p\n", \ + _thh->prev, _prev ); \ + } \ + _prev = (char*)ELMT_FROM_HH((head)->hh.tbl, _thh); \ + _thh = ( _thh->next ? (UT_hash_handle*)((char*)(_thh->next) + \ + (head)->hh.tbl->hho) : NULL ); \ + } \ + if (_count != (head)->hh.tbl->num_items) { \ + HASH_OOPS("invalid app item count %u, actual %u\n", \ + (head)->hh.tbl->num_items, _count ); \ + } \ + } \ +} while (0) +#else +#define HASH_FSCK(hh,head) +#endif + +/* When compiled with -DHASH_EMIT_KEYS, length-prefixed keys are emitted to + * the descriptor to which this macro is defined for tuning the hash function. + * The app can #include <unistd.h> to get the prototype for write(2). */ +#ifdef HASH_EMIT_KEYS +#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) \ +do { \ + unsigned _klen = fieldlen; \ + write(HASH_EMIT_KEYS, &_klen, sizeof(_klen)); \ + write(HASH_EMIT_KEYS, keyptr, (unsigned long)fieldlen); \ +} while (0) +#else +#define HASH_EMIT_KEY(hh,head,keyptr,fieldlen) +#endif + +/* default to Jenkin's hash unless overridden e.g. DHASH_FUNCTION=HASH_SAX */ +#ifdef HASH_FUNCTION +#define HASH_FCN HASH_FUNCTION +#else +#define HASH_FCN HASH_JEN +#endif + +/* The Bernstein hash function, used in Perl prior to v5.6. Note (x<<5+x)=x*33. */ +#define HASH_BER(key,keylen,hashv) \ +do { \ + unsigned _hb_keylen=(unsigned)keylen; \ + const unsigned char *_hb_key=(const unsigned char*)(key); \ + (hashv) = 0; \ + while (_hb_keylen-- != 0U) { \ + (hashv) = (((hashv) << 5) + (hashv)) + *_hb_key++; \ + } \ +} while (0) + + +/* SAX/FNV/OAT/JEN hash functions are macro variants of those listed at + * http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx */ +#define HASH_SAX(key,keylen,hashv) \ +do { \ + unsigned _sx_i; \ + const unsigned char *_hs_key=(const unsigned char*)(key); \ + hashv = 0; \ + for(_sx_i=0; _sx_i < keylen; _sx_i++) { \ + hashv ^= (hashv << 5) + (hashv >> 2) + _hs_key[_sx_i]; \ + } \ +} while (0) +/* FNV-1a variation */ +#define HASH_FNV(key,keylen,hashv) \ +do { \ + unsigned _fn_i; \ + const unsigned char *_hf_key=(const unsigned char*)(key); \ + hashv = 2166136261U; \ + for(_fn_i=0; _fn_i < keylen; _fn_i++) { \ + hashv = hashv ^ _hf_key[_fn_i]; \ + hashv = hashv * 16777619U; \ + } \ +} while (0) + +#define HASH_OAT(key,keylen,hashv) \ +do { \ + unsigned _ho_i; \ + const unsigned char *_ho_key=(const unsigned char*)(key); \ + hashv = 0; \ + for(_ho_i=0; _ho_i < keylen; _ho_i++) { \ + hashv += _ho_key[_ho_i]; \ + hashv += (hashv << 10); \ + hashv ^= (hashv >> 6); \ + } \ + hashv += (hashv << 3); \ + hashv ^= (hashv >> 11); \ + hashv += (hashv << 15); \ +} while (0) + +#define HASH_JEN_MIX(a,b,c) \ +do { \ + a -= b; a -= c; a ^= ( c >> 13 ); \ + b -= c; b -= a; b ^= ( a << 8 ); \ + c -= a; c -= b; c ^= ( b >> 13 ); \ + a -= b; a -= c; a ^= ( c >> 12 ); \ + b -= c; b -= a; b ^= ( a << 16 ); \ + c -= a; c -= b; c ^= ( b >> 5 ); \ + a -= b; a -= c; a ^= ( c >> 3 ); \ + b -= c; b -= a; b ^= ( a << 10 ); \ + c -= a; c -= b; c ^= ( b >> 15 ); \ +} while (0) + +#define HASH_JEN(key,keylen,hashv) \ +do { \ + unsigned _hj_i,_hj_j,_hj_k; \ + unsigned const char *_hj_key=(unsigned const char*)(key); \ + hashv = 0xfeedbeefu; \ + _hj_i = _hj_j = 0x9e3779b9u; \ + _hj_k = (unsigned)(keylen); \ + while (_hj_k >= 12U) { \ + _hj_i += (_hj_key[0] + ( (unsigned)_hj_key[1] << 8 ) \ + + ( (unsigned)_hj_key[2] << 16 ) \ + + ( (unsigned)_hj_key[3] << 24 ) ); \ + _hj_j += (_hj_key[4] + ( (unsigned)_hj_key[5] << 8 ) \ + + ( (unsigned)_hj_key[6] << 16 ) \ + + ( (unsigned)_hj_key[7] << 24 ) ); \ + hashv += (_hj_key[8] + ( (unsigned)_hj_key[9] << 8 ) \ + + ( (unsigned)_hj_key[10] << 16 ) \ + + ( (unsigned)_hj_key[11] << 24 ) ); \ + \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ + \ + _hj_key += 12; \ + _hj_k -= 12U; \ + } \ + hashv += (unsigned)(keylen); \ + switch ( _hj_k ) { \ + case 11: hashv += ( (unsigned)_hj_key[10] << 24 ); /* FALLTHROUGH */ \ + case 10: hashv += ( (unsigned)_hj_key[9] << 16 ); /* FALLTHROUGH */ \ + case 9: hashv += ( (unsigned)_hj_key[8] << 8 ); /* FALLTHROUGH */ \ + case 8: _hj_j += ( (unsigned)_hj_key[7] << 24 ); /* FALLTHROUGH */ \ + case 7: _hj_j += ( (unsigned)_hj_key[6] << 16 ); /* FALLTHROUGH */ \ + case 6: _hj_j += ( (unsigned)_hj_key[5] << 8 ); /* FALLTHROUGH */ \ + case 5: _hj_j += _hj_key[4]; /* FALLTHROUGH */ \ + case 4: _hj_i += ( (unsigned)_hj_key[3] << 24 ); /* FALLTHROUGH */ \ + case 3: _hj_i += ( (unsigned)_hj_key[2] << 16 ); /* FALLTHROUGH */ \ + case 2: _hj_i += ( (unsigned)_hj_key[1] << 8 ); /* FALLTHROUGH */ \ + case 1: _hj_i += _hj_key[0]; \ + } \ + HASH_JEN_MIX(_hj_i, _hj_j, hashv); \ +} while (0) + +/* The Paul Hsieh hash function */ +#undef get16bits +#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ + || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) +#define get16bits(d) (*((const uint16_t *) (d))) +#endif + +#if !defined (get16bits) +#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8) \ + +(uint32_t)(((const uint8_t *)(d))[0]) ) +#endif +#define HASH_SFH(key,keylen,hashv) \ +do { \ + unsigned const char *_sfh_key=(unsigned const char*)(key); \ + uint32_t _sfh_tmp, _sfh_len = (uint32_t)keylen; \ + \ + unsigned _sfh_rem = _sfh_len & 3U; \ + _sfh_len >>= 2; \ + hashv = 0xcafebabeu; \ + \ + /* Main loop */ \ + for (;_sfh_len > 0U; _sfh_len--) { \ + hashv += get16bits (_sfh_key); \ + _sfh_tmp = ((uint32_t)(get16bits (_sfh_key+2)) << 11) ^ hashv; \ + hashv = (hashv << 16) ^ _sfh_tmp; \ + _sfh_key += 2U*sizeof (uint16_t); \ + hashv += hashv >> 11; \ + } \ + \ + /* Handle end cases */ \ + switch (_sfh_rem) { \ + case 3: hashv += get16bits (_sfh_key); \ + hashv ^= hashv << 16; \ + hashv ^= (uint32_t)(_sfh_key[sizeof (uint16_t)]) << 18; \ + hashv += hashv >> 11; \ + break; \ + case 2: hashv += get16bits (_sfh_key); \ + hashv ^= hashv << 11; \ + hashv += hashv >> 17; \ + break; \ + case 1: hashv += *_sfh_key; \ + hashv ^= hashv << 10; \ + hashv += hashv >> 1; \ + } \ + \ + /* Force "avalanching" of final 127 bits */ \ + hashv ^= hashv << 3; \ + hashv += hashv >> 5; \ + hashv ^= hashv << 4; \ + hashv += hashv >> 17; \ + hashv ^= hashv << 25; \ + hashv += hashv >> 6; \ +} while (0) + +#ifdef HASH_USING_NO_STRICT_ALIASING +/* The MurmurHash exploits some CPU's (x86,x86_64) tolerance for unaligned reads. + * For other types of CPU's (e.g. Sparc) an unaligned read causes a bus error. + * MurmurHash uses the faster approach only on CPU's where we know it's safe. + * + * Note the preprocessor built-in defines can be emitted using: + * + * gcc -m64 -dM -E - < /dev/null (on gcc) + * cc -## a.c (where a.c is a simple test file) (Sun Studio) + */ +#if (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86)) +#define MUR_GETBLOCK(p,i) p[i] +#else /* non intel */ +#define MUR_PLUS0_ALIGNED(p) (((unsigned long)p & 3UL) == 0UL) +#define MUR_PLUS1_ALIGNED(p) (((unsigned long)p & 3UL) == 1UL) +#define MUR_PLUS2_ALIGNED(p) (((unsigned long)p & 3UL) == 2UL) +#define MUR_PLUS3_ALIGNED(p) (((unsigned long)p & 3UL) == 3UL) +#define WP(p) ((uint32_t*)((unsigned long)(p) & ~3UL)) +#ifdef HAVE_BIG_ENDIAN +#define MUR_THREE_ONE(p) ((((*WP(p))&0x00ffffff) << 8) | (((*(WP(p)+1))&0xff000000) >> 24)) +#define MUR_TWO_TWO(p) ((((*WP(p))&0x0000ffff) <<16) | (((*(WP(p)+1))&0xffff0000) >> 16)) +#define MUR_ONE_THREE(p) ((((*WP(p))&0x000000ff) <<24) | (((*(WP(p)+1))&0xffffff00) >> 8)) +#else /* little endian non-intel */ +#define MUR_THREE_ONE(p) ((((*WP(p))&0xffffff00) >> 8) | (((*(WP(p)+1))&0x000000ff) << 24)) +#define MUR_TWO_TWO(p) ((((*WP(p))&0xffff0000) >>16) | (((*(WP(p)+1))&0x0000ffff) << 16)) +#define MUR_ONE_THREE(p) ((((*WP(p))&0xff000000) >>24) | (((*(WP(p)+1))&0x00ffffff) << 8)) +#endif +#define MUR_GETBLOCK(p,i) (MUR_PLUS0_ALIGNED(p) ? ((p)[i]) : \ + (MUR_PLUS1_ALIGNED(p) ? MUR_THREE_ONE(p) : \ + (MUR_PLUS2_ALIGNED(p) ? MUR_TWO_TWO(p) : \ + MUR_ONE_THREE(p)))) +#endif +#define MUR_ROTL32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) +#define MUR_FMIX(_h) \ +do { \ + _h ^= _h >> 16; \ + _h *= 0x85ebca6bu; \ + _h ^= _h >> 13; \ + _h *= 0xc2b2ae35u; \ + _h ^= _h >> 16; \ +} while (0) + +#define HASH_MUR(key,keylen,hashv) \ +do { \ + const uint8_t *_mur_data = (const uint8_t*)(key); \ + const int _mur_nblocks = (int)(keylen) / 4; \ + uint32_t _mur_h1 = 0xf88D5353u; \ + uint32_t _mur_c1 = 0xcc9e2d51u; \ + uint32_t _mur_c2 = 0x1b873593u; \ + uint32_t _mur_k1 = 0; \ + const uint8_t *_mur_tail; \ + const uint32_t *_mur_blocks = (const uint32_t*)(_mur_data+(_mur_nblocks*4)); \ + int _mur_i; \ + for(_mur_i = -_mur_nblocks; _mur_i!=0; _mur_i++) { \ + _mur_k1 = MUR_GETBLOCK(_mur_blocks,_mur_i); \ + _mur_k1 *= _mur_c1; \ + _mur_k1 = MUR_ROTL32(_mur_k1,15); \ + _mur_k1 *= _mur_c2; \ + \ + _mur_h1 ^= _mur_k1; \ + _mur_h1 = MUR_ROTL32(_mur_h1,13); \ + _mur_h1 = (_mur_h1*5U) + 0xe6546b64u; \ + } \ + _mur_tail = (const uint8_t*)(_mur_data + (_mur_nblocks*4)); \ + _mur_k1=0; \ + switch((keylen) & 3U) { \ + case 3: _mur_k1 ^= (uint32_t)_mur_tail[2] << 16; /* FALLTHROUGH */ \ + case 2: _mur_k1 ^= (uint32_t)_mur_tail[1] << 8; /* FALLTHROUGH */ \ + case 1: _mur_k1 ^= (uint32_t)_mur_tail[0]; \ + _mur_k1 *= _mur_c1; \ + _mur_k1 = MUR_ROTL32(_mur_k1,15); \ + _mur_k1 *= _mur_c2; \ + _mur_h1 ^= _mur_k1; \ + } \ + _mur_h1 ^= (uint32_t)(keylen); \ + MUR_FMIX(_mur_h1); \ + hashv = _mur_h1; \ +} while (0) +#endif /* HASH_USING_NO_STRICT_ALIASING */ + +/* iterate over items in a known bucket to find desired item */ +#define HASH_FIND_IN_BKT(tbl,hh,head,keyptr,keylen_in,hashval,out) \ +do { \ + if ((head).hh_head != NULL) { \ + DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (head).hh_head)); \ + } else { \ + (out) = NULL; \ + } \ + while ((out) != NULL) { \ + if ((out)->hh.hashv == (hashval) && (out)->hh.keylen == (keylen_in)) { \ + if (uthash_memcmp((out)->hh.key, keyptr, keylen_in) == 0) { \ + break; \ + } \ + } \ + if ((out)->hh.hh_next != NULL) { \ + DECLTYPE_ASSIGN(out, ELMT_FROM_HH(tbl, (out)->hh.hh_next)); \ + } else { \ + (out) = NULL; \ + } \ + } \ +} while (0) + +/* add an item to a bucket */ +#define HASH_ADD_TO_BKT(head,addhh) \ +do { \ + head.count++; \ + (addhh)->hh_next = head.hh_head; \ + (addhh)->hh_prev = NULL; \ + if (head.hh_head != NULL) { (head).hh_head->hh_prev = (addhh); } \ + (head).hh_head=addhh; \ + if ((head.count >= ((head.expand_mult+1U) * HASH_BKT_CAPACITY_THRESH)) \ + && ((addhh)->tbl->noexpand != 1U)) { \ + HASH_EXPAND_BUCKETS((addhh)->tbl); \ + } \ +} while (0) + +/* remove an item from a given bucket */ +#define HASH_DEL_IN_BKT(hh,head,hh_del) \ + (head).count--; \ + if ((head).hh_head == hh_del) { \ + (head).hh_head = hh_del->hh_next; \ + } \ + if (hh_del->hh_prev) { \ + hh_del->hh_prev->hh_next = hh_del->hh_next; \ + } \ + if (hh_del->hh_next) { \ + hh_del->hh_next->hh_prev = hh_del->hh_prev; \ + } + +/* Bucket expansion has the effect of doubling the number of buckets + * and redistributing the items into the new buckets. Ideally the + * items will distribute more or less evenly into the new buckets + * (the extent to which this is true is a measure of the quality of + * the hash function as it applies to the key domain). + * + * With the items distributed into more buckets, the chain length + * (item count) in each bucket is reduced. Thus by expanding buckets + * the hash keeps a bound on the chain length. This bounded chain + * length is the essence of how a hash provides constant time lookup. + * + * The calculation of tbl->ideal_chain_maxlen below deserves some + * explanation. First, keep in mind that we're calculating the ideal + * maximum chain length based on the *new* (doubled) bucket count. + * In fractions this is just n/b (n=number of items,b=new num buckets). + * Since the ideal chain length is an integer, we want to calculate + * ceil(n/b). We don't depend on floating point arithmetic in this + * hash, so to calculate ceil(n/b) with integers we could write + * + * ceil(n/b) = (n/b) + ((n%b)?1:0) + * + * and in fact a previous version of this hash did just that. + * But now we have improved things a bit by recognizing that b is + * always a power of two. We keep its base 2 log handy (call it lb), + * so now we can write this with a bit shift and logical AND: + * + * ceil(n/b) = (n>>lb) + ( (n & (b-1)) ? 1:0) + * + */ +#define HASH_EXPAND_BUCKETS(tbl) \ +do { \ + unsigned _he_bkt; \ + unsigned _he_bkt_i; \ + struct UT_hash_handle *_he_thh, *_he_hh_nxt; \ + UT_hash_bucket *_he_new_buckets, *_he_newbkt; \ + _he_new_buckets = (UT_hash_bucket*)uthash_malloc( \ + 2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + if (!_he_new_buckets) { uthash_fatal( "out of memory"); } \ + memset(_he_new_buckets, 0, \ + 2UL * tbl->num_buckets * sizeof(struct UT_hash_bucket)); \ + tbl->ideal_chain_maxlen = \ + (tbl->num_items >> (tbl->log2_num_buckets+1U)) + \ + (((tbl->num_items & ((tbl->num_buckets*2U)-1U)) != 0U) ? 1U : 0U); \ + tbl->nonideal_items = 0; \ + for(_he_bkt_i = 0; _he_bkt_i < tbl->num_buckets; _he_bkt_i++) \ + { \ + _he_thh = tbl->buckets[ _he_bkt_i ].hh_head; \ + while (_he_thh != NULL) { \ + _he_hh_nxt = _he_thh->hh_next; \ + HASH_TO_BKT( _he_thh->hashv, tbl->num_buckets*2U, _he_bkt); \ + _he_newbkt = &(_he_new_buckets[ _he_bkt ]); \ + if (++(_he_newbkt->count) > tbl->ideal_chain_maxlen) { \ + tbl->nonideal_items++; \ + _he_newbkt->expand_mult = _he_newbkt->count / \ + tbl->ideal_chain_maxlen; \ + } \ + _he_thh->hh_prev = NULL; \ + _he_thh->hh_next = _he_newbkt->hh_head; \ + if (_he_newbkt->hh_head != NULL) { _he_newbkt->hh_head->hh_prev = \ + _he_thh; } \ + _he_newbkt->hh_head = _he_thh; \ + _he_thh = _he_hh_nxt; \ + } \ + } \ + uthash_free( tbl->buckets, tbl->num_buckets*sizeof(struct UT_hash_bucket) ); \ + tbl->num_buckets *= 2U; \ + tbl->log2_num_buckets++; \ + tbl->buckets = _he_new_buckets; \ + tbl->ineff_expands = (tbl->nonideal_items > (tbl->num_items >> 1)) ? \ + (tbl->ineff_expands+1U) : 0U; \ + if (tbl->ineff_expands > 1U) { \ + tbl->noexpand=1; \ + uthash_noexpand_fyi(tbl); \ + } \ + uthash_expand_fyi(tbl); \ +} while (0) + + +/* This is an adaptation of Simon Tatham's O(n log(n)) mergesort */ +/* Note that HASH_SORT assumes the hash handle name to be hh. + * HASH_SRT was added to allow the hash handle name to be passed in. */ +#define HASH_SORT(head,cmpfcn) HASH_SRT(hh,head,cmpfcn) +#define HASH_SRT(hh,head,cmpfcn) \ +do { \ + unsigned _hs_i; \ + unsigned _hs_looping,_hs_nmerges,_hs_insize,_hs_psize,_hs_qsize; \ + struct UT_hash_handle *_hs_p, *_hs_q, *_hs_e, *_hs_list, *_hs_tail; \ + if (head != NULL) { \ + _hs_insize = 1; \ + _hs_looping = 1; \ + _hs_list = &((head)->hh); \ + while (_hs_looping != 0U) { \ + _hs_p = _hs_list; \ + _hs_list = NULL; \ + _hs_tail = NULL; \ + _hs_nmerges = 0; \ + while (_hs_p != NULL) { \ + _hs_nmerges++; \ + _hs_q = _hs_p; \ + _hs_psize = 0; \ + for ( _hs_i = 0; _hs_i < _hs_insize; _hs_i++ ) { \ + _hs_psize++; \ + _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \ + ((void*)((char*)(_hs_q->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + if (! (_hs_q) ) { break; } \ + } \ + _hs_qsize = _hs_insize; \ + while ((_hs_psize > 0U) || ((_hs_qsize > 0U) && (_hs_q != NULL))) {\ + if (_hs_psize == 0U) { \ + _hs_e = _hs_q; \ + _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \ + ((void*)((char*)(_hs_q->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + _hs_qsize--; \ + } else if ( (_hs_qsize == 0U) || (_hs_q == NULL) ) { \ + _hs_e = _hs_p; \ + if (_hs_p != NULL){ \ + _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ? \ + ((void*)((char*)(_hs_p->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + } \ + _hs_psize--; \ + } else if (( \ + cmpfcn(DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_p)), \ + DECLTYPE(head)(ELMT_FROM_HH((head)->hh.tbl,_hs_q))) \ + ) <= 0) { \ + _hs_e = _hs_p; \ + if (_hs_p != NULL){ \ + _hs_p = (UT_hash_handle*)((_hs_p->next != NULL) ? \ + ((void*)((char*)(_hs_p->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + } \ + _hs_psize--; \ + } else { \ + _hs_e = _hs_q; \ + _hs_q = (UT_hash_handle*)((_hs_q->next != NULL) ? \ + ((void*)((char*)(_hs_q->next) + \ + (head)->hh.tbl->hho)) : NULL); \ + _hs_qsize--; \ + } \ + if ( _hs_tail != NULL ) { \ + _hs_tail->next = ((_hs_e != NULL) ? \ + ELMT_FROM_HH((head)->hh.tbl,_hs_e) : NULL); \ + } else { \ + _hs_list = _hs_e; \ + } \ + if (_hs_e != NULL) { \ + _hs_e->prev = ((_hs_tail != NULL) ? \ + ELMT_FROM_HH((head)->hh.tbl,_hs_tail) : NULL); \ + } \ + _hs_tail = _hs_e; \ + } \ + _hs_p = _hs_q; \ + } \ + if (_hs_tail != NULL){ \ + _hs_tail->next = NULL; \ + } \ + if ( _hs_nmerges <= 1U ) { \ + _hs_looping=0; \ + (head)->hh.tbl->tail = _hs_tail; \ + DECLTYPE_ASSIGN(head,ELMT_FROM_HH((head)->hh.tbl, _hs_list)); \ + } \ + _hs_insize *= 2U; \ + } \ + HASH_FSCK(hh,head); \ + } \ +} while (0) + +/* This function selects items from one hash into another hash. + * The end result is that the selected items have dual presence + * in both hashes. There is no copy of the items made; rather + * they are added into the new hash through a secondary hash + * hash handle that must be present in the structure. */ +#define HASH_SELECT(hh_dst, dst, hh_src, src, cond) \ +do { \ + unsigned _src_bkt, _dst_bkt; \ + void *_last_elt=NULL, *_elt; \ + UT_hash_handle *_src_hh, *_dst_hh, *_last_elt_hh=NULL; \ + ptrdiff_t _dst_hho = ((char*)(&(dst)->hh_dst) - (char*)(dst)); \ + if (src != NULL) { \ + for(_src_bkt=0; _src_bkt < (src)->hh_src.tbl->num_buckets; _src_bkt++) { \ + for(_src_hh = (src)->hh_src.tbl->buckets[_src_bkt].hh_head; \ + _src_hh != NULL; \ + _src_hh = _src_hh->hh_next) { \ + _elt = ELMT_FROM_HH((src)->hh_src.tbl, _src_hh); \ + if (cond(_elt)) { \ + _dst_hh = (UT_hash_handle*)(((char*)_elt) + _dst_hho); \ + _dst_hh->key = _src_hh->key; \ + _dst_hh->keylen = _src_hh->keylen; \ + _dst_hh->hashv = _src_hh->hashv; \ + _dst_hh->prev = _last_elt; \ + _dst_hh->next = NULL; \ + if (_last_elt_hh != NULL) { _last_elt_hh->next = _elt; } \ + if (dst == NULL) { \ + DECLTYPE_ASSIGN(dst,_elt); \ + HASH_MAKE_TABLE(hh_dst,dst); \ + } else { \ + _dst_hh->tbl = (dst)->hh_dst.tbl; \ + } \ + HASH_TO_BKT(_dst_hh->hashv, _dst_hh->tbl->num_buckets, _dst_bkt); \ + HASH_ADD_TO_BKT(_dst_hh->tbl->buckets[_dst_bkt],_dst_hh); \ + (dst)->hh_dst.tbl->num_items++; \ + _last_elt = _elt; \ + _last_elt_hh = _dst_hh; \ + } \ + } \ + } \ + } \ + HASH_FSCK(hh_dst,dst); \ +} while (0) + +#define HASH_CLEAR(hh,head) \ +do { \ + if (head != NULL) { \ + uthash_free((head)->hh.tbl->buckets, \ + (head)->hh.tbl->num_buckets*sizeof(struct UT_hash_bucket)); \ + HASH_BLOOM_FREE((head)->hh.tbl); \ + uthash_free((head)->hh.tbl, sizeof(UT_hash_table)); \ + (head)=NULL; \ + } \ +} while (0) + +#define HASH_OVERHEAD(hh,head) \ + ((head != NULL) ? ( \ + (size_t)(((head)->hh.tbl->num_items * sizeof(UT_hash_handle)) + \ + ((head)->hh.tbl->num_buckets * sizeof(UT_hash_bucket)) + \ + sizeof(UT_hash_table) + \ + (HASH_BLOOM_BYTELEN))) : 0U) + +#ifdef NO_DECLTYPE +#define HASH_ITER(hh,head,el,tmp) \ +for(((el)=(head)), ((*(char**)(&(tmp)))=(char*)((head!=NULL)?(head)->hh.next:NULL)); \ + (el) != NULL; ((el)=(tmp)), ((*(char**)(&(tmp)))=(char*)((tmp!=NULL)?(tmp)->hh.next:NULL))) +#else +#define HASH_ITER(hh,head,el,tmp) \ +for(((el)=(head)), ((tmp)=DECLTYPE(el)((head!=NULL)?(head)->hh.next:NULL)); \ + (el) != NULL; ((el)=(tmp)), ((tmp)=DECLTYPE(el)((tmp!=NULL)?(tmp)->hh.next:NULL))) +#endif + +/* obtain a count of items in the hash */ +#define HASH_COUNT(head) HASH_CNT(hh,head) +#define HASH_CNT(hh,head) ((head != NULL)?((head)->hh.tbl->num_items):0U) + +typedef struct UT_hash_bucket { + struct UT_hash_handle *hh_head; + unsigned count; + + /* expand_mult is normally set to 0. In this situation, the max chain length + * threshold is enforced at its default value, HASH_BKT_CAPACITY_THRESH. (If + * the bucket's chain exceeds this length, bucket expansion is triggered). + * However, setting expand_mult to a non-zero value delays bucket expansion + * (that would be triggered by additions to this particular bucket) + * until its chain length reaches a *multiple* of HASH_BKT_CAPACITY_THRESH. + * (The multiplier is simply expand_mult+1). The whole idea of this + * multiplier is to reduce bucket expansions, since they are expensive, in + * situations where we know that a particular bucket tends to be overused. + * It is better to let its chain length grow to a longer yet-still-bounded + * value, than to do an O(n) bucket expansion too often. + */ + unsigned expand_mult; + +} UT_hash_bucket; + +/* random signature used only to find hash tables in external analysis */ +#define HASH_SIGNATURE 0xa0111fe1u +#define HASH_BLOOM_SIGNATURE 0xb12220f2u + +typedef struct UT_hash_table { + UT_hash_bucket *buckets; + unsigned num_buckets, log2_num_buckets; + unsigned num_items; + struct UT_hash_handle *tail; /* tail hh in app order, for fast append */ + ptrdiff_t hho; /* hash handle offset (byte pos of hash handle in element */ + + /* in an ideal situation (all buckets used equally), no bucket would have + * more than ceil(#items/#buckets) items. that's the ideal chain length. */ + unsigned ideal_chain_maxlen; + + /* nonideal_items is the number of items in the hash whose chain position + * exceeds the ideal chain maxlen. these items pay the penalty for an uneven + * hash distribution; reaching them in a chain traversal takes >ideal steps */ + unsigned nonideal_items; + + /* ineffective expands occur when a bucket doubling was performed, but + * afterward, more than half the items in the hash had nonideal chain + * positions. If this happens on two consecutive expansions we inhibit any + * further expansion, as it's not helping; this happens when the hash + * function isn't a good fit for the key domain. When expansion is inhibited + * the hash will still work, albeit no longer in constant time. */ + unsigned ineff_expands, noexpand; + + uint32_t signature; /* used only to find hash tables in external analysis */ +#ifdef HASH_BLOOM + uint32_t bloom_sig; /* used only to test bloom exists in external analysis */ + uint8_t *bloom_bv; + uint8_t bloom_nbits; +#endif + +} UT_hash_table; + +typedef struct UT_hash_handle { + struct UT_hash_table *tbl; + void *prev; /* prev element in app order */ + void *next; /* next element in app order */ + struct UT_hash_handle *hh_prev; /* previous hh in bucket order */ + struct UT_hash_handle *hh_next; /* next hh in bucket order */ + void *key; /* ptr to enclosing struct's key */ + unsigned keylen; /* enclosing struct's key len */ + unsigned hashv; /* result of hash-fcn(key) */ +} UT_hash_handle; + +#endif /* UTHASH_H */ |